Skip to main content

pdfv_core/
lib.rs

1#![forbid(unsafe_code)]
2#![warn(rust_2024_compatibility, missing_docs, missing_debug_implementations)]
3//! Public contracts for the pdfv validation engine.
4//!
5//! The crate currently exposes the stable data model, error model, and JSON
6//! report writing spine used by later parser and validator phases.
7//!
8//! ```
9//! use pdfv_core::{InputKind, InputSummary, ValidationOptions};
10//!
11//! let options = ValidationOptions::default();
12//! let source = InputSummary::new(InputKind::Memory, None, None);
13//! assert!(options.report_parse_warnings);
14//! assert_eq!(source.kind, InputKind::Memory);
15//! ```
16
17mod generated_profiles;
18mod parser;
19mod profile;
20mod validation;
21mod xmp;
22
23use std::{
24    collections::BTreeMap,
25    fmt,
26    io::{self, Write},
27    num::{NonZeroU32, NonZeroU64},
28    path::{Path, PathBuf},
29    time::Duration,
30};
31
32pub use parser::{
33    CosObject, DecodeParams, DecoderRegistry, Dictionary, IndirectObject, ObjectStore,
34    ParseOptions, ParsedDocument, Parser, PdfName, PdfSource, PdfString, SourceStorage,
35    StreamDecoder, StreamObject, Trailer,
36};
37#[cfg(feature = "custom-profiles")]
38pub use profile::CustomProfileRepository;
39pub use profile::{
40    BinaryOp, BuiltinFunction, BuiltinProfileRepository, ErrorTemplate, ModelValue, ObjectTypeName,
41    ProfileCatalogEntry, ProfileImportSummary, ProfileRepository, PropertyName, PropertyPath, Rule,
42    RuleEvaluator, RuleExpr, RuleOutcome, UnaryOp, ValidationProfile, display_flavour,
43    import_verapdf_profile_xml,
44};
45use secrecy::{ExposeSecret, SecretString};
46use serde::{Deserialize, Serialize};
47use thiserror::Error;
48use typed_builder::TypedBuilder;
49pub use validation::{
50    AnnotationModel, CatalogModel, ContentStreamModel, FeatureSelection, FontModel, InputName,
51    LinkName, MetadataModel, ModelGraph, ModelObject, ModelObjectRef, ObjectIdentity,
52    OutputIntentModel, PageModel, Validator,
53};
54pub use xmp::{
55    DetectedFlavours, FlavourClaim, FlavourDetector, NamespaceBinding, XmpIdentificationKind,
56    XmpPacket, XmpParser,
57};
58
/// Current library version embedded in generated reports.
///
/// Resolved at compile time from the `CARGO_PKG_VERSION` environment variable.
pub const ENGINE_VERSION: &str = env!("CARGO_PKG_VERSION");

// Byte cap enforced by `Identifier::new`.
const MAX_IDENTIFIER_BYTES: usize = 128;
// Byte cap applied when a `BoundedText` is built through `TryFrom<String>` (the serde path).
const MAX_TEXT_BYTES: usize = 4096;
// Cap used by `PasswordSecret::new` and as the default `max_password_bytes`.
const DEFAULT_MAX_PASSWORD_BYTES: usize = 1024;
// Largest cap that `PasswordSecret::new_with_limit` accepts from a caller.
const HARD_MAX_PASSWORD_BYTES: usize = 4096;
// 1 MiB; default for `max_string_bytes` and `max_decrypted_string_bytes`.
const DEFAULT_MAX_STRING_BYTES: usize = 1_048_576;
// 256 MiB; default for `max_stream_decode_bytes` and `max_decrypted_stream_bytes`.
const DEFAULT_MAX_STREAM_DECODE_BYTES: u64 = 256 * 1024 * 1024;
// Default for `max_encryption_dict_entries`.
const DEFAULT_MAX_ENCRYPTION_DICT_ENTRIES: u64 = 64;
// 16 MiB; default for `memory_source_threshold_bytes`.
const DEFAULT_MEMORY_SOURCE_THRESHOLD_BYTES: u64 = 16 * 1024 * 1024;
// 4 MiB; default for `max_xmp_bytes`.
const DEFAULT_MAX_XMP_BYTES: u64 = 4 * 1024 * 1024;
// Default for `max_xmp_elements`.
const DEFAULT_MAX_XMP_ELEMENTS: u64 = 50_000;
// Default for `max_xmp_depth`.
const DEFAULT_MAX_XMP_DEPTH: u32 = 32;
// Default for `max_xmp_attributes`.
const DEFAULT_MAX_XMP_ATTRIBUTES: usize = 64;
// Default for `max_xmp_namespaces`.
const DEFAULT_MAX_XMP_NAMESPACES: usize = 256;
// Default for `max_xmp_text_bytes`.
const DEFAULT_MAX_XMP_TEXT_BYTES: usize = 4096;

/// Result alias for pdfv library operations.
///
/// The error type is fixed to [`PdfvError`].
pub type Result<T> = std::result::Result<T, PdfvError>;
79
/// Top-level library error.
///
/// Aggregates the per-subsystem error enums. Every wrapping variant is marked
/// `#[from]`, so the matching subsystem error converts into `PdfvError` with `?`.
/// [`PdfvError::Io`] has no `#[from]` and is built explicitly so a path can be
/// attached.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum PdfvError {
    /// Input/output failure.
    ///
    /// The display text includes the path as rendered by `format_optional_path`.
    #[error("I/O error{path}: {source}", path = format_optional_path(.path.as_ref()))]
    Io {
        /// Path associated with the failure when available.
        path: Option<PathBuf>,
        /// Source I/O error.
        #[source]
        source: std::io::Error,
    },
    /// Parser failure.
    #[error("parse error: {0}")]
    Parse(#[from] ParseError),
    /// Profile loading or selection failure.
    #[error("profile error: {0}")]
    Profile(#[from] ProfileError),
    /// Validation engine failure.
    #[error("validation error: {0}")]
    Validation(#[from] ValidationError),
    /// Policy loading or evaluation failure.
    #[error("policy error: {0}")]
    Policy(#[from] PolicyError),
    /// Metadata repair failure.
    #[error("repair error: {0}")]
    Repair(#[from] RepairError),
    /// Report serialization failure.
    #[error("report error: {0}")]
    Report(#[from] ReportError),
    /// Configuration failure.
    #[error("configuration error: {0}")]
    Configuration(#[from] ConfigError),
}
115
/// Parser-specific error.
///
/// Variants carry either a `&'static str` label or a [`BoundedText`] message, so
/// the error stays `Clone` and comparable with `==`.
#[derive(Debug, Error, Clone, Eq, PartialEq)]
#[non_exhaustive]
pub enum ParseError {
    /// A configured parser resource limit was exceeded.
    #[error("resource limit exceeded: {limit}")]
    LimitExceeded {
        /// Name of the exceeded limit.
        limit: &'static str,
    },
    /// Checked arithmetic overflowed while processing input.
    #[error("arithmetic overflow while parsing {context}")]
    ArithmeticOverflow {
        /// Parsing context that overflowed.
        context: &'static str,
    },
    /// PDF syntax could not be recovered.
    #[error("malformed PDF syntax: {message}")]
    Malformed {
        /// Bounded diagnostic message.
        message: BoundedText,
    },
    /// A referenced object was missing or had the wrong shape.
    #[error("missing PDF object: {message}")]
    MissingObject {
        /// Bounded diagnostic message.
        message: BoundedText,
    },
    /// A stream filter is not supported by this phase.
    #[error("unsupported stream filter: {filter}")]
    UnsupportedFilter {
        /// Filter name.
        filter: BoundedText,
    },
    /// Stream decoding failed.
    #[error("stream decode failed: {message}")]
    StreamDecode {
        /// Bounded diagnostic message.
        message: BoundedText,
    },
}
157
/// Profile-specific error.
///
/// Covers profile selection, field validation, rule-expression evaluation, and
/// profile XML parsing. Free-form detail is always carried as [`BoundedText`].
#[derive(Debug, Error, Clone, Eq, PartialEq)]
#[non_exhaustive]
pub enum ProfileError {
    /// Profile selection did not resolve to a supported profile.
    #[error("unsupported profile selection")]
    UnsupportedSelection,
    /// A profile field failed validation.
    #[error("invalid profile field {field}: {reason}")]
    InvalidField {
        /// Field that failed validation.
        field: &'static str,
        /// Bounded reason string.
        reason: BoundedText,
    },
    /// A rule expression exceeded a configured evaluation budget.
    #[error("rule evaluation budget exceeded: {budget}")]
    BudgetExceeded {
        /// Budget that was exceeded.
        budget: &'static str,
    },
    /// A rule referenced a property that does not exist on the model object.
    #[error("unknown model property {property}")]
    UnknownProperty {
        /// Property name.
        property: BoundedText,
    },
    /// A rule expression had a type mismatch.
    #[error("rule expression type mismatch: {message}")]
    TypeMismatch {
        /// Bounded diagnostic message.
        message: BoundedText,
    },
    /// A rule expression is not supported by the bounded IR.
    #[error("unsupported rule expression: {reason}")]
    UnsupportedRule {
        /// Bounded reason string.
        reason: BoundedText,
    },
    /// Profile XML failed bounded parsing.
    #[error("invalid profile XML: {reason}")]
    InvalidXml {
        /// Bounded reason string.
        reason: BoundedText,
    },
}
204
/// Validation-specific error.
///
/// Both variants identify the failing component with a `&'static str` label
/// that is interpolated into the display message.
#[derive(Debug, Error, Clone, Eq, PartialEq)]
#[non_exhaustive]
pub enum ValidationError {
    /// Validation could not complete because a required subsystem is unavailable.
    #[error("validation subsystem is unavailable: {subsystem}")]
    SubsystemUnavailable {
        /// Subsystem name.
        subsystem: &'static str,
    },
    /// Validation traversal exceeded a configured resource limit.
    #[error("validation traversal limit exceeded: {limit}")]
    LimitExceeded {
        /// Limit that was exceeded.
        limit: &'static str,
    },
}
222
/// Feature policy error.
///
/// `InvalidField` is the variant returned by [`PolicySet::validate`];
/// `Evaluation` reports a rule that could not be applied to a feature report.
#[derive(Debug, Error, Clone, Eq, PartialEq)]
#[non_exhaustive]
pub enum PolicyError {
    /// A policy document field failed validation.
    #[error("invalid policy field {field}: {reason}")]
    InvalidField {
        /// Field that failed validation.
        field: &'static str,
        /// Bounded reason string.
        reason: BoundedText,
    },
    /// A policy rule could not be evaluated against the feature report.
    #[error("policy rule could not be evaluated: {reason}")]
    Evaluation {
        /// Bounded reason string.
        reason: BoundedText,
    },
}
242
/// Metadata repair error.
///
/// Distinguishes a rejected repair option (`InvalidField`) from a repair
/// operation that could not be completed (`Failed`).
#[derive(Debug, Error, Clone, Eq, PartialEq)]
#[non_exhaustive]
pub enum RepairError {
    /// A repair option failed validation.
    #[error("invalid repair field {field}: {reason}")]
    InvalidField {
        /// Field that failed validation.
        field: &'static str,
        /// Bounded reason string.
        reason: BoundedText,
    },
    /// A repair operation could not be completed.
    #[error("metadata repair failed: {reason}")]
    Failed {
        /// Bounded reason string.
        reason: BoundedText,
    },
}
262
/// Reporting-specific error.
///
/// Only `Json` is marked `#[from]`, so a `serde_json::Error` converts with `?`.
/// `Write` declares its I/O error as `#[source]` without `#[from]` and therefore
/// has to be constructed explicitly.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum ReportError {
    /// JSON serialization failed.
    #[error("JSON serialization failed")]
    Json {
        /// Source JSON error.
        #[from]
        source: serde_json::Error,
    },
    /// XML serialization failed.
    #[error("XML serialization failed: {message}")]
    Xml {
        /// Bounded diagnostic message.
        message: BoundedText,
    },
    /// Output write failed.
    #[error("report output write failed")]
    Write {
        /// Source I/O error.
        #[source]
        source: std::io::Error,
    },
}
288
/// Configuration-specific error.
///
/// This is also the error type returned by the bounded newtype constructors in
/// this file ([`BoundedText::new`], [`Identifier::new`], [`PasswordSecret::new`])
/// and by their `TryFrom` implementations.
#[derive(Debug, Error, Clone, Eq, PartialEq)]
#[non_exhaustive]
pub enum ConfigError {
    /// A configured value was outside the accepted range.
    #[error("invalid configuration value {field}: {reason}")]
    InvalidValue {
        /// Configuration field name.
        field: &'static str,
        /// Bounded reason string.
        reason: BoundedText,
    },
}
302
303/// Bounded UTF-8 text for externally supplied strings.
304#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Deserialize, Serialize)]
305#[serde(try_from = "String", into = "String")]
306pub struct BoundedText(String);
307
308impl BoundedText {
309    /// Creates bounded text with a maximum byte length.
310    ///
311    /// # Errors
312    ///
313    /// Returns [`ConfigError`] when `value` is longer than `max_bytes`.
314    pub fn new(
315        value: impl Into<String>,
316        max_bytes: usize,
317    ) -> std::result::Result<Self, ConfigError> {
318        let value = value.into();
319        if value.len() > max_bytes {
320            return Err(ConfigError::InvalidValue {
321                field: "text",
322                reason: Self::unchecked("value exceeds byte limit"),
323            });
324        }
325        Ok(Self(value))
326    }
327
328    /// Returns the text as a string slice.
329    #[must_use]
330    pub fn as_str(&self) -> &str {
331        &self.0
332    }
333
334    pub(crate) fn unchecked(value: impl Into<String>) -> Self {
335        Self(value.into())
336    }
337}
338
339impl fmt::Display for BoundedText {
340    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
341        formatter.write_str(&self.0)
342    }
343}
344
345impl TryFrom<String> for BoundedText {
346    type Error = ConfigError;
347
348    fn try_from(value: String) -> std::result::Result<Self, Self::Error> {
349        Self::new(value, MAX_TEXT_BYTES)
350    }
351}
352
353impl From<BoundedText> for String {
354    fn from(value: BoundedText) -> Self {
355        value.0
356    }
357}
358
359/// Identifier text with a tight byte cap and ASCII policy.
360#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Deserialize, Serialize)]
361#[serde(try_from = "String", into = "String")]
362pub struct Identifier(String);
363
364impl Identifier {
365    /// Creates an identifier from ASCII alphanumeric, dash, underscore, dot, and colon characters.
366    ///
367    /// # Errors
368    ///
369    /// Returns [`ConfigError`] if the identifier is empty, too long, or contains
370    /// characters outside the allowlist.
371    pub fn new(value: impl Into<String>) -> std::result::Result<Self, ConfigError> {
372        let value = value.into();
373        let valid_charset = value
374            .bytes()
375            .all(|byte| byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'_' | b'.' | b':'));
376        if value.is_empty() || value.len() > MAX_IDENTIFIER_BYTES || !valid_charset {
377            return Err(ConfigError::InvalidValue {
378                field: "identifier",
379                reason: BoundedText::unchecked("identifier violates byte or charset policy"),
380            });
381        }
382        Ok(Self(value))
383    }
384
385    /// Returns the identifier as a string slice.
386    #[must_use]
387    pub fn as_str(&self) -> &str {
388        &self.0
389    }
390
391    pub(crate) fn unchecked(value: impl Into<String>) -> Self {
392        Self(value.into())
393    }
394}
395
396impl TryFrom<String> for Identifier {
397    type Error = ConfigError;
398
399    fn try_from(value: String) -> std::result::Result<Self, Self::Error> {
400        Self::new(value)
401    }
402}
403
404impl From<Identifier> for String {
405    fn from(value: Identifier) -> Self {
406        value.0
407    }
408}
409
410/// Redacted PDF password secret.
411#[derive(Clone)]
412pub struct PasswordSecret(SecretString);
413
414impl PasswordSecret {
415    /// Creates a password secret using the default password byte cap.
416    ///
417    /// # Errors
418    ///
419    /// Returns [`ConfigError`] when the password exceeds the default cap.
420    pub fn new(value: impl Into<String>) -> std::result::Result<Self, ConfigError> {
421        Self::new_with_limit(value, DEFAULT_MAX_PASSWORD_BYTES)
422    }
423
424    /// Creates a password secret using an explicit byte cap.
425    ///
426    /// # Errors
427    ///
428    /// Returns [`ConfigError`] when the password exceeds the supplied cap or the
429    /// cap is above the hard limit.
430    pub fn new_with_limit(
431        value: impl Into<String>,
432        max_bytes: usize,
433    ) -> std::result::Result<Self, ConfigError> {
434        if max_bytes > HARD_MAX_PASSWORD_BYTES {
435            return Err(ConfigError::InvalidValue {
436                field: "maxPasswordBytes",
437                reason: BoundedText::unchecked("value exceeds hard cap"),
438            });
439        }
440        let value = value.into();
441        if value.len() > max_bytes {
442            return Err(ConfigError::InvalidValue {
443                field: "password",
444                reason: BoundedText::unchecked("password exceeds byte limit"),
445            });
446        }
447        Ok(Self(SecretString::from(value)))
448    }
449
450    pub(crate) fn expose_secret_bytes(&self) -> &[u8] {
451        self.0.expose_secret().as_bytes()
452    }
453}
454
455impl fmt::Debug for PasswordSecret {
456    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
457        formatter.write_str("PasswordSecret([REDACTED])")
458    }
459}
460
/// PDF validation options shared by parser, engine, and reports.
///
/// Every field has a builder default, so `ValidationOptions::builder().build()`
/// is complete on its own; [`Default`] is implemented in exactly that way.
/// Serialized keys are camelCase and unknown keys are rejected on input.
#[derive(Clone, Debug, Deserialize, Serialize, TypedBuilder)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct ValidationOptions {
    /// Flavour/profile selection policy.
    #[builder(default)]
    pub flavour: FlavourSelection,
    /// Parser and validation resource limits.
    #[builder(default)]
    pub resource_limits: ResourceLimits,
    /// Optional redacted password for encrypted PDFs.
    ///
    /// Skipped by serde in both directions: it is never written out, and it is
    /// `None` after deserialization.
    #[builder(default)]
    #[serde(skip, default)]
    pub password: Option<PasswordSecret>,
    /// Maximum assertion details retained per failed rule.
    #[builder(default)]
    pub max_failed_assertions_per_rule: MaxDisplayedFailures,
    /// Whether passed assertion details are recorded.
    #[builder(default)]
    pub record_passed_assertions: bool,
    /// Whether recoverable parser warnings are included in the report.
    ///
    /// Defaults to `true`.
    #[builder(default = true)]
    pub report_parse_warnings: bool,
    /// Optional feature families to extract into reports.
    #[builder(default)]
    pub feature_selection: FeatureSelection,
    /// Optional feature-policy rules to evaluate.
    ///
    /// Omitted from serialized output when `None`.
    #[builder(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub policy: Option<PolicySet>,
}

impl Default for ValidationOptions {
    /// Returns the options produced by the builder with no overrides.
    fn default() -> Self {
        Self::builder().build()
    }
}
499
500/// Flavour/profile selection policy.
501#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
502#[non_exhaustive]
503#[serde(rename_all = "camelCase", deny_unknown_fields)]
504pub enum FlavourSelection {
505    /// Detect flavour from document metadata, optionally falling back to a default.
506    Auto {
507        /// Default flavour used when auto-detection is inconclusive.
508        default: Option<ValidationFlavour>,
509    },
510    /// Validate against an explicit built-in flavour.
511    Explicit {
512        /// Selected validation flavour.
513        flavour: ValidationFlavour,
514    },
515    /// Validate against a custom profile loaded from a path.
516    CustomProfile {
517        /// Custom profile file path.
518        profile_path: PathBuf,
519    },
520}
521
522impl Default for FlavourSelection {
523    fn default() -> Self {
524        Self::Auto {
525            default: Some(ValidationFlavour {
526                family: Identifier::unchecked("pdfa"),
527                part: NonZeroU32::MIN,
528                conformance: Identifier::unchecked("b"),
529            }),
530        }
531    }
532}
533
534/// Validation flavour identifier.
535#[derive(Clone, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
536#[non_exhaustive]
537#[serde(rename_all = "camelCase", deny_unknown_fields)]
538pub struct ValidationFlavour {
539    /// PDF family, such as `pdfa`.
540    pub family: Identifier,
541    /// Part number, such as `1`.
542    pub part: NonZeroU32,
543    /// Conformance level, such as `b`.
544    pub conformance: Identifier,
545}
546
547impl ValidationFlavour {
548    /// Creates a validation flavour.
549    ///
550    /// # Errors
551    ///
552    /// Returns [`ConfigError`] when identifier fields violate the identifier policy.
553    pub fn new(
554        family: impl Into<String>,
555        part: NonZeroU32,
556        conformance: impl Into<String>,
557    ) -> std::result::Result<Self, ConfigError> {
558        Ok(Self {
559            family: Identifier::new(family)?,
560            part,
561            conformance: Identifier::new(conformance)?,
562        })
563    }
564}
565
/// Parser and validation resource limits.
///
/// Fields that carry a `#[builder(default = …)]` also carry a matching
/// `#[serde(default = …)]`, so they may be omitted both from builder calls and
/// from serialized input. The remaining fields have neither attribute and must
/// be supplied explicitly in both cases. [`Default`] provides a complete set.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize, TypedBuilder)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct ResourceLimits {
    /// Maximum input file bytes.
    pub max_file_bytes: u64,
    /// Maximum indirect objects.
    pub max_objects: u64,
    /// Maximum nested object depth.
    pub max_object_depth: u32,
    /// Maximum array length.
    pub max_array_len: u64,
    /// Maximum dictionary entries.
    pub max_dict_entries: u64,
    /// Maximum PDF name bytes.
    pub max_name_bytes: usize,
    /// Maximum string bytes.
    pub max_string_bytes: usize,
    /// Maximum password bytes accepted from public APIs and CLI sources.
    #[builder(default = DEFAULT_MAX_PASSWORD_BYTES)]
    #[serde(default = "default_max_password_bytes")]
    pub max_password_bytes: usize,
    /// Maximum decrypted string bytes.
    #[builder(default = DEFAULT_MAX_STRING_BYTES)]
    #[serde(default = "default_max_decrypted_string_bytes")]
    pub max_decrypted_string_bytes: usize,
    /// Maximum declared stream bytes.
    pub max_stream_declared_bytes: u64,
    /// Maximum decoded stream bytes.
    pub max_stream_decode_bytes: u64,
    /// Maximum decrypted stream bytes before downstream filters.
    #[builder(default = DEFAULT_MAX_STREAM_DECODE_BYTES)]
    #[serde(default = "default_max_decrypted_stream_bytes")]
    pub max_decrypted_stream_bytes: u64,
    /// Maximum encryption dictionary entries.
    #[builder(default = DEFAULT_MAX_ENCRYPTION_DICT_ENTRIES)]
    #[serde(default = "default_max_encryption_dict_entries")]
    pub max_encryption_dict_entries: u64,
    /// Maximum source bytes kept in memory before spilling to a temporary file.
    #[builder(default = DEFAULT_MEMORY_SOURCE_THRESHOLD_BYTES)]
    #[serde(default = "default_memory_source_threshold_bytes")]
    pub memory_source_threshold_bytes: u64,
    /// Maximum retained parse facts.
    pub max_parse_facts: usize,
    /// Maximum catalog XMP metadata stream bytes.
    #[builder(default = DEFAULT_MAX_XMP_BYTES)]
    #[serde(default = "default_max_xmp_bytes")]
    pub max_xmp_bytes: u64,
    /// Maximum XML elements parsed from one XMP packet.
    #[builder(default = DEFAULT_MAX_XMP_ELEMENTS)]
    #[serde(default = "default_max_xmp_elements")]
    pub max_xmp_elements: u64,
    /// Maximum XML element nesting depth in one XMP packet.
    #[builder(default = DEFAULT_MAX_XMP_DEPTH)]
    #[serde(default = "default_max_xmp_depth")]
    pub max_xmp_depth: u32,
    /// Maximum XML attributes accepted on one XMP element.
    #[builder(default = DEFAULT_MAX_XMP_ATTRIBUTES)]
    #[serde(default = "default_max_xmp_attributes")]
    pub max_xmp_attributes: usize,
    /// Maximum namespace declarations retained from one XMP packet.
    #[builder(default = DEFAULT_MAX_XMP_NAMESPACES)]
    #[serde(default = "default_max_xmp_namespaces")]
    pub max_xmp_namespaces: usize,
    /// Maximum text bytes retained from one XMP metadata property.
    #[builder(default = DEFAULT_MAX_XMP_TEXT_BYTES)]
    #[serde(default = "default_max_xmp_text_bytes")]
    pub max_xmp_text_bytes: usize,
}
636
637impl Default for ResourceLimits {
638    fn default() -> Self {
639        Self {
640            max_file_bytes: 256 * 1024 * 1024,
641            max_objects: 1_000_000,
642            max_object_depth: 128,
643            max_array_len: 65_536,
644            max_dict_entries: 16_384,
645            max_name_bytes: 127,
646            max_string_bytes: DEFAULT_MAX_STRING_BYTES,
647            max_password_bytes: DEFAULT_MAX_PASSWORD_BYTES,
648            max_decrypted_string_bytes: DEFAULT_MAX_STRING_BYTES,
649            max_stream_declared_bytes: 128 * 1024 * 1024,
650            max_stream_decode_bytes: DEFAULT_MAX_STREAM_DECODE_BYTES,
651            max_decrypted_stream_bytes: DEFAULT_MAX_STREAM_DECODE_BYTES,
652            max_encryption_dict_entries: DEFAULT_MAX_ENCRYPTION_DICT_ENTRIES,
653            memory_source_threshold_bytes: DEFAULT_MEMORY_SOURCE_THRESHOLD_BYTES,
654            max_parse_facts: 100_000,
655            max_xmp_bytes: DEFAULT_MAX_XMP_BYTES,
656            max_xmp_elements: DEFAULT_MAX_XMP_ELEMENTS,
657            max_xmp_depth: DEFAULT_MAX_XMP_DEPTH,
658            max_xmp_attributes: DEFAULT_MAX_XMP_ATTRIBUTES,
659            max_xmp_namespaces: DEFAULT_MAX_XMP_NAMESPACES,
660            max_xmp_text_bytes: DEFAULT_MAX_XMP_TEXT_BYTES,
661        }
662    }
663}
664
// The functions below exist because `#[serde(default = "…")]` takes the path of
// a function, not a constant. Each one returns the constant that the matching
// `ResourceLimits` field also uses as its builder default.

/// Serde default for `ResourceLimits::max_password_bytes`.
fn default_max_password_bytes() -> usize {
    DEFAULT_MAX_PASSWORD_BYTES
}

/// Serde default for `ResourceLimits::max_decrypted_string_bytes`.
fn default_max_decrypted_string_bytes() -> usize {
    DEFAULT_MAX_STRING_BYTES
}

/// Serde default for `ResourceLimits::max_decrypted_stream_bytes`.
fn default_max_decrypted_stream_bytes() -> u64 {
    DEFAULT_MAX_STREAM_DECODE_BYTES
}

/// Serde default for `ResourceLimits::max_encryption_dict_entries`.
fn default_max_encryption_dict_entries() -> u64 {
    DEFAULT_MAX_ENCRYPTION_DICT_ENTRIES
}

/// Serde default for `ResourceLimits::memory_source_threshold_bytes`.
fn default_memory_source_threshold_bytes() -> u64 {
    DEFAULT_MEMORY_SOURCE_THRESHOLD_BYTES
}

/// Serde default for `ResourceLimits::max_xmp_bytes`.
fn default_max_xmp_bytes() -> u64 {
    DEFAULT_MAX_XMP_BYTES
}

/// Serde default for `ResourceLimits::max_xmp_elements`.
fn default_max_xmp_elements() -> u64 {
    DEFAULT_MAX_XMP_ELEMENTS
}

/// Serde default for `ResourceLimits::max_xmp_depth`.
fn default_max_xmp_depth() -> u32 {
    DEFAULT_MAX_XMP_DEPTH
}

/// Serde default for `ResourceLimits::max_xmp_attributes`.
fn default_max_xmp_attributes() -> usize {
    DEFAULT_MAX_XMP_ATTRIBUTES
}

/// Serde default for `ResourceLimits::max_xmp_namespaces`.
fn default_max_xmp_namespaces() -> usize {
    DEFAULT_MAX_XMP_NAMESPACES
}

/// Serde default for `ResourceLimits::max_xmp_text_bytes`.
fn default_max_xmp_text_bytes() -> usize {
    DEFAULT_MAX_XMP_TEXT_BYTES
}
708
709/// Maximum displayed assertion failures per rule.
710#[derive(Clone, Copy, Debug, Deserialize, Eq, Ord, PartialEq, PartialOrd, Serialize)]
711#[serde(try_from = "u32", into = "u32")]
712pub struct MaxDisplayedFailures(NonZeroU32);
713
714impl MaxDisplayedFailures {
715    /// Creates a failure display cap.
716    #[must_use]
717    pub fn new(value: NonZeroU32) -> Self {
718        Self(value)
719    }
720
721    /// Returns the cap as `u32`.
722    #[must_use]
723    pub fn get(self) -> u32 {
724        self.0.get()
725    }
726}
727
728impl Default for MaxDisplayedFailures {
729    fn default() -> Self {
730        Self(NonZeroU32::MIN)
731    }
732}
733
734impl TryFrom<u32> for MaxDisplayedFailures {
735    type Error = ConfigError;
736
737    fn try_from(value: u32) -> std::result::Result<Self, Self::Error> {
738        let Some(value) = NonZeroU32::new(value) else {
739            return Err(ConfigError::InvalidValue {
740                field: "maxFailedAssertionsPerRule",
741                reason: BoundedText::unchecked("value must be greater than zero"),
742            });
743        };
744        Ok(Self(value))
745    }
746}
747
748impl From<MaxDisplayedFailures> for u32 {
749    fn from(value: MaxDisplayedFailures) -> Self {
750        value.get()
751    }
752}
753
/// Complete validation report for one input.
///
/// Serialized with camelCase keys; unknown keys are rejected on input. The two
/// optional sub-reports are left out of the serialized form when absent and may
/// be skipped when using the builder.
#[derive(Clone, Debug, Deserialize, Serialize, TypedBuilder)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct ValidationReport {
    /// Engine version that produced the report.
    pub engine_version: String,
    /// Input summary.
    pub source: InputSummary,
    /// Overall validation status.
    pub status: ValidationStatus,
    /// Detected or selected flavours.
    pub flavours: Vec<ValidationFlavour>,
    /// Per-profile validation results.
    pub profile_reports: Vec<ProfileReport>,
    /// Parser facts retained for validation and diagnostics.
    pub parse_facts: Vec<ParseFact>,
    /// User-visible warnings.
    pub warnings: Vec<ValidationWarning>,
    /// Optional read-only feature extraction report.
    #[builder(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub feature_report: Option<FeatureReport>,
    /// Optional policy evaluation report.
    #[builder(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub policy_report: Option<PolicyReport>,
    /// Task duration measurements.
    pub task_durations: Vec<TaskDuration>,
}
784
/// Machine-readable read-only feature extraction report.
///
/// Serialized with camelCase keys; unknown keys are rejected on input. No field
/// has a builder default, so the builder requires all of them.
#[derive(Clone, Debug, Deserialize, Serialize, TypedBuilder)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct FeatureReport {
    /// Extracted feature objects in deterministic traversal order.
    pub objects: Vec<FeatureObject>,
    /// Total model objects visited while extracting features.
    pub visited_objects: u64,
    /// Feature families requested by the caller.
    pub selected_families: Vec<ObjectTypeName>,
    /// Whether extraction stopped because a resource limit was reached.
    pub truncated: bool,
}
799
/// One extracted validation-model object.
///
/// Properties are held in a `BTreeMap`, so they iterate and serialize in
/// ascending key order.
#[derive(Clone, Debug, Deserialize, Serialize, TypedBuilder)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct FeatureObject {
    /// Validation model family name.
    pub family: ObjectTypeName,
    /// Stable object location.
    pub location: ObjectLocation,
    /// Bounded diagnostic context path.
    pub context: BoundedText,
    /// Extracted scalar properties keyed by validation-model property name.
    pub properties: BTreeMap<PropertyName, FeatureValue>,
}
814
/// Feature property value.
///
/// Serialized adjacently tagged: the camelCase variant name goes under `type`
/// and the payload, if any, under `value`. Only `PartialEq` is derived because
/// `Number` holds an `f64`.
#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", tag = "type", content = "value")]
pub enum FeatureValue {
    /// Null feature value.
    Null,
    /// Boolean feature value.
    Bool(bool),
    /// Numeric feature value.
    Number(f64),
    /// Bounded string feature value.
    String(BoundedText),
    /// Content-bearing string value redacted from reports.
    ///
    /// Only the byte length is kept.
    RedactedString {
        /// Original string byte length.
        bytes: u64,
    },
    /// Object key feature value.
    ObjectKey(ObjectKey),
    /// Bounded list feature value.
    List(Vec<FeatureValue>),
}
838
839/// Bounded policy rules evaluated over a [`FeatureReport`].
840#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)]
841#[non_exhaustive]
842#[serde(rename_all = "camelCase", deny_unknown_fields)]
843pub struct PolicySet {
844    /// Optional policy document name.
845    #[serde(default, skip_serializing_if = "Option::is_none")]
846    pub name: Option<BoundedText>,
847    /// Policy rules.
848    pub rules: Vec<PolicyRule>,
849}
850
851impl PolicySet {
852    /// Validates collection-level policy limits.
853    ///
854    /// # Errors
855    ///
856    /// Returns [`PolicyError`] when the policy exceeds compiled limits.
857    pub fn validate(&self) -> std::result::Result<(), PolicyError> {
858        const MAX_POLICY_RULES: usize = 1024;
859        if self.rules.is_empty() {
860            return Err(PolicyError::InvalidField {
861                field: "rules",
862                reason: BoundedText::unchecked("policy must contain at least one rule"),
863            });
864        }
865        if self.rules.len() > MAX_POLICY_RULES {
866            return Err(PolicyError::InvalidField {
867                field: "rules",
868                reason: BoundedText::unchecked("policy rule count exceeds limit"),
869            });
870        }
871        Ok(())
872    }
873}
874
/// One bounded policy rule.
///
/// A rule names a feature family and property, an operator, and an optional
/// comparison value. Serialized with camelCase keys; unknown keys are rejected
/// on input.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct PolicyRule {
    /// Rule identifier.
    pub id: Identifier,
    /// Human-readable rule description.
    pub description: BoundedText,
    /// Feature family to inspect.
    pub family: ObjectTypeName,
    /// Feature property to inspect.
    pub field: PropertyName,
    /// Comparison operator.
    pub operator: PolicyOperator,
    /// Optional comparison value.
    ///
    /// May be omitted on input and is left out of the output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub value: Option<PolicyValue>,
}
894
/// Bounded policy comparison operator.
///
/// Serialized as the camelCase variant name (for example `notEquals`).
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase")]
pub enum PolicyOperator {
    /// At least one matching feature object contains the field.
    Exists,
    /// No matching feature object contains the field.
    Absent,
    /// At least one matching value equals the rule value.
    Equals,
    /// No matching value equals the rule value.
    NotEquals,
    /// At least one matching numeric value is greater than or equal to the rule value.
    Min,
    /// At least one matching numeric value is less than or equal to the rule value.
    Max,
}
913
914/// Policy comparison value.
///
/// Adjacently tagged on the wire: the variant name goes under `type` (camelCase, i.e.
/// `bool`, `number`, `string`) and the payload under `value`.
915#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
916#[non_exhaustive]
917#[serde(rename_all = "camelCase", tag = "type", content = "value")]
918pub enum PolicyValue {
919    /// Boolean comparison value.
920    Bool(bool),
921    /// Integer comparison value (32-bit signed).
922    Number(i32),
923    /// Bounded string comparison value.
924    String(BoundedText),
925}
926
927/// Policy evaluation report.
///
/// Serialized with camelCase keys; unknown keys are rejected during deserialization.
928#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize, TypedBuilder)]
929#[non_exhaustive]
930#[serde(rename_all = "camelCase", deny_unknown_fields)]
931pub struct PolicyReport {
932    /// Optional policy document name.
    ///
    /// The builder leaves this as `None` unless set, and `None` is omitted from
    /// serialized output.
933    #[builder(default)]
934    #[serde(skip_serializing_if = "Option::is_none")]
935    pub name: Option<BoundedText>,
936    /// Whether all policy rules passed.
937    pub is_compliant: bool,
938    /// Rule results.
939    pub results: Vec<PolicyRuleResult>,
940}
941
942/// One policy rule result.
///
/// Serialized with camelCase keys; unknown keys are rejected during deserialization.
943#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize, TypedBuilder)]
944#[non_exhaustive]
945#[serde(rename_all = "camelCase", deny_unknown_fields)]
946pub struct PolicyRuleResult {
947    /// Rule identifier.
948    pub id: Identifier,
949    /// Human-readable rule description.
950    pub description: BoundedText,
951    /// Rule pass/fail status.
952    pub passed: bool,
953    /// Number of matching feature objects considered.
954    pub matches: u64,
955    /// Bounded diagnostic message.
956    pub message: BoundedText,
957}
958
959/// Metadata repair report for one input.
///
/// Serialized with camelCase keys; unknown keys are rejected during deserialization.
/// The optional fields carry no builder default, so builder callers set them
/// explicitly (including to `None`).
960#[derive(Clone, Debug, Deserialize, Serialize, TypedBuilder)]
961#[non_exhaustive]
962#[serde(rename_all = "camelCase", deny_unknown_fields)]
963pub struct RepairReport {
964    /// Engine version that produced the report.
965    pub engine_version: String,
966    /// Input summary.
967    pub source: InputSummary,
968    /// Optional output path when a repaired or unchanged file was written.
    ///
    /// Reports with status `Failed` may also carry the attempted path here; use
    /// `wrote_output` to test whether a file was actually produced. Omitted from
    /// serialized output when `None`.
969    #[serde(skip_serializing_if = "Option::is_none")]
970    pub output_path: Option<PathBuf>,
971    /// Overall repair status.
972    pub status: RepairStatus,
973    /// Actions completed for this input.
974    pub actions: Vec<RepairAction>,
975    /// Refusal reason when no output was produced.
    ///
    /// Omitted from serialized output when `None`.
976    #[serde(skip_serializing_if = "Option::is_none")]
977    pub refusal: Option<RepairRefusal>,
978    /// User-visible repair warnings.
979    pub warnings: Vec<ValidationWarning>,
980    /// Task duration measurements.
981    pub task_durations: Vec<TaskDuration>,
982}
983
984impl RepairReport {
985    /// Returns true when the report describes a written output file.
986    #[must_use]
987    pub fn wrote_output(&self) -> bool {
988        matches!(
989            self.status,
990            RepairStatus::Succeeded | RepairStatus::NoAction
991        ) && self.output_path.is_some()
992    }
993}
994
995/// Options for safe metadata repair.
///
/// All fields are public, so a value built with a struct literal bypasses the checks
/// performed by `MetadataRepairOptions::new`.
996#[derive(Clone, Debug)]
997pub struct MetadataRepairOptions {
998    /// Validation options used to parse and classify repair inputs.
999    pub validation_options: ValidationOptions,
1000    /// Canonical output directory where repaired files are written.
1001    pub output_dir: PathBuf,
1002    /// Prefix added to each output filename.
1003    pub prefix: String,
1004}
1005
1006impl MetadataRepairOptions {
1007    /// Creates repair options after validating output directory and prefix.
1008    ///
1009    /// # Errors
1010    ///
1011    /// Returns [`PdfvError`] when the output directory or prefix violates the
1012    /// repair safety policy.
1013    pub fn new(
1014        validation_options: ValidationOptions,
1015        output_dir: impl AsRef<Path>,
1016        prefix: impl Into<String>,
1017    ) -> Result<Self> {
1018        Ok(Self {
1019            validation_options,
1020            output_dir: validate_repair_output_dir(output_dir.as_ref())?,
1021            prefix: validate_repair_prefix(&prefix.into())?,
1022        })
1023    }
1024}
1025
1026/// Safe metadata repair facade.
///
/// Holds a configured validator plus the output directory and filename prefix taken
/// from [`MetadataRepairOptions`].
1027#[derive(Debug)]
1028pub struct MetadataRepairer {
    // Validator used to classify each input before any output is written.
1029    validator: Validator,
    // Directory that receives output files.
1030    output_dir: PathBuf,
    // Text prepended to each input file name to form the output file name.
1031    prefix: String,
1032}
1033
1034impl MetadataRepairer {
1035    /// Creates a metadata repair facade.
1036    ///
1037    /// # Errors
1038    ///
1039    /// Returns [`PdfvError`] if validation setup or repair options are invalid.
1040    pub fn new(options: MetadataRepairOptions) -> Result<Self> {
1041        Ok(Self {
1042            validator: Validator::new(options.validation_options)?,
1043            output_dir: options.output_dir,
1044            prefix: options.prefix,
1045        })
1046    }
1047
1048    /// Repairs one PDF file by writing a non-in-place output or a refusal report.
1049    ///
1050    /// # Errors
1051    ///
1052    /// Returns [`PdfvError`] for I/O failures before a report can be produced.
1053    pub fn repair_path(&self, path: impl AsRef<Path>) -> Result<RepairReport> {
1054        repair_metadata_path(
1055            &self.validator,
1056            path.as_ref(),
1057            &self.output_dir,
1058            &self.prefix,
1059        )
1060    }
1061}
1062
1063/// Batch metadata repair report.
///
/// Serialized with camelCase keys; unknown keys are rejected during deserialization.
1064#[derive(Clone, Debug, Deserialize, Serialize)]
1065#[non_exhaustive]
1066#[serde(rename_all = "camelCase", deny_unknown_fields)]
1067pub struct RepairBatchReport {
1068    /// Item reports.
1069    pub items: Vec<RepairReport>,
1070    /// Batch summary.
1071    pub summary: RepairBatchSummary,
1072    /// Batch-level warnings.
1073    pub warnings: Vec<ValidationWarning>,
1074}
1075
1076impl RepairBatchReport {
1077    /// Builds a repair batch report and computes summary counters.
1078    #[must_use]
1079    pub fn from_items(
1080        items: Vec<RepairReport>,
1081        warnings: Vec<ValidationWarning>,
1082        elapsed: Duration,
1083    ) -> Self {
1084        let summary = RepairBatchSummary::from_items(&items, elapsed);
1085        Self {
1086            items,
1087            summary,
1088            warnings,
1089        }
1090    }
1091}
1092
1093/// Batch metadata repair summary counters.
///
/// Serialized with camelCase keys; unknown keys are rejected during deserialization.
1094#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize, TypedBuilder)]
1095#[non_exhaustive]
1096#[serde(rename_all = "camelCase", deny_unknown_fields)]
1097pub struct RepairBatchSummary {
1098    /// Total input count.
1099    pub total_files: u64,
1100    /// Inputs that produced a modified repair output.
1101    pub succeeded: u64,
1102    /// Inputs that needed no metadata change and were copied unchanged.
1103    pub no_action: u64,
1104    /// Inputs refused by the repair safety model.
1105    pub refused: u64,
1106    /// Inputs that failed while attempting an output write.
1107    pub failed: u64,
1108    /// Elapsed milliseconds.
1109    pub elapsed_millis: u64,
1110    /// Worst exit category.
1111    pub worst_exit_category: ExitCategory,
1112}
1113
1114impl RepairBatchSummary {
1115    /// Computes summary counters from item reports.
1116    #[must_use]
1117    pub fn from_items(items: &[RepairReport], elapsed: Duration) -> Self {
1118        let mut summary = Self {
1119            total_files: u64::try_from(items.len()).unwrap_or(u64::MAX),
1120            elapsed_millis: duration_millis(elapsed),
1121            ..Self::default()
1122        };
1123        for item in items {
1124            match item.status {
1125                RepairStatus::Succeeded => summary.succeeded = summary.succeeded.saturating_add(1),
1126                RepairStatus::NoAction => summary.no_action = summary.no_action.saturating_add(1),
1127                RepairStatus::Refused => summary.refused = summary.refused.saturating_add(1),
1128                RepairStatus::Failed => summary.failed = summary.failed.saturating_add(1),
1129            }
1130        }
1131        summary.worst_exit_category = if summary.failed > 0 {
1132            ExitCategory::InternalError
1133        } else if summary.refused > 0 {
1134            ExitCategory::ProcessingFailed
1135        } else {
1136            ExitCategory::Success
1137        };
1138        summary
1139    }
1140}
1141
1142/// Metadata repair status.
///
/// Variants are serialized by their camelCase names (for example `noAction`).
1143#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
1144#[non_exhaustive]
1145#[serde(rename_all = "camelCase")]
1146pub enum RepairStatus {
1147    /// Repair modified metadata and wrote an output file.
1148    Succeeded,
1149    /// No metadata change was needed; an unchanged output file was written.
1150    NoAction,
1151    /// Repair was explicitly refused before writing output.
1152    Refused,
1153    /// Repair failed while writing or finalizing output.
1154    Failed,
1155}
1156
1157/// Metadata repair action.
///
/// Internally tagged on the wire: the camelCase variant name is stored under `kind`.
1158#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
1159#[non_exhaustive]
1160#[serde(rename_all = "camelCase", tag = "kind")]
1161pub enum RepairAction {
1162    /// The input was copied unchanged to the output path.
1163    CopiedUnchanged,
1164    /// XMP metadata was repaired.
1165    MetadataRewritten {
1166        /// Bounded action description.
1167        description: BoundedText,
1168    },
1169}
1170
1171/// Explicit reason metadata repair was refused.
///
/// Internally tagged on the wire: the camelCase variant name is stored under `kind`.
1172#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
1173#[non_exhaustive]
1174#[serde(rename_all = "camelCase", tag = "kind")]
1175pub enum RepairRefusal {
1176    /// Input could not be parsed as PDF.
1177    ParseFailed {
1178        /// Bounded reason.
1179        reason: BoundedText,
1180    },
1181    /// Encrypted inputs are not repaired by this phase.
1182    Encrypted,
1183    /// Repair requires exactly one selected validation flavour.
1184    AmbiguousFlavour {
1185        /// Number of selected flavours.
1186        selected: u64,
1187    },
1188    /// Validation failed and safe metadata rewrite support is unavailable.
1189    UnsupportedValidationStatus {
1190        /// Validation status that blocked repair.
1191        status: ValidationStatus,
1192    },
1193    /// Output path would overwrite the input.
1194    OutputWouldModifyInput,
1195    /// Output path failed validation.
1196    InvalidOutputPath {
1197        /// Bounded reason.
1198        reason: BoundedText,
1199    },
1200}
1201
1202#[allow(
1203    clippy::disallowed_methods,
1204    reason = "metadata repair is an explicit synchronous file rewrite API, not an async service \
1205              path"
1206)]
1207fn repair_metadata_path(
1208    validator: &Validator,
1209    path: &Path,
1210    output_dir: &Path,
1211    prefix: &str,
1212) -> Result<RepairReport> {
1213    let source = input_summary_for_path(path)?;
1214    let output_path = repair_output_path(path, output_dir, prefix)?;
1215    let input_canonical = std::fs::canonicalize(path).map_err(|source| PdfvError::Io {
1216        path: Some(path.to_path_buf()),
1217        source,
1218    })?;
1219    if input_canonical == output_path {
1220        return Ok(refused_repair_report(
1221            source,
1222            RepairRefusal::OutputWouldModifyInput,
1223        ));
1224    }
1225    if output_path.exists() {
1226        return Ok(refused_repair_report(
1227            source,
1228            RepairRefusal::InvalidOutputPath {
1229                reason: BoundedText::unchecked("output path already exists"),
1230            },
1231        ));
1232    }
1233
1234    let started = std::time::Instant::now();
1235    let validation = validator.validate_path(path)?;
1236    if matches!(validation.status, ValidationStatus::ParseFailed) {
1237        return Ok(refused_repair_report(
1238            source,
1239            RepairRefusal::ParseFailed {
1240                reason: validation
1241                    .warnings
1242                    .first()
1243                    .map_or_else(default_parse_failed_text, ValidationWarning::message_text),
1244            },
1245        ));
1246    }
1247    if matches!(validation.status, ValidationStatus::Encrypted) {
1248        return Ok(refused_repair_report(source, RepairRefusal::Encrypted));
1249    }
1250    let selected_profiles = if validation.flavours.is_empty() {
1251        validation.profile_reports.len()
1252    } else {
1253        validation.flavours.len()
1254    };
1255    if selected_profiles != 1 {
1256        return Ok(refused_repair_report(
1257            source,
1258            RepairRefusal::AmbiguousFlavour {
1259                selected: u64::try_from(selected_profiles).unwrap_or(u64::MAX),
1260            },
1261        ));
1262    }
1263    if !matches!(validation.status, ValidationStatus::Valid) {
1264        return Ok(refused_repair_report(
1265            source,
1266            RepairRefusal::UnsupportedValidationStatus {
1267                status: validation.status,
1268            },
1269        ));
1270    }
1271
1272    match atomic_copy(path, &output_path) {
1273        Ok(()) => Ok(RepairReport::builder()
1274            .engine_version(ENGINE_VERSION.to_owned())
1275            .source(source)
1276            .output_path(Some(output_path))
1277            .status(RepairStatus::NoAction)
1278            .actions(vec![RepairAction::CopiedUnchanged])
1279            .refusal(None)
1280            .warnings(Vec::new())
1281            .task_durations(vec![TaskDuration::from_duration(
1282                Identifier::new("repairMetadata")?,
1283                started.elapsed(),
1284            )])
1285            .build()),
1286        Err(error) => {
1287            remove_failed_output(&output_path)?;
1288            Ok(failed_repair_report(
1289                source,
1290                Some(output_path),
1291                &error.to_string(),
1292            ))
1293        }
1294    }
1295}
1296
1297#[allow(
1298    clippy::disallowed_methods,
1299    reason = "metadata repair reports filesystem input size synchronously"
1300)]
1301fn input_summary_for_path(path: &Path) -> Result<InputSummary> {
1302    let metadata = std::fs::metadata(path).map_err(|source| PdfvError::Io {
1303        path: Some(path.to_path_buf()),
1304        source,
1305    })?;
1306    Ok(InputSummary::new(
1307        InputKind::File,
1308        Some(path.to_path_buf()),
1309        Some(metadata.len()),
1310    ))
1311}
1312
1313#[allow(
1314    clippy::disallowed_methods,
1315    reason = "metadata repair validates a caller-selected filesystem output directory"
1316)]
1317fn validate_repair_output_dir(path: &Path) -> Result<PathBuf> {
1318    let metadata = std::fs::metadata(path).map_err(|source| PdfvError::Io {
1319        path: Some(path.to_path_buf()),
1320        source,
1321    })?;
1322    if !metadata.is_dir() {
1323        return Err(RepairError::InvalidField {
1324            field: "outputDir",
1325            reason: BoundedText::unchecked("output directory is not a directory"),
1326        }
1327        .into());
1328    }
1329    std::fs::canonicalize(path).map_err(|source| PdfvError::Io {
1330        path: Some(path.to_path_buf()),
1331        source,
1332    })
1333}
1334
1335fn validate_repair_prefix(prefix: &str) -> Result<String> {
1336    const MAX_REPAIR_PREFIX_BYTES: usize = 64;
1337    let valid = prefix.len() <= MAX_REPAIR_PREFIX_BYTES
1338        && prefix
1339            .bytes()
1340            .all(|byte| byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'_' | b'.'));
1341    if valid {
1342        Ok(prefix.to_owned())
1343    } else {
1344        Err(RepairError::InvalidField {
1345            field: "prefix",
1346            reason: BoundedText::unchecked(
1347                "prefix must be ASCII letters, digits, dot, dash, or underscore and at most 64 \
1348                 bytes",
1349            ),
1350        }
1351        .into())
1352    }
1353}
1354
1355fn repair_output_path(path: &Path, output_dir: &Path, prefix: &str) -> Result<PathBuf> {
1356    let file_name = path
1357        .file_name()
1358        .and_then(|name| name.to_str())
1359        .ok_or_else(|| RepairError::InvalidField {
1360            field: "paths",
1361            reason: BoundedText::unchecked("input path must have a UTF-8 file name"),
1362        })?;
1363    validate_output_filename(file_name)?;
1364    let output_name = format!("{prefix}{file_name}");
1365    validate_output_filename(&output_name)?;
1366    Ok(output_dir.join(output_name))
1367}
1368
1369fn validate_output_filename(name: &str) -> Result<()> {
1370    const MAX_OUTPUT_FILENAME_BYTES: usize = 255;
1371    let valid = !name.is_empty()
1372        && name.len() <= MAX_OUTPUT_FILENAME_BYTES
1373        && !name.contains("..")
1374        && name
1375            .bytes()
1376            .all(|byte| byte != b'\0' && byte != b'/' && byte != b'\\');
1377    if valid {
1378        Ok(())
1379    } else {
1380        Err(RepairError::InvalidField {
1381            field: "output",
1382            reason: BoundedText::unchecked("output filename is invalid"),
1383        }
1384        .into())
1385    }
1386}
1387
1388#[allow(
1389    clippy::disallowed_methods,
1390    clippy::disallowed_types,
1391    reason = "metadata repair performs synchronous atomic file output by design"
1392)]
1393fn atomic_copy(input: &Path, output_path: &Path) -> Result<()> {
1394    let Some(parent) = output_path.parent() else {
1395        return Err(RepairError::InvalidField {
1396            field: "outputDir",
1397            reason: BoundedText::unchecked("output path has no parent"),
1398        }
1399        .into());
1400    };
1401    let mut source = std::fs::File::open(input).map_err(|source| PdfvError::Io {
1402        path: Some(input.to_path_buf()),
1403        source,
1404    })?;
1405    let mut temp = tempfile::NamedTempFile::new_in(parent).map_err(|source| PdfvError::Io {
1406        path: Some(parent.to_path_buf()),
1407        source,
1408    })?;
1409    io::copy(&mut source, &mut temp).map_err(|source| PdfvError::Io {
1410        path: Some(input.to_path_buf()),
1411        source,
1412    })?;
1413    temp.flush().map_err(|source| PdfvError::Io {
1414        path: Some(output_path.to_path_buf()),
1415        source,
1416    })?;
1417    temp.persist(output_path).map_err(|error| PdfvError::Io {
1418        path: Some(output_path.to_path_buf()),
1419        source: error.error,
1420    })?;
1421    Ok(())
1422}
1423
1424#[allow(
1425    clippy::disallowed_methods,
1426    reason = "metadata repair removes failed synchronous output artifacts"
1427)]
1428fn remove_failed_output(output_path: &Path) -> Result<()> {
1429    match std::fs::remove_file(output_path) {
1430        Ok(()) => Ok(()),
1431        Err(error) if error.kind() == std::io::ErrorKind::NotFound => Ok(()),
1432        Err(source) => Err(PdfvError::Io {
1433            path: Some(output_path.to_path_buf()),
1434            source,
1435        }),
1436    }
1437}
1438
1439fn refused_repair_report(source: InputSummary, refusal: RepairRefusal) -> RepairReport {
1440    RepairReport::builder()
1441        .engine_version(ENGINE_VERSION.to_owned())
1442        .source(source)
1443        .output_path(None)
1444        .status(RepairStatus::Refused)
1445        .actions(Vec::new())
1446        .refusal(Some(refusal))
1447        .warnings(Vec::new())
1448        .task_durations(Vec::new())
1449        .build()
1450}
1451
1452fn failed_repair_report(
1453    source: InputSummary,
1454    output_path: Option<PathBuf>,
1455    reason: &str,
1456) -> RepairReport {
1457    RepairReport::builder()
1458        .engine_version(ENGINE_VERSION.to_owned())
1459        .source(source)
1460        .output_path(output_path)
1461        .status(RepairStatus::Failed)
1462        .actions(Vec::new())
1463        .refusal(None)
1464        .warnings(vec![ValidationWarning::General {
1465            message: BoundedText::new(reason, 512)
1466                .unwrap_or_else(|_| BoundedText::unchecked("metadata repair failed")),
1467        }])
1468        .task_durations(Vec::new())
1469        .build()
1470}
1471
/// Fallback refusal reason for a parse-failed validation that carries no warnings.
1472fn default_parse_failed_text() -> BoundedText {
1473    BoundedText::unchecked("parse failed")
1474}
1475
1476/// Input summary included in reports.
///
/// Serialized with camelCase keys; unknown keys are rejected during deserialization.
1477#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
1478#[non_exhaustive]
1479#[serde(rename_all = "camelCase", deny_unknown_fields)]
1480pub struct InputSummary {
1481    /// Input kind.
1482    pub kind: InputKind,
1483    /// Path when the input came from the filesystem.
    ///
    /// Omitted from serialized output when `None`.
1484    #[serde(skip_serializing_if = "Option::is_none")]
1485    pub path: Option<PathBuf>,
1486    /// Input byte length when known.
    ///
    /// Omitted from serialized output when `None`.
1487    #[serde(skip_serializing_if = "Option::is_none")]
1488    pub bytes: Option<u64>,
1489}
1490
1491impl InputSummary {
1492    /// Creates an input summary.
1493    #[must_use]
1494    pub fn new(kind: InputKind, path: Option<PathBuf>, bytes: Option<u64>) -> Self {
1495        Self { kind, path, bytes }
1496    }
1497}
1498
1499/// Input kind.
///
/// Variants are serialized by their camelCase names (`file`, `memory`).
1500#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
1501#[non_exhaustive]
1502#[serde(rename_all = "camelCase")]
1503pub enum InputKind {
1504    /// Filesystem input.
1505    File,
1506    /// In-memory or reader input.
1507    Memory,
1508}
1509
1510/// Overall validation status.
///
/// Variants are serialized by their camelCase names (for example `parseFailed`).
1511#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
1512#[non_exhaustive]
1513#[serde(rename_all = "camelCase")]
1514pub enum ValidationStatus {
1515    /// All required checks passed.
1516    Valid,
1517    /// One or more required checks failed.
1518    Invalid,
1519    /// Input is encrypted and cannot be validated in the current phase.
1520    Encrypted,
1521    /// Validation could not complete.
1522    Incomplete,
1523    /// Input could not be parsed.
1524    ParseFailed,
1525}
1526
1527/// Per-profile report.
///
/// Serialized with camelCase keys; unknown keys are rejected during deserialization.
1528#[derive(Clone, Debug, Deserialize, Serialize, TypedBuilder)]
1529#[non_exhaustive]
1530#[serde(rename_all = "camelCase", deny_unknown_fields)]
1531pub struct ProfileReport {
1532    /// Profile identity.
1533    pub profile: ProfileIdentity,
1534    /// Whether this profile is compliant.
1535    pub is_compliant: bool,
1536    /// Number of checks executed.
1537    pub checks_executed: u64,
1538    /// Number of rules executed.
1539    pub rules_executed: u64,
1540    /// Number of failed rules.
1541    pub failed_rules: u64,
1542    /// Bounded failed assertion details.
1543    pub failed_assertions: Vec<Assertion>,
1544    /// Bounded passed assertion details.
1545    pub passed_assertions: Vec<Assertion>,
1546    /// Unsupported required rules.
1547    pub unsupported_rules: Vec<UnsupportedRule>,
1548}
1549
1550/// Profile identity.
///
/// Serialized with camelCase keys; unknown keys are rejected during deserialization.
1551#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
1552#[non_exhaustive]
1553#[serde(rename_all = "camelCase", deny_unknown_fields)]
1554pub struct ProfileIdentity {
1555    /// Profile id.
1556    pub id: Identifier,
1557    /// Human-readable profile name.
1558    pub name: BoundedText,
1559    /// Profile version string.
    ///
    /// Omitted from serialized output when `None`.
1560    #[serde(skip_serializing_if = "Option::is_none")]
1561    pub version: Option<Identifier>,
1562}
1563
1564/// Rule assertion detail.
///
/// Serialized with camelCase keys; unknown keys are rejected during deserialization.
1565#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
1566#[non_exhaustive]
1567#[serde(rename_all = "camelCase", deny_unknown_fields)]
1568pub struct Assertion {
1569    /// Report-stable assertion ordinal.
    ///
    /// Typed as non-zero, so an ordinal of 0 cannot be represented.
1570    pub ordinal: NonZeroU64,
1571    /// Rule id.
1572    pub rule_id: RuleId,
1573    /// Assertion status.
1574    pub status: AssertionStatus,
1575    /// Assertion description.
1576    pub description: BoundedText,
1577    /// Object location.
1578    pub location: ObjectLocation,
1579    /// Optional object context path.
    ///
    /// Omitted from serialized output when `None`.
1580    #[serde(skip_serializing_if = "Option::is_none")]
1581    pub object_context: Option<BoundedText>,
1582    /// Optional assertion message.
    ///
    /// Omitted from serialized output when `None`.
1583    #[serde(skip_serializing_if = "Option::is_none")]
1584    pub message: Option<BoundedText>,
1585    /// Error template arguments.
1586    pub error_arguments: Vec<ErrorArgument>,
1587}
1588
1589/// Assertion status.
///
/// Variants are serialized by their camelCase names (`passed`, `failed`).
1590#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
1591#[non_exhaustive]
1592#[serde(rename_all = "camelCase")]
1593pub enum AssertionStatus {
1594    /// Assertion passed.
1595    Passed,
1596    /// Assertion failed.
1597    Failed,
1598}
1599
1600/// Rule id.
///
/// A newtype over [`Identifier`]; `serde(transparent)` makes it serialize exactly like
/// the wrapped identifier.
1601#[derive(Clone, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
1602#[serde(transparent)]
1603pub struct RuleId(pub Identifier);
1604
1605/// Object location for diagnostics.
///
/// Every field is optional and is omitted from serialized output when `None`.
/// Serialized with camelCase keys; unknown keys are rejected during deserialization.
1606#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
1607#[non_exhaustive]
1608#[serde(rename_all = "camelCase", deny_unknown_fields)]
1609pub struct ObjectLocation {
1610    /// Indirect object key when available.
1611    #[serde(skip_serializing_if = "Option::is_none")]
1612    pub object: Option<ObjectKey>,
1613    /// Byte offset when available.
1614    #[serde(skip_serializing_if = "Option::is_none")]
1615    pub offset: Option<u64>,
1616    /// Human-readable path in the validation model.
1617    #[serde(skip_serializing_if = "Option::is_none")]
1618    pub path: Option<BoundedText>,
1619}
1620
1621/// Indirect PDF object key.
///
/// The derived ordering compares `number` first, then `generation`.
1622#[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)]
1623#[non_exhaustive]
1624#[serde(rename_all = "camelCase", deny_unknown_fields)]
1625pub struct ObjectKey {
1626    /// Object number.
    ///
    /// Typed as non-zero, so object number 0 cannot be represented.
1627    pub number: NonZeroU32,
1628    /// Generation number.
1629    pub generation: u16,
1630}
1631
1632impl ObjectKey {
1633    /// Creates an indirect object key.
1634    #[must_use]
1635    pub fn new(number: NonZeroU32, generation: u16) -> Self {
1636        Self { number, generation }
1637    }
1638}
1639
1640/// Error template argument.
///
/// Serialized with camelCase keys; unknown keys are rejected during deserialization.
1641#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
1642#[non_exhaustive]
1643#[serde(rename_all = "camelCase", deny_unknown_fields)]
1644pub struct ErrorArgument {
1645    /// Argument name.
1646    pub name: Identifier,
1647    /// Argument value.
1648    pub value: BoundedText,
1649}
1650
1651/// Unsupported rule detail.
///
/// Serialized with camelCase keys; unknown keys are rejected during deserialization.
1652#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
1653#[non_exhaustive]
1654#[serde(rename_all = "camelCase", deny_unknown_fields)]
1655pub struct UnsupportedRule {
1656    /// Profile id that owns the rule.
1657    pub profile_id: Identifier,
1658    /// Unsupported rule id.
1659    pub rule_id: RuleId,
1660    /// Expression fragment when available.
    ///
    /// Omitted from serialized output when `None`.
1661    #[serde(skip_serializing_if = "Option::is_none")]
1662    pub expression_fragment: Option<BoundedText>,
1663    /// Unsupported reason.
1664    pub reason: BoundedText,
1665    /// Specification citations associated with this rule.
1666    pub references: Vec<SpecReference>,
1667}
1668
1669/// Specification citation associated with a validation rule.
///
/// Serialized with camelCase keys; unknown keys are rejected during deserialization.
1670#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
1671#[non_exhaustive]
1672#[serde(rename_all = "camelCase", deny_unknown_fields)]
1673pub struct SpecReference {
1674    /// Specification name.
1675    pub specification: BoundedText,
1676    /// Clause or section identifier.
1677    pub clause: BoundedText,
1678}
1679
1680/// Parser fact emitted by tolerant parsing.
///
/// Internally tagged on the wire: the camelCase variant name is stored under `kind`.
/// The enum-level `rename_all` renames variants only, which is why the multi-word
/// field `had_leading_bytes` carries its own explicit rename.
1681#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
1682#[non_exhaustive]
1683#[serde(rename_all = "camelCase", tag = "kind")]
1684pub enum ParseFact {
1685    /// Header fact.
1686    Header {
1687        /// Header offset in bytes.
1688        offset: u64,
1689        /// PDF version.
1690        version: PdfVersion,
1691        /// Whether bytes preceded the header.
1692        #[serde(rename = "hadLeadingBytes")]
1693        had_leading_bytes: bool,
1694    },
1695    /// Bytes after EOF marker.
1696    PostEofData {
1697        /// Post-EOF byte count.
1698        bytes: u64,
1699    },
1700    /// Cross-reference fact.
1701    Xref {
1702        /// Xref section location.
1703        section: ObjectLocation,
1704        /// Xref-specific fact.
1705        fact: XrefFact,
1706    },
1707    /// Stream fact.
1708    Stream {
1709        /// Stream object key.
1710        object: ObjectKey,
1711        /// Stream-specific fact.
1712        fact: StreamFact,
1713    },
1714    /// Encryption fact.
    ///
    /// The four optional fields are omitted from serialized output when `None`.
1715    Encryption {
1716        /// Whether encryption was detected.
1717        encrypted: bool,
1718        /// Encryption handler when known.
1719        #[serde(skip_serializing_if = "Option::is_none")]
1720        handler: Option<Identifier>,
1721        /// Encryption version when known.
1722        #[serde(skip_serializing_if = "Option::is_none")]
1723        version: Option<u8>,
1724        /// Security handler revision when known.
1725        #[serde(skip_serializing_if = "Option::is_none")]
1726        revision: Option<u8>,
1727        /// Selected object encryption algorithm when known.
1728        #[serde(skip_serializing_if = "Option::is_none")]
1729        algorithm: Option<Identifier>,
1730        /// Whether decryption succeeded.
1731        decrypted: bool,
1732    },
1733    /// XMP metadata fact.
1734    Xmp {
1735        /// Metadata stream object key.
1736        object: ObjectKey,
1737        /// XMP-specific fact.
1738        fact: XmpFact,
1739    },
1740}
1741
1742/// PDF version.
///
/// Stored as separate major and minor numbers. Serialized with camelCase keys; unknown
/// keys are rejected during deserialization.
1743#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
1744#[non_exhaustive]
1745#[serde(rename_all = "camelCase", deny_unknown_fields)]
1746pub struct PdfVersion {
1747    /// Major version.
1748    pub major: u8,
1749    /// Minor version.
1750    pub minor: u8,
1751}
1752
1753/// Cross-reference parser fact.
///
/// No `tag` attribute is set, so serde's default externally tagged representation
/// applies, with camelCase variant names.
///
/// NOTE(review): the enum-level `rename_all` renames variants only, so the
/// `compressed_entries` field keeps its snake_case key on the wire — confirm this is
/// intended given the camelCase keys used elsewhere in reports.
1754#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
1755#[non_exhaustive]
1756#[serde(rename_all = "camelCase")]
1757pub enum XrefFact {
1758    /// Classic xref section had compliant EOL markers.
1759    EolMarkersComply,
1760    /// Classic xref section was malformed but recoverable.
1761    MalformedClassic,
1762    /// Xref stream was detected and is unsupported in M0.
1763    XrefStreamUnsupported,
1764    /// Xref stream was parsed.
1765    XrefStreamParsed {
1766        /// Number of xref entries parsed.
1767        entries: u64,
1768        /// Number of compressed-object entries parsed.
1769        compressed_entries: u64,
1770    },
1771    /// A previous xref section offset was declared.
1772    PrevChain {
1773        /// Previous xref byte offset.
1774        offset: u64,
1775    },
1776    /// A hybrid-reference xref stream offset was declared.
1777    HybridReference {
1778        /// Hybrid xref stream byte offset.
1779        offset: u64,
1780    },
1781    /// Object stream was parsed and expanded.
1782    ObjectStreamParsed,
1783}
1784
1785/// Stream parser fact.
///
/// Internally tagged on the wire: the camelCase variant name is stored under `fact`;
/// unknown keys are rejected during deserialization.
///
/// NOTE(review): the enum-level `rename_all` renames variants only. The
/// `KeywordSpacing` fields are renamed explicitly, but `input_bytes` and `output_bytes`
/// in `FilterDecoded` keep their snake_case keys on the wire — confirm this is intended.
1786#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
1787#[non_exhaustive]
1788#[serde(rename_all = "camelCase", deny_unknown_fields, tag = "fact")]
1789pub enum StreamFact {
1790    /// Declared and discovered stream lengths.
1791    Length {
1792        /// Declared stream length.
1793        declared: u64,
1794        /// Discovered stream length.
1795        discovered: u64,
1796    },
1797    /// Stream keyword spacing compliance.
1798    KeywordSpacing {
1799        /// `stream` keyword spacing compliance.
1800        #[serde(rename = "streamKeywordCRLFCompliant")]
1801        stream_keyword_crlf_compliant: bool,
1802        /// `endstream` keyword spacing compliance.
1803        #[serde(rename = "endstreamKeywordEolCompliant")]
1804        endstream_keyword_eol_compliant: bool,
1805    },
1806    /// Stream was decoded within configured limits.
1807    Decoded {
1808        /// Decoded stream byte count.
1809        bytes: u64,
1810    },
1811    /// A single stream filter decoded successfully.
1812    FilterDecoded {
1813        /// Filter name.
1814        filter: Identifier,
1815        /// Input bytes consumed by this filter.
1816        input_bytes: u64,
1817        /// Output bytes produced by this filter.
1818        output_bytes: u64,
1819    },
1820    /// A filter was retained in byte-preserving metadata mode.
1821    FilterMetadataMode {
1822        /// Filter name.
1823        filter: Identifier,
1824        /// Bytes preserved without pixel/image decoding.
1825        bytes: u64,
1826    },
1827}
1828
1829/// XMP metadata parser fact.
1830#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
1831#[non_exhaustive]
1832#[serde(rename_all = "camelCase", deny_unknown_fields, tag = "fact")]
1833pub enum XmpFact {
1834    /// XMP packet was extracted and parsed.
1835    PacketParsed {
1836        /// Packet byte count.
1837        bytes: u64,
1838        /// Number of namespace declarations retained.
1839        namespaces: u64,
1840        /// Number of recognized flavour claims.
1841        claims: u64,
1842    },
1843    /// XMP packet wrapper was absent.
1844    MissingPacketWrapper,
1845    /// Recognized XMP flavour claim.
1846    FlavourClaim {
1847        /// Flavour family.
1848        family: Identifier,
1849        /// Profile display spelling.
1850        display_flavour: BoundedText,
1851        /// Namespace URI that supplied the claim.
1852        namespace_uri: BoundedText,
1853    },
1854    /// XMP XML was malformed or unsupported.
1855    Malformed {
1856        /// Bounded reason string.
1857        reason: BoundedText,
1858    },
1859    /// DTD or entity processing was rejected.
1860    HostileXmlRejected {
1861        /// Bounded reason string.
1862        reason: BoundedText,
1863    },
1864}
1865
1866/// Validation warning.
1867#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
1868#[non_exhaustive]
1869#[serde(rename_all = "camelCase", tag = "kind")]
1870pub enum ValidationWarning {
1871    /// Parse facts exceeded the configured retention cap.
1872    ParseFactCapReached {
1873        /// Configured cap.
1874        cap: usize,
1875    },
1876    /// Incompatible profile was skipped.
1877    IncompatibleProfile {
1878        /// Profile id.
1879        profile_id: Identifier,
1880        /// Skip reason.
1881        reason: BoundedText,
1882    },
1883    /// Auto flavour detection fell back or could not select a profile.
1884    AutoDetection {
1885        /// Bounded warning message.
1886        message: BoundedText,
1887    },
1888    /// General bounded warning.
1889    General {
1890        /// Warning message.
1891        message: BoundedText,
1892    },
1893}
1894
1895impl ValidationWarning {
1896    /// Returns a bounded human-readable warning message.
1897    #[must_use]
1898    pub fn message_text(&self) -> BoundedText {
1899        match self {
1900            Self::ParseFactCapReached { cap } => {
1901                BoundedText::unchecked(format!("parse fact cap reached: {cap}"))
1902            }
1903            Self::IncompatibleProfile { profile_id, reason } => BoundedText::unchecked(format!(
1904                "incompatible profile {}: {}",
1905                profile_id.as_str(),
1906                reason.as_str()
1907            )),
1908            Self::AutoDetection { message } => {
1909                BoundedText::unchecked(format!("auto detection: {}", message.as_str()))
1910            }
1911            Self::General { message } => message.clone(),
1912        }
1913    }
1914}
1915
1916/// Task duration entry.
1917#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
1918#[non_exhaustive]
1919#[serde(rename_all = "camelCase", deny_unknown_fields)]
1920pub struct TaskDuration {
1921    /// Task name.
1922    pub task: Identifier,
1923    /// Duration in milliseconds.
1924    pub millis: u64,
1925}
1926
1927impl TaskDuration {
1928    /// Creates a task duration from a [`Duration`].
1929    ///
1930    /// Values larger than `u64::MAX` milliseconds saturate.
1931    #[must_use]
1932    pub fn from_duration(task: Identifier, duration: Duration) -> Self {
1933        let millis = u64::try_from(duration.as_millis()).unwrap_or(u64::MAX);
1934        Self { task, millis }
1935    }
1936}
1937
1938/// Batch validation report.
1939#[derive(Clone, Debug, Deserialize, Serialize)]
1940#[non_exhaustive]
1941#[serde(rename_all = "camelCase", deny_unknown_fields)]
1942pub struct BatchReport {
1943    /// Item reports.
1944    pub items: Vec<ValidationReport>,
1945    /// Batch summary.
1946    pub summary: BatchSummary,
1947    /// Batch-level warnings.
1948    pub warnings: Vec<ValidationWarning>,
1949}
1950
1951impl BatchReport {
1952    /// Builds a batch report and computes summary counters from item reports.
1953    #[must_use]
1954    pub fn from_items(
1955        items: Vec<ValidationReport>,
1956        warnings: Vec<ValidationWarning>,
1957        elapsed: Duration,
1958    ) -> Self {
1959        let summary = BatchSummary::from_items(&items, elapsed);
1960        Self {
1961            items,
1962            summary,
1963            warnings,
1964        }
1965    }
1966
1967    /// Builds a batch report with internal per-input error count.
1968    #[must_use]
1969    pub fn from_items_with_internal_errors(
1970        items: Vec<ValidationReport>,
1971        warnings: Vec<ValidationWarning>,
1972        elapsed: Duration,
1973        internal_errors: u64,
1974    ) -> Self {
1975        let summary =
1976            BatchSummary::from_items_with_internal_errors(&items, elapsed, internal_errors);
1977        Self {
1978            items,
1979            summary,
1980            warnings,
1981        }
1982    }
1983}
1984
1985/// Batch summary counters.
1986#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize, TypedBuilder)]
1987#[non_exhaustive]
1988#[serde(rename_all = "camelCase", deny_unknown_fields)]
1989pub struct BatchSummary {
1990    /// Total input count.
1991    pub total_files: u64,
1992    /// Valid input count.
1993    pub valid: u64,
1994    /// Invalid input count.
1995    pub invalid: u64,
1996    /// Parse failure count.
1997    pub parse_failures: u64,
1998    /// Encrypted input count.
1999    pub encrypted: u64,
2000    /// Incomplete validation count.
2001    pub incomplete: u64,
2002    /// Internal error count.
2003    pub internal_errors: u64,
2004    /// Elapsed milliseconds.
2005    pub elapsed_millis: u64,
2006    /// Worst exit category.
2007    pub worst_exit_category: ExitCategory,
2008}
2009
2010impl BatchSummary {
2011    /// Computes batch summary counters from item reports.
2012    #[must_use]
2013    pub fn from_items(items: &[ValidationReport], elapsed: Duration) -> Self {
2014        let mut summary = Self {
2015            total_files: u64::try_from(items.len()).unwrap_or(u64::MAX),
2016            elapsed_millis: duration_millis(elapsed),
2017            ..Self::default()
2018        };
2019        summary.apply_items(items);
2020        summary.finish()
2021    }
2022
2023    /// Computes batch summary counters from item reports plus internal error count.
2024    #[must_use]
2025    pub fn from_items_with_internal_errors(
2026        items: &[ValidationReport],
2027        elapsed: Duration,
2028        internal_errors: u64,
2029    ) -> Self {
2030        let mut summary = Self {
2031            total_files: u64::try_from(items.len())
2032                .unwrap_or(u64::MAX)
2033                .saturating_add(internal_errors),
2034            elapsed_millis: duration_millis(elapsed),
2035            internal_errors,
2036            ..Self::default()
2037        };
2038        summary.apply_items(items);
2039        summary.finish()
2040    }
2041
2042    fn apply_items(&mut self, items: &[ValidationReport]) {
2043        for report in items {
2044            match report.status {
2045                ValidationStatus::Valid => self.valid = self.valid.saturating_add(1),
2046                ValidationStatus::Invalid => self.invalid = self.invalid.saturating_add(1),
2047                ValidationStatus::ParseFailed => {
2048                    self.parse_failures = self.parse_failures.saturating_add(1);
2049                }
2050                ValidationStatus::Encrypted => {
2051                    self.encrypted = self.encrypted.saturating_add(1);
2052                }
2053                ValidationStatus::Incomplete => {
2054                    self.incomplete = self.incomplete.saturating_add(1);
2055                }
2056            }
2057        }
2058    }
2059
2060    fn finish(mut self) -> Self {
2061        self.worst_exit_category = if self.parse_failures > 0
2062            || self.encrypted > 0
2063            || self.incomplete > 0
2064            || self.internal_errors > 0
2065        {
2066            ExitCategory::ProcessingFailed
2067        } else if self.invalid > 0 {
2068            ExitCategory::ValidationFailed
2069        } else {
2070            ExitCategory::Success
2071        };
2072        self
2073    }
2074}
2075
2076/// CLI-oriented exit category represented in batch summaries.
2077#[derive(Clone, Copy, Debug, Default, Deserialize, Eq, PartialEq, Serialize)]
2078#[non_exhaustive]
2079#[serde(rename_all = "camelCase")]
2080pub enum ExitCategory {
2081    /// Success.
2082    #[default]
2083    Success,
2084    /// Validation found non-compliance.
2085    ValidationFailed,
2086    /// Input could not be processed.
2087    ProcessingFailed,
2088    /// Internal application failure.
2089    InternalError,
2090}
2091
2092/// Report output format.
2093#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
2094#[non_exhaustive]
2095#[serde(rename_all = "camelCase")]
2096pub enum ReportFormat {
2097    /// Compact JSON.
2098    Json,
2099    /// Pretty-printed JSON.
2100    JsonPretty,
2101    /// Human-readable text.
2102    Text,
2103    /// Machine-readable XML compatibility report.
2104    Xml,
2105    /// Raw processor-style XML report.
2106    RawXml,
2107    /// Static human-readable HTML report.
2108    Html,
2109}
2110
2111impl ReportFormat {
2112    /// Writes a validation report in this format.
2113    ///
2114    /// # Errors
2115    ///
2116    /// Returns [`PdfvError`] if serialization or writing fails.
2117    pub fn write_report<W: Write>(&self, report: &ValidationReport, out: W) -> Result<()> {
2118        match self {
2119            Self::Json => JsonReportWriter::compact().write_report(report, out),
2120            Self::JsonPretty => JsonReportWriter::pretty().write_report(report, out),
2121            Self::Text => TextReportWriter.write_report(report, out),
2122            Self::Xml => XmlReportWriter.write_report(report, out),
2123            Self::RawXml => RawXmlReportWriter.write_report(report, out),
2124            Self::Html => HtmlReportWriter.write_report(report, out),
2125        }
2126    }
2127
2128    /// Writes a batch validation report in this format.
2129    ///
2130    /// # Errors
2131    ///
2132    /// Returns [`PdfvError`] if serialization or writing fails.
2133    pub fn write_batch<W: Write>(&self, report: &BatchReport, out: W) -> Result<()> {
2134        match self {
2135            Self::Json => JsonReportWriter::compact().write_batch(report, out),
2136            Self::JsonPretty => JsonReportWriter::pretty().write_batch(report, out),
2137            Self::Text => TextReportWriter.write_batch(report, out),
2138            Self::Xml => XmlReportWriter.write_batch(report, out),
2139            Self::RawXml => RawXmlReportWriter.write_batch(report, out),
2140            Self::Html => HtmlReportWriter.write_batch(report, out),
2141        }
2142    }
2143
2144    /// Writes a metadata repair report in this format.
2145    ///
2146    /// # Errors
2147    ///
2148    /// Returns [`PdfvError`] if serialization or writing fails.
2149    pub fn write_repair_report<W: Write>(&self, report: &RepairReport, out: W) -> Result<()> {
2150        match self {
2151            Self::Json => JsonReportWriter::compact().write_repair_report(report, out),
2152            Self::JsonPretty => JsonReportWriter::pretty().write_repair_report(report, out),
2153            Self::Text => TextReportWriter.write_repair_report(report, out),
2154            Self::Xml => XmlReportWriter.write_repair_report(report, out),
2155            Self::RawXml => RawXmlReportWriter.write_repair_report(report, out),
2156            Self::Html => HtmlReportWriter.write_repair_report(report, out),
2157        }
2158    }
2159
2160    /// Writes a batch metadata repair report in this format.
2161    ///
2162    /// # Errors
2163    ///
2164    /// Returns [`PdfvError`] if serialization or writing fails.
2165    pub fn write_repair_batch<W: Write>(&self, report: &RepairBatchReport, out: W) -> Result<()> {
2166        match self {
2167            Self::Json => JsonReportWriter::compact().write_repair_batch(report, out),
2168            Self::JsonPretty => JsonReportWriter::pretty().write_repair_batch(report, out),
2169            Self::Text => TextReportWriter.write_repair_batch(report, out),
2170            Self::Xml => XmlReportWriter.write_repair_batch(report, out),
2171            Self::RawXml => RawXmlReportWriter.write_repair_batch(report, out),
2172            Self::Html => HtmlReportWriter.write_repair_batch(report, out),
2173        }
2174    }
2175}
2176
2177/// Report writer interface.
2178pub trait ReportWriter {
2179    /// Writes a single validation report.
2180    ///
2181    /// # Errors
2182    ///
2183    /// Returns [`PdfvError`] if serialization or writing fails.
2184    fn write_report<W: Write>(&self, report: &ValidationReport, out: W) -> Result<()>;
2185
2186    /// Writes a batch validation report.
2187    ///
2188    /// # Errors
2189    ///
2190    /// Returns [`PdfvError`] if serialization or writing fails.
2191    fn write_batch<W: Write>(&self, report: &BatchReport, out: W) -> Result<()>;
2192
2193    /// Writes a single metadata repair report.
2194    ///
2195    /// # Errors
2196    ///
2197    /// Returns [`PdfvError`] if serialization or writing fails.
2198    fn write_repair_report<W: Write>(&self, report: &RepairReport, out: W) -> Result<()>;
2199
2200    /// Writes a batch metadata repair report.
2201    ///
2202    /// # Errors
2203    ///
2204    /// Returns [`PdfvError`] if serialization or writing fails.
2205    fn write_repair_batch<W: Write>(&self, report: &RepairBatchReport, out: W) -> Result<()>;
2206}
2207
/// Report writer that emits JSON.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct JsonReportWriter {
    pretty: bool,
}

impl JsonReportWriter {
    /// Returns a writer that emits compact JSON.
    #[must_use]
    pub fn compact() -> Self {
        let pretty = false;
        Self { pretty }
    }

    /// Returns a writer that emits pretty-printed JSON.
    #[must_use]
    pub fn pretty() -> Self {
        let pretty = true;
        Self { pretty }
    }
}
2227
2228impl ReportWriter for JsonReportWriter {
2229    fn write_report<W: Write>(&self, report: &ValidationReport, out: W) -> Result<()> {
2230        write_json(out, report, self.pretty)
2231    }
2232
2233    fn write_batch<W: Write>(&self, report: &BatchReport, out: W) -> Result<()> {
2234        write_json(out, report, self.pretty)
2235    }
2236
2237    fn write_repair_report<W: Write>(&self, report: &RepairReport, out: W) -> Result<()> {
2238        write_json(out, report, self.pretty)
2239    }
2240
2241    fn write_repair_batch<W: Write>(&self, report: &RepairBatchReport, out: W) -> Result<()> {
2242        write_json(out, report, self.pretty)
2243    }
2244}
2245
2246/// Human-readable text report writer.
2247#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
2248pub struct TextReportWriter;
2249
2250impl ReportWriter for TextReportWriter {
2251    fn write_report<W: Write>(&self, report: &ValidationReport, mut out: W) -> Result<()> {
2252        write_text_report(report, &mut out)
2253    }
2254
2255    fn write_batch<W: Write>(&self, report: &BatchReport, mut out: W) -> Result<()> {
2256        writeln!(
2257            out,
2258            "batch: {}",
2259            exit_category_text(report.summary.worst_exit_category)
2260        )
2261        .map_err(write_error)?;
2262        writeln!(out, "files: {}", report.summary.total_files).map_err(write_error)?;
2263        writeln!(
2264            out,
2265            "summary: {} valid, {} invalid, {} parse failed, {} encrypted, {} incomplete, {} \
2266             internal errors",
2267            report.summary.valid,
2268            report.summary.invalid,
2269            report.summary.parse_failures,
2270            report.summary.encrypted,
2271            report.summary.incomplete,
2272            report.summary.internal_errors,
2273        )
2274        .map_err(write_error)?;
2275        if !report.warnings.is_empty() {
2276            writeln!(out, "warnings: {}", report.warnings.len()).map_err(write_error)?;
2277        }
2278        writeln!(out, "items:").map_err(write_error)?;
2279        for item in &report.items {
2280            writeln!(
2281                out,
2282                "  {}: {}",
2283                source_name(&item.source),
2284                status_text(item.status)
2285            )
2286            .map_err(write_error)?;
2287        }
2288        Ok(())
2289    }
2290
2291    fn write_repair_report<W: Write>(&self, report: &RepairReport, mut out: W) -> Result<()> {
2292        write_text_repair_report(report, &mut out)
2293    }
2294
2295    fn write_repair_batch<W: Write>(&self, report: &RepairBatchReport, mut out: W) -> Result<()> {
2296        writeln!(
2297            out,
2298            "repair batch: {}",
2299            exit_category_text(report.summary.worst_exit_category)
2300        )
2301        .map_err(write_error)?;
2302        writeln!(out, "files: {}", report.summary.total_files).map_err(write_error)?;
2303        writeln!(
2304            out,
2305            "summary: {} repaired, {} unchanged, {} refused, {} failed",
2306            report.summary.succeeded,
2307            report.summary.no_action,
2308            report.summary.refused,
2309            report.summary.failed,
2310        )
2311        .map_err(write_error)?;
2312        writeln!(out, "items:").map_err(write_error)?;
2313        for item in &report.items {
2314            writeln!(
2315                out,
2316                "  {}: {}",
2317                source_name(&item.source),
2318                repair_status_text(item.status),
2319            )
2320            .map_err(write_error)?;
2321        }
2322        Ok(())
2323    }
2324}
2325
2326/// Machine-readable XML report writer.
2327#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
2328pub struct XmlReportWriter;
2329
2330impl ReportWriter for XmlReportWriter {
2331    fn write_report<W: Write>(&self, report: &ValidationReport, mut out: W) -> Result<()> {
2332        let batch = BatchReport::from_items(vec![report.clone()], Vec::new(), Duration::ZERO);
2333        write_xml_batch(&batch, &mut out)
2334    }
2335
2336    fn write_batch<W: Write>(&self, report: &BatchReport, mut out: W) -> Result<()> {
2337        write_xml_batch(report, &mut out)
2338    }
2339
2340    fn write_repair_report<W: Write>(&self, report: &RepairReport, mut out: W) -> Result<()> {
2341        let batch = RepairBatchReport::from_items(vec![report.clone()], Vec::new(), Duration::ZERO);
2342        write_xml_repair_batch(&batch, &mut out, "repairReport")
2343    }
2344
2345    fn write_repair_batch<W: Write>(&self, report: &RepairBatchReport, mut out: W) -> Result<()> {
2346        write_xml_repair_batch(report, &mut out, "repairReport")
2347    }
2348}
2349
2350/// Raw processor-style XML report writer.
2351#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
2352pub struct RawXmlReportWriter;
2353
2354impl ReportWriter for RawXmlReportWriter {
2355    fn write_report<W: Write>(&self, report: &ValidationReport, mut out: W) -> Result<()> {
2356        let batch = BatchReport::from_items(vec![report.clone()], Vec::new(), Duration::ZERO);
2357        write_raw_xml_batch(&batch, &mut out)
2358    }
2359
2360    fn write_batch<W: Write>(&self, report: &BatchReport, mut out: W) -> Result<()> {
2361        write_raw_xml_batch(report, &mut out)
2362    }
2363
2364    fn write_repair_report<W: Write>(&self, report: &RepairReport, mut out: W) -> Result<()> {
2365        let batch = RepairBatchReport::from_items(vec![report.clone()], Vec::new(), Duration::ZERO);
2366        write_xml_repair_batch(&batch, &mut out, "rawRepairReport")
2367    }
2368
2369    fn write_repair_batch<W: Write>(&self, report: &RepairBatchReport, mut out: W) -> Result<()> {
2370        write_xml_repair_batch(report, &mut out, "rawRepairReport")
2371    }
2372}
2373
2374/// Static HTML report writer.
2375#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
2376pub struct HtmlReportWriter;
2377
2378impl ReportWriter for HtmlReportWriter {
2379    fn write_report<W: Write>(&self, report: &ValidationReport, mut out: W) -> Result<()> {
2380        let batch = BatchReport::from_items(vec![report.clone()], Vec::new(), Duration::ZERO);
2381        write_html_batch(&batch, &mut out)
2382    }
2383
2384    fn write_batch<W: Write>(&self, report: &BatchReport, mut out: W) -> Result<()> {
2385        write_html_batch(report, &mut out)
2386    }
2387
2388    fn write_repair_report<W: Write>(&self, report: &RepairReport, mut out: W) -> Result<()> {
2389        let batch = RepairBatchReport::from_items(vec![report.clone()], Vec::new(), Duration::ZERO);
2390        write_html_repair_batch(&batch, &mut out)
2391    }
2392
2393    fn write_repair_batch<W: Write>(&self, report: &RepairBatchReport, mut out: W) -> Result<()> {
2394        write_html_repair_batch(report, &mut out)
2395    }
2396}
2397
2398fn write_json<W, T>(out: W, value: &T, pretty: bool) -> Result<()>
2399where
2400    W: Write,
2401    T: Serialize,
2402{
2403    if pretty {
2404        serde_json::to_writer_pretty(out, value).map_err(ReportError::from)?;
2405    } else {
2406        serde_json::to_writer(out, value).map_err(ReportError::from)?;
2407    }
2408    Ok(())
2409}
2410
2411fn write_text_report<W: Write>(report: &ValidationReport, out: &mut W) -> Result<()> {
2412    writeln!(
2413        out,
2414        "{}: {}",
2415        source_name(&report.source),
2416        status_text(report.status),
2417    )
2418    .map_err(write_error)?;
2419    writeln!(out, "profiles: {}", profile_list(report)).map_err(write_error)?;
2420    let checks = check_counts(report);
2421    writeln!(
2422        out,
2423        "checks: {} passed, {} failed, {} unsupported",
2424        checks.passed, checks.failed, checks.unsupported,
2425    )
2426    .map_err(write_error)?;
2427    let failures = report
2428        .profile_reports
2429        .iter()
2430        .flat_map(|profile| profile.failed_assertions.iter())
2431        .take(5)
2432        .collect::<Vec<_>>();
2433    if !failures.is_empty() {
2434        writeln!(out, "first failures:").map_err(write_error)?;
2435        for assertion in failures {
2436            writeln!(
2437                out,
2438                "  {} at {}: {}",
2439                assertion.rule_id.0.as_str(),
2440                location_text(&assertion.location),
2441                assertion_message(assertion),
2442            )
2443            .map_err(write_error)?;
2444        }
2445    }
2446    let unsupported = report
2447        .profile_reports
2448        .iter()
2449        .flat_map(|profile| profile.unsupported_rules.iter())
2450        .take(5)
2451        .collect::<Vec<_>>();
2452    if !unsupported.is_empty() {
2453        writeln!(out, "unsupported rules:").map_err(write_error)?;
2454        for rule in unsupported {
2455            writeln!(
2456                out,
2457                "  {}: {}{}",
2458                rule.rule_id.0.as_str(),
2459                rule.reason.as_str(),
2460                reference_suffix(&rule.references),
2461            )
2462            .map_err(write_error)?;
2463        }
2464    }
2465    if !report.warnings.is_empty() {
2466        writeln!(out, "warnings: {}", report.warnings.len()).map_err(write_error)?;
2467    }
2468    if let Some(features) = &report.feature_report {
2469        writeln!(out, "features: {} objects", features.objects.len()).map_err(write_error)?;
2470    }
2471    if let Some(policy) = &report.policy_report {
2472        writeln!(
2473            out,
2474            "policy: {}",
2475            if policy.is_compliant {
2476                "compliant"
2477            } else {
2478                "non-compliant"
2479            }
2480        )
2481        .map_err(write_error)?;
2482    }
2483    Ok(())
2484}
2485
2486fn write_text_repair_report<W: Write>(report: &RepairReport, out: &mut W) -> Result<()> {
2487    writeln!(
2488        out,
2489        "{}: {}",
2490        source_name(&report.source),
2491        repair_status_text(report.status),
2492    )
2493    .map_err(write_error)?;
2494    if let Some(output_path) = &report.output_path {
2495        writeln!(out, "output: {}", output_path.display()).map_err(write_error)?;
2496    }
2497    if !report.actions.is_empty() {
2498        writeln!(out, "actions: {}", report.actions.len()).map_err(write_error)?;
2499        for action in &report.actions {
2500            writeln!(out, "  {}", repair_action_text(action)).map_err(write_error)?;
2501        }
2502    }
2503    if let Some(refusal) = &report.refusal {
2504        writeln!(out, "refusal: {}", repair_refusal_text(refusal)).map_err(write_error)?;
2505    }
2506    if !report.warnings.is_empty() {
2507        writeln!(out, "warnings: {}", report.warnings.len()).map_err(write_error)?;
2508    }
2509    Ok(())
2510}
2511
2512fn write_xml_batch<W: Write>(report: &BatchReport, out: &mut W) -> Result<()> {
2513    writeln!(out, r#"<?xml version="1.0" encoding="utf-8"?>"#).map_err(write_error)?;
2514    writeln!(out, "<report>").map_err(write_error)?;
2515    writeln!(out, "  <buildInformation>").map_err(write_error)?;
2516    writeln!(
2517        out,
2518        r#"    <releaseDetails id="pdfv-core" version="{}"></releaseDetails>"#,
2519        XmlEscapedAttr::new(ENGINE_VERSION)?,
2520    )
2521    .map_err(write_error)?;
2522    writeln!(out, "  </buildInformation>").map_err(write_error)?;
2523    writeln!(out, "  <jobs>").map_err(write_error)?;
2524    for item in &report.items {
2525        write_xml_job(item, out)?;
2526    }
2527    writeln!(out, "  </jobs>").map_err(write_error)?;
2528    write_xml_batch_summary(&report.summary, out)?;
2529    write_xml_warnings(&report.warnings, out, 2)?;
2530    writeln!(out, "</report>").map_err(write_error)?;
2531    Ok(())
2532}
2533
2534fn write_raw_xml_batch<W: Write>(report: &BatchReport, out: &mut W) -> Result<()> {
2535    writeln!(out, r#"<?xml version="1.0" encoding="utf-8"?>"#).map_err(write_error)?;
2536    writeln!(
2537        out,
2538        r#"<rawReport engine="pdfv-core" version="{}">"#,
2539        XmlEscapedAttr::new(ENGINE_VERSION)?,
2540    )
2541    .map_err(write_error)?;
2542    writeln!(
2543        out,
2544        r#"  <processorConfig tasks="{}"></processorConfig>"#,
2545        XmlEscapedAttr::new(&raw_validation_tasks(report))?,
2546    )
2547    .map_err(write_error)?;
2548    writeln!(out, "  <processorResults>").map_err(write_error)?;
2549    for item in &report.items {
2550        writeln!(
2551            out,
2552            r#"    <processorResult status="{}">"#,
2553            status_text(item.status),
2554        )
2555        .map_err(write_error)?;
2556        write_xml_item(&item.source, out)?;
2557        for profile in &item.profile_reports {
2558            write_xml_validation_report(item.status, profile, out)?;
2559        }
2560        if let Some(feature_report) = &item.feature_report {
2561            write_xml_feature_report(feature_report, out)?;
2562        }
2563        if let Some(policy_report) = &item.policy_report {
2564            write_xml_policy_report(policy_report, out)?;
2565        }
2566        write_xml_parse_facts(&item.parse_facts, out)?;
2567        write_xml_warnings(&item.warnings, out, 6)?;
2568        writeln!(out, "    </processorResult>").map_err(write_error)?;
2569    }
2570    writeln!(out, "  </processorResults>").map_err(write_error)?;
2571    write_xml_batch_summary(&report.summary, out)?;
2572    writeln!(out, "</rawReport>").map_err(write_error)?;
2573    Ok(())
2574}
2575
2576fn write_xml_repair_batch<W: Write>(
2577    report: &RepairBatchReport,
2578    out: &mut W,
2579    root: &str,
2580) -> Result<()> {
2581    writeln!(out, r#"<?xml version="1.0" encoding="utf-8"?>"#).map_err(write_error)?;
2582    writeln!(
2583        out,
2584        r#"<{root} engine="pdfv-core" version="{}">"#,
2585        XmlEscapedAttr::new(ENGINE_VERSION)?,
2586    )
2587    .map_err(write_error)?;
2588    if root == "rawRepairReport" {
2589        writeln!(
2590            out,
2591            r#"  <processorConfig tasks="metadata"></processorConfig>"#,
2592        )
2593        .map_err(write_error)?;
2594    }
2595    writeln!(out, "  <items>").map_err(write_error)?;
2596    for item in &report.items {
2597        write_xml_repair_item(item, out)?;
2598    }
2599    writeln!(out, "  </items>").map_err(write_error)?;
2600    write_xml_repair_summary(&report.summary, out)?;
2601    write_xml_warnings(&report.warnings, out, 2)?;
2602    writeln!(out, "</{root}>").map_err(write_error)?;
2603    Ok(())
2604}
2605
2606fn write_xml_repair_item<W: Write>(report: &RepairReport, out: &mut W) -> Result<()> {
2607    writeln!(
2608        out,
2609        r#"    <repairItem status="{}">"#,
2610        repair_status_text(report.status),
2611    )
2612    .map_err(write_error)?;
2613    write_xml_item(&report.source, out)?;
2614    if let Some(output_path) = &report.output_path {
2615        writeln!(
2616            out,
2617            "      <output>{}</output>",
2618            XmlEscapedText::new(&output_path.display().to_string())?,
2619        )
2620        .map_err(write_error)?;
2621    }
2622    if !report.actions.is_empty() {
2623        writeln!(out, "      <actions>").map_err(write_error)?;
2624        for action in &report.actions {
2625            writeln!(
2626                out,
2627                r#"        <action kind="{}">{}</action>"#,
2628                repair_action_kind(action),
2629                XmlEscapedText::new(&repair_action_text(action))?,
2630            )
2631            .map_err(write_error)?;
2632        }
2633        writeln!(out, "      </actions>").map_err(write_error)?;
2634    }
2635    if let Some(refusal) = &report.refusal {
2636        writeln!(
2637            out,
2638            r#"      <refusal kind="{}">{}</refusal>"#,
2639            repair_refusal_kind(refusal),
2640            XmlEscapedText::new(&repair_refusal_text(refusal))?,
2641        )
2642        .map_err(write_error)?;
2643    }
2644    write_xml_warnings(&report.warnings, out, 6)?;
2645    writeln!(out, "    </repairItem>").map_err(write_error)?;
2646    Ok(())
2647}
2648
2649fn write_xml_repair_summary<W: Write>(summary: &RepairBatchSummary, out: &mut W) -> Result<()> {
2650    writeln!(
2651        out,
2652        r#"  <repairSummary totalJobs="{}" succeeded="{}" noAction="{}" refused="{}" failed="{}" elapsedMillis="{}"></repairSummary>"#,
2653        summary.total_files,
2654        summary.succeeded,
2655        summary.no_action,
2656        summary.refused,
2657        summary.failed,
2658        summary.elapsed_millis,
2659    )
2660    .map_err(write_error)?;
2661    Ok(())
2662}
2663
2664fn raw_validation_tasks(report: &BatchReport) -> String {
2665    let has_features = report
2666        .items
2667        .iter()
2668        .any(|item| item.feature_report.is_some());
2669    let has_policy = report.items.iter().any(|item| item.policy_report.is_some());
2670    let mut tasks = vec!["validation"];
2671    if has_features {
2672        tasks.push("features");
2673    }
2674    if has_policy {
2675        tasks.push("policy");
2676    }
2677    tasks.join(",")
2678}
2679
2680fn write_html_batch<W: Write>(report: &BatchReport, out: &mut W) -> Result<()> {
2681    write_html_start(out, "pdfv validation report")?;
2682    writeln!(out, "<h1>Validation Report</h1>").map_err(write_error)?;
2683    writeln!(
2684        out,
2685        "<p>{} valid, {} invalid, {} parse failed, {} encrypted, {} incomplete.</p>",
2686        report.summary.valid,
2687        report.summary.invalid,
2688        report.summary.parse_failures,
2689        report.summary.encrypted,
2690        report.summary.incomplete,
2691    )
2692    .map_err(write_error)?;
2693    writeln!(
2694        out,
2695        "<table><thead><tr><th>Input</th><th>Status</th><th>Profiles</th><th>Features</\
2696         th><th>Policy</th></tr></thead><tbody>"
2697    )
2698    .map_err(write_error)?;
2699    for item in &report.items {
2700        let features = item
2701            .feature_report
2702            .as_ref()
2703            .map_or(String::from("-"), |features| {
2704                features.objects.len().to_string()
2705            });
2706        let policy = item.policy_report.as_ref().map_or("-", |policy| {
2707            if policy.is_compliant {
2708                "compliant"
2709            } else {
2710                "non-compliant"
2711            }
2712        });
2713        writeln!(
2714            out,
2715            "<tr><td>{}</td><td>{}</td><td>{}</td><td>{}</td><td>{}</td></tr>",
2716            HtmlEscapedText::new(&source_name(&item.source))?,
2717            status_text(item.status),
2718            HtmlEscapedText::new(&profile_list(item))?,
2719            features,
2720            policy,
2721        )
2722        .map_err(write_error)?;
2723    }
2724    writeln!(out, "</tbody></table>").map_err(write_error)?;
2725    write_html_end(out)
2726}
2727
2728fn write_html_repair_batch<W: Write>(report: &RepairBatchReport, out: &mut W) -> Result<()> {
2729    write_html_start(out, "pdfv metadata repair report")?;
2730    writeln!(out, "<h1>Metadata Repair Report</h1>").map_err(write_error)?;
2731    writeln!(
2732        out,
2733        "<p>{} repaired, {} unchanged, {} refused, {} failed.</p>",
2734        report.summary.succeeded,
2735        report.summary.no_action,
2736        report.summary.refused,
2737        report.summary.failed,
2738    )
2739    .map_err(write_error)?;
2740    writeln!(
2741        out,
2742        "<table><thead><tr><th>Input</th><th>Status</th><th>Output</th><th>Reason</th></tr></\
2743         thead><tbody>"
2744    )
2745    .map_err(write_error)?;
2746    for item in &report.items {
2747        let output = item
2748            .output_path
2749            .as_ref()
2750            .map_or_else(String::new, |path| path.display().to_string());
2751        let reason = item
2752            .refusal
2753            .as_ref()
2754            .map_or_else(String::new, repair_refusal_text);
2755        writeln!(
2756            out,
2757            "<tr><td>{}</td><td>{}</td><td>{}</td><td>{}</td></tr>",
2758            HtmlEscapedText::new(&source_name(&item.source))?,
2759            repair_status_text(item.status),
2760            HtmlEscapedText::new(&output)?,
2761            HtmlEscapedText::new(&reason)?,
2762        )
2763        .map_err(write_error)?;
2764    }
2765    writeln!(out, "</tbody></table>").map_err(write_error)?;
2766    write_html_end(out)
2767}
2768
2769fn write_html_start<W: Write>(out: &mut W, title: &str) -> Result<()> {
2770    writeln!(out, "<!doctype html>").map_err(write_error)?;
2771    writeln!(
2772        out,
2773        r#"<html lang="en"><head><meta charset="utf-8"><title>{}</title><style>body{{font-family:system-ui,sans-serif;margin:2rem;color:#1f2937}}table{{border-collapse:collapse;width:100%}}th,td{{border:1px solid #d1d5db;padding:.4rem;text-align:left}}th{{background:#f3f4f6}}</style></head><body>"#,
2774        HtmlEscapedText::new(title)?,
2775    )
2776    .map_err(write_error)?;
2777    Ok(())
2778}
2779
2780fn write_html_end<W: Write>(out: &mut W) -> Result<()> {
2781    writeln!(out, "</body></html>").map_err(write_error)?;
2782    Ok(())
2783}
2784
2785fn write_xml_job<W: Write>(report: &ValidationReport, out: &mut W) -> Result<()> {
2786    writeln!(out, "    <job>").map_err(write_error)?;
2787    write_xml_item(&report.source, out)?;
2788    for profile in &report.profile_reports {
2789        write_xml_validation_report(report.status, profile, out)?;
2790    }
2791    if report.profile_reports.is_empty() {
2792        writeln!(
2793            out,
2794            r#"      <validationReport profileName="" statement="{}" isCompliant="false">"#,
2795            XmlEscapedAttr::new(status_statement(report.status))?,
2796        )
2797        .map_err(write_error)?;
2798        writeln!(
2799            out,
2800            r#"        <details passedRules="0" failedRules="0" passedChecks="0" failedChecks="0" unsupportedRules="0"></details>"#,
2801        )
2802        .map_err(write_error)?;
2803        writeln!(out, "      </validationReport>").map_err(write_error)?;
2804    }
2805    write_xml_parse_facts(&report.parse_facts, out)?;
2806    if let Some(feature_report) = &report.feature_report {
2807        write_xml_feature_report(feature_report, out)?;
2808    }
2809    if let Some(policy_report) = &report.policy_report {
2810        write_xml_policy_report(policy_report, out)?;
2811    }
2812    write_xml_warnings(&report.warnings, out, 6)?;
2813    writeln!(out, "    </job>").map_err(write_error)?;
2814    Ok(())
2815}
2816
2817fn write_xml_item<W: Write>(source: &InputSummary, out: &mut W) -> Result<()> {
2818    let size = source
2819        .bytes
2820        .map_or_else(String::new, |bytes| format!(r#" size="{bytes}""#));
2821    writeln!(out, "      <item{size}>").map_err(write_error)?;
2822    let name = source_name(source);
2823    writeln!(out, "        <name>{}</name>", XmlEscapedText::new(&name)?).map_err(write_error)?;
2824    writeln!(out, "      </item>").map_err(write_error)?;
2825    Ok(())
2826}
2827
2828fn write_xml_validation_report<W: Write>(
2829    status: ValidationStatus,
2830    profile: &ProfileReport,
2831    out: &mut W,
2832) -> Result<()> {
2833    writeln!(
2834        out,
2835        r#"      <validationReport profileName="{}" statement="{}" isCompliant="{}">"#,
2836        XmlEscapedAttr::new(profile.profile.name.as_str())?,
2837        XmlEscapedAttr::new(status_statement(status))?,
2838        profile.is_compliant,
2839    )
2840    .map_err(write_error)?;
2841    let failed_checks = u64::try_from(profile.failed_assertions.len()).unwrap_or(u64::MAX);
2842    let unsupported_rules = u64::try_from(profile.unsupported_rules.len()).unwrap_or(u64::MAX);
2843    let passed_checks = profile.checks_executed.saturating_sub(failed_checks);
2844    let passed_rules = profile.rules_executed.saturating_sub(profile.failed_rules);
2845    writeln!(
2846        out,
2847        r#"        <details passedRules="{passed_rules}" failedRules="{}" passedChecks="{passed_checks}" failedChecks="{failed_checks}" unsupportedRules="{unsupported_rules}"></details>"#,
2848        profile.failed_rules,
2849    )
2850    .map_err(write_error)?;
2851    write_xml_assertions("failedChecks", &profile.failed_assertions, out)?;
2852    write_xml_assertions("passedChecks", &profile.passed_assertions, out)?;
2853    write_xml_unsupported_rules(&profile.unsupported_rules, out)?;
2854    writeln!(out, "      </validationReport>").map_err(write_error)?;
2855    Ok(())
2856}
2857
2858fn write_xml_assertions<W: Write>(
2859    element: &str,
2860    assertions: &[Assertion],
2861    out: &mut W,
2862) -> Result<()> {
2863    if assertions.is_empty() {
2864        return Ok(());
2865    }
2866    writeln!(out, "        <{element}>").map_err(write_error)?;
2867    for assertion in assertions {
2868        writeln!(
2869            out,
2870            r#"          <check ruleId="{}" status="{}" location="{}">"#,
2871            XmlEscapedAttr::new(assertion.rule_id.0.as_str())?,
2872            assertion_status_text(assertion.status),
2873            XmlEscapedAttr::new(&location_text(&assertion.location))?,
2874        )
2875        .map_err(write_error)?;
2876        writeln!(
2877            out,
2878            "            <description>{}</description>",
2879            XmlEscapedText::new(assertion.description.as_str())?,
2880        )
2881        .map_err(write_error)?;
2882        if let Some(message) = &assertion.message {
2883            writeln!(
2884                out,
2885                "            <message>{}</message>",
2886                XmlEscapedText::new(message.as_str())?,
2887            )
2888            .map_err(write_error)?;
2889        }
2890        if !assertion.error_arguments.is_empty() {
2891            writeln!(out, "            <errorArguments>").map_err(write_error)?;
2892            for argument in &assertion.error_arguments {
2893                writeln!(
2894                    out,
2895                    r#"              <argument name="{}">{}</argument>"#,
2896                    XmlEscapedAttr::new(argument.name.as_str())?,
2897                    XmlEscapedText::new(argument.value.as_str())?,
2898                )
2899                .map_err(write_error)?;
2900            }
2901            writeln!(out, "            </errorArguments>").map_err(write_error)?;
2902        }
2903        writeln!(out, "          </check>").map_err(write_error)?;
2904    }
2905    writeln!(out, "        </{element}>").map_err(write_error)?;
2906    Ok(())
2907}
2908
2909fn write_xml_unsupported_rules<W: Write>(rules: &[UnsupportedRule], out: &mut W) -> Result<()> {
2910    if rules.is_empty() {
2911        return Ok(());
2912    }
2913    writeln!(out, "        <unsupportedRules>").map_err(write_error)?;
2914    for rule in rules {
2915        writeln!(
2916            out,
2917            r#"          <rule profileId="{}" ruleId="{}">"#,
2918            XmlEscapedAttr::new(rule.profile_id.as_str())?,
2919            XmlEscapedAttr::new(rule.rule_id.0.as_str())?,
2920        )
2921        .map_err(write_error)?;
2922        if let Some(fragment) = &rule.expression_fragment {
2923            writeln!(
2924                out,
2925                "            <expression>{}</expression>",
2926                XmlEscapedText::new(fragment.as_str())?,
2927            )
2928            .map_err(write_error)?;
2929        }
2930        writeln!(
2931            out,
2932            "            <reason>{}</reason>",
2933            XmlEscapedText::new(rule.reason.as_str())?,
2934        )
2935        .map_err(write_error)?;
2936        if !rule.references.is_empty() {
2937            writeln!(out, "            <references>").map_err(write_error)?;
2938            for reference in &rule.references {
2939                writeln!(
2940                    out,
2941                    r#"              <reference specification="{}" clause="{}"></reference>"#,
2942                    XmlEscapedAttr::new(reference.specification.as_str())?,
2943                    XmlEscapedAttr::new(reference.clause.as_str())?,
2944                )
2945                .map_err(write_error)?;
2946            }
2947            writeln!(out, "            </references>").map_err(write_error)?;
2948        }
2949        writeln!(out, "          </rule>").map_err(write_error)?;
2950    }
2951    writeln!(out, "        </unsupportedRules>").map_err(write_error)?;
2952    Ok(())
2953}
2954
2955fn write_xml_feature_report<W: Write>(report: &FeatureReport, out: &mut W) -> Result<()> {
2956    writeln!(
2957        out,
2958        r#"      <featureReport visitedObjects="{}" extractedObjects="{}" truncated="{}">"#,
2959        report.visited_objects,
2960        report.objects.len(),
2961        report.truncated,
2962    )
2963    .map_err(write_error)?;
2964    for object in &report.objects {
2965        writeln!(
2966            out,
2967            r#"        <featureObject family="{}" location="{}">"#,
2968            XmlEscapedAttr::new(object.family.as_str())?,
2969            XmlEscapedAttr::new(&location_text(&object.location))?,
2970        )
2971        .map_err(write_error)?;
2972        for (name, value) in &object.properties {
2973            writeln!(
2974                out,
2975                r#"          <property name="{}">"#,
2976                XmlEscapedAttr::new(name.as_str())?,
2977            )
2978            .map_err(write_error)?;
2979            write_xml_feature_value(value, out, 12)?;
2980            writeln!(out, "          </property>").map_err(write_error)?;
2981        }
2982        writeln!(out, "        </featureObject>").map_err(write_error)?;
2983    }
2984    writeln!(out, "      </featureReport>").map_err(write_error)?;
2985    Ok(())
2986}
2987
2988fn write_xml_policy_report<W: Write>(report: &PolicyReport, out: &mut W) -> Result<()> {
2989    writeln!(
2990        out,
2991        r#"      <policyReport name="{}" isCompliant="{}">"#,
2992        XmlEscapedAttr::new(report.name.as_ref().map_or("", BoundedText::as_str))?,
2993        report.is_compliant,
2994    )
2995    .map_err(write_error)?;
2996    for result in &report.results {
2997        writeln!(
2998            out,
2999            r#"        <rule id="{}" passed="{}" matches="{}">"#,
3000            XmlEscapedAttr::new(result.id.as_str())?,
3001            result.passed,
3002            result.matches,
3003        )
3004        .map_err(write_error)?;
3005        writeln!(
3006            out,
3007            "          <description>{}</description>",
3008            XmlEscapedText::new(result.description.as_str())?,
3009        )
3010        .map_err(write_error)?;
3011        writeln!(
3012            out,
3013            "          <message>{}</message>",
3014            XmlEscapedText::new(result.message.as_str())?,
3015        )
3016        .map_err(write_error)?;
3017        writeln!(out, "        </rule>").map_err(write_error)?;
3018    }
3019    writeln!(out, "      </policyReport>").map_err(write_error)?;
3020    Ok(())
3021}
3022
3023fn reference_suffix(references: &[SpecReference]) -> String {
3024    let Some(reference) = references.first() else {
3025        return String::new();
3026    };
3027    format!(
3028        " [{} {}]",
3029        reference.specification.as_str(),
3030        reference.clause.as_str()
3031    )
3032}
3033
3034fn write_xml_feature_value<W: Write>(
3035    value: &FeatureValue,
3036    out: &mut W,
3037    indent: usize,
3038) -> Result<()> {
3039    let spaces = " ".repeat(indent);
3040    match value {
3041        FeatureValue::Null => writeln!(out, r#"{spaces}<value type="null"></value>"#),
3042        FeatureValue::Bool(value) => {
3043            writeln!(out, r#"{spaces}<value type="bool">{value}</value>"#)
3044        }
3045        FeatureValue::Number(value) => {
3046            writeln!(out, r#"{spaces}<value type="number">{value}</value>"#)
3047        }
3048        FeatureValue::String(value) => writeln!(
3049            out,
3050            r#"{spaces}<value type="string">{}</value>"#,
3051            XmlEscapedText::new(value.as_str())?,
3052        ),
3053        FeatureValue::RedactedString { bytes } => writeln!(
3054            out,
3055            r#"{spaces}<value type="redactedString" bytes="{bytes}"></value>"#
3056        ),
3057        FeatureValue::ObjectKey(value) => writeln!(
3058            out,
3059            r#"{spaces}<value type="objectKey" number="{}" generation="{}"></value>"#,
3060            value.number, value.generation,
3061        ),
3062        FeatureValue::List(values) => {
3063            writeln!(out, r#"{spaces}<value type="list">"#).map_err(write_error)?;
3064            for item in values {
3065                write_xml_feature_value(item, out, indent.saturating_add(2))?;
3066            }
3067            writeln!(out, "{spaces}</value>")
3068        }
3069    }
3070    .map_err(write_error)?;
3071    Ok(())
3072}
3073
3074fn write_xml_parse_facts<W: Write>(facts: &[ParseFact], out: &mut W) -> Result<()> {
3075    if facts.is_empty() {
3076        return Ok(());
3077    }
3078    writeln!(out, "      <parseFacts>").map_err(write_error)?;
3079    for fact in facts {
3080        match fact {
3081            ParseFact::Header {
3082                offset,
3083                version,
3084                had_leading_bytes,
3085            } => writeln!(
3086                out,
3087                r#"        <header offset="{offset}" version="{}.{}" hadLeadingBytes="{had_leading_bytes}"></header>"#,
3088                version.major,
3089                version.minor,
3090            )
3091            .map_err(write_error)?,
3092            ParseFact::PostEofData { bytes } => {
3093                writeln!(out, r#"        <postEofData bytes="{bytes}"></postEofData>"#)
3094                    .map_err(write_error)?;
3095            }
3096            ParseFact::Xref { section, fact } => writeln!(
3097                out,
3098                r#"        <xref location="{}" fact="{}"></xref>"#,
3099                XmlEscapedAttr::new(&location_text(section))?,
3100                XmlEscapedAttr::new(&xref_fact_text(fact))?,
3101            )
3102            .map_err(write_error)?,
3103            ParseFact::Stream { object, fact } => writeln!(
3104                out,
3105                r#"        <stream object="{} {}" fact="{}"></stream>"#,
3106                object.number,
3107                object.generation,
3108                XmlEscapedAttr::new(&stream_fact_text(fact))?,
3109            )
3110            .map_err(write_error)?,
3111            ParseFact::Encryption {
3112                encrypted,
3113                handler,
3114                version,
3115                revision,
3116                algorithm,
3117                decrypted,
3118            } => writeln!(
3119                out,
3120                r#"        <encryption encrypted="{encrypted}" handler="{}" version="{}" revision="{}" algorithm="{}" decrypted="{decrypted}"></encryption>"#,
3121                XmlEscapedAttr::new(handler.as_ref().map_or("", Identifier::as_str))?,
3122                version.map_or_else(String::new, |value| value.to_string()),
3123                revision.map_or_else(String::new, |value| value.to_string()),
3124                XmlEscapedAttr::new(algorithm.as_ref().map_or("", Identifier::as_str))?,
3125            )
3126            .map_err(write_error)?,
3127            ParseFact::Xmp { object, fact } => writeln!(
3128                out,
3129                r#"        <xmp object="{} {}" fact="{}"></xmp>"#,
3130                object.number,
3131                object.generation,
3132                XmlEscapedAttr::new(&xmp_fact_text(fact))?,
3133            )
3134            .map_err(write_error)?,
3135        }
3136    }
3137    writeln!(out, "      </parseFacts>").map_err(write_error)?;
3138    Ok(())
3139}
3140
3141fn write_xml_warnings<W: Write>(
3142    warnings: &[ValidationWarning],
3143    out: &mut W,
3144    indent: usize,
3145) -> Result<()> {
3146    if warnings.is_empty() {
3147        return Ok(());
3148    }
3149    let spaces = " ".repeat(indent);
3150    writeln!(out, "{spaces}<warnings>").map_err(write_error)?;
3151    for warning in warnings {
3152        writeln!(
3153            out,
3154            "{spaces}  <warning>{}</warning>",
3155            XmlEscapedText::new(&warning_text(warning))?,
3156        )
3157        .map_err(write_error)?;
3158    }
3159    writeln!(out, "{spaces}</warnings>").map_err(write_error)?;
3160    Ok(())
3161}
3162
3163fn write_xml_batch_summary<W: Write>(summary: &BatchSummary, out: &mut W) -> Result<()> {
3164    writeln!(
3165        out,
3166        r#"  <batchSummary totalJobs="{}" failedToParse="{}" encrypted="{}" incomplete="{}" internalErrors="{}">"#,
3167        summary.total_files,
3168        summary.parse_failures,
3169        summary.encrypted,
3170        summary.incomplete,
3171        summary.internal_errors,
3172    )
3173    .map_err(write_error)?;
3174    writeln!(
3175        out,
3176        r#"    <validationReports compliant="{}" nonCompliant="{}" failedJobs="{}">{}</validationReports>"#,
3177        summary.valid,
3178        summary.invalid,
3179        summary
3180            .parse_failures
3181            .saturating_add(summary.encrypted)
3182            .saturating_add(summary.incomplete)
3183            .saturating_add(summary.internal_errors),
3184        summary.valid.saturating_add(summary.invalid),
3185    )
3186    .map_err(write_error)?;
3187    writeln!(
3188        out,
3189        r#"    <duration elapsedMillis="{}"></duration>"#,
3190        summary.elapsed_millis,
3191    )
3192    .map_err(write_error)?;
3193    writeln!(out, "  </batchSummary>").map_err(write_error)?;
3194    Ok(())
3195}
3196
/// Totals accumulated over the profile reports of one validation report by
/// `check_counts`.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
struct CheckCounts {
    /// Sum of the per-profile passed counts.
    passed: u64,
    /// Sum of the per-profile failed counts.
    failed: u64,
    /// Sum of the per-profile unsupported-rule counts.
    unsupported: u64,
}
3203
3204fn check_counts(report: &ValidationReport) -> CheckCounts {
3205    report
3206        .profile_reports
3207        .iter()
3208        .fold(CheckCounts::default(), |mut counts, profile| {
3209            let failed = profile.failed_rules;
3210            let unsupported = u64::try_from(profile.unsupported_rules.len()).unwrap_or(u64::MAX);
3211            counts.failed = counts.failed.saturating_add(failed);
3212            counts.unsupported = counts.unsupported.saturating_add(unsupported);
3213            counts.passed = counts
3214                .passed
3215                .saturating_add(profile.checks_executed.saturating_sub(failed));
3216            counts
3217        })
3218}
3219
3220fn profile_list(report: &ValidationReport) -> String {
3221    let profiles = report
3222        .profile_reports
3223        .iter()
3224        .map(|profile| profile.profile.id.as_str())
3225        .collect::<Vec<_>>();
3226    if profiles.is_empty() {
3227        String::from("-")
3228    } else {
3229        profiles.join(", ")
3230    }
3231}
3232
3233fn source_name(source: &InputSummary) -> String {
3234    source.path.as_ref().map_or_else(
3235        || String::from("<memory>"),
3236        |path| path.display().to_string(),
3237    )
3238}
3239
3240fn status_text(status: ValidationStatus) -> &'static str {
3241    match status {
3242        ValidationStatus::Valid => "valid",
3243        ValidationStatus::Invalid => "invalid",
3244        ValidationStatus::Encrypted => "encrypted",
3245        ValidationStatus::Incomplete => "incomplete",
3246        ValidationStatus::ParseFailed => "parse failed",
3247    }
3248}
3249
3250fn repair_status_text(status: RepairStatus) -> &'static str {
3251    match status {
3252        RepairStatus::Succeeded => "succeeded",
3253        RepairStatus::NoAction => "no action",
3254        RepairStatus::Refused => "refused",
3255        RepairStatus::Failed => "failed",
3256    }
3257}
3258
3259fn repair_action_kind(action: &RepairAction) -> &'static str {
3260    match action {
3261        RepairAction::CopiedUnchanged => "copiedUnchanged",
3262        RepairAction::MetadataRewritten { .. } => "metadataRewritten",
3263    }
3264}
3265
3266fn repair_action_text(action: &RepairAction) -> String {
3267    match action {
3268        RepairAction::CopiedUnchanged => String::from("copied unchanged"),
3269        RepairAction::MetadataRewritten { description } => description.as_str().to_owned(),
3270    }
3271}
3272
3273fn repair_refusal_kind(refusal: &RepairRefusal) -> &'static str {
3274    match refusal {
3275        RepairRefusal::ParseFailed { .. } => "parseFailed",
3276        RepairRefusal::Encrypted => "encrypted",
3277        RepairRefusal::AmbiguousFlavour { .. } => "ambiguousFlavour",
3278        RepairRefusal::UnsupportedValidationStatus { .. } => "unsupportedValidationStatus",
3279        RepairRefusal::OutputWouldModifyInput => "outputWouldModifyInput",
3280        RepairRefusal::InvalidOutputPath { .. } => "invalidOutputPath",
3281    }
3282}
3283
3284fn repair_refusal_text(refusal: &RepairRefusal) -> String {
3285    match refusal {
3286        RepairRefusal::ParseFailed { reason } => {
3287            format!("input could not be parsed: {}", reason.as_str())
3288        }
3289        RepairRefusal::Encrypted => String::from("encrypted inputs are not repaired"),
3290        RepairRefusal::AmbiguousFlavour { selected } => {
3291            format!("repair requires exactly one selected flavour, got {selected}")
3292        }
3293        RepairRefusal::UnsupportedValidationStatus { status } => {
3294            format!(
3295                "metadata repair is unsupported for {} inputs",
3296                status_text(*status)
3297            )
3298        }
3299        RepairRefusal::OutputWouldModifyInput => {
3300            String::from("output path would modify input in place")
3301        }
3302        RepairRefusal::InvalidOutputPath { reason } => reason.as_str().to_owned(),
3303    }
3304}
3305
3306fn exit_category_text(category: ExitCategory) -> &'static str {
3307    match category {
3308        ExitCategory::Success => "success",
3309        ExitCategory::ValidationFailed => "validation failed",
3310        ExitCategory::ProcessingFailed => "processing failed",
3311        ExitCategory::InternalError => "internal error",
3312    }
3313}
3314
3315fn location_text(location: &ObjectLocation) -> String {
3316    if let Some(path) = &location.path {
3317        return path.to_string();
3318    }
3319    if let Some(object) = location.object {
3320        return format!("object {} {}", object.number, object.generation);
3321    }
3322    if let Some(offset) = location.offset {
3323        return format!("offset {offset}");
3324    }
3325    String::from("unknown")
3326}
3327
3328fn assertion_message(assertion: &Assertion) -> &str {
3329    assertion
3330        .message
3331        .as_ref()
3332        .unwrap_or(&assertion.description)
3333        .as_str()
3334}
3335
3336fn assertion_status_text(status: AssertionStatus) -> &'static str {
3337    match status {
3338        AssertionStatus::Passed => "passed",
3339        AssertionStatus::Failed => "failed",
3340    }
3341}
3342
3343fn status_statement(status: ValidationStatus) -> &'static str {
3344    match status {
3345        ValidationStatus::Valid => "PDF file is compliant with Validation Profile requirements.",
3346        ValidationStatus::Invalid => {
3347            "PDF file is not compliant with Validation Profile requirements."
3348        }
3349        ValidationStatus::Encrypted => "PDF file is encrypted and could not be validated.",
3350        ValidationStatus::Incomplete => "Validation did not complete for all required rules.",
3351        ValidationStatus::ParseFailed => "PDF file could not be parsed.",
3352    }
3353}
3354
3355fn xref_fact_text(fact: &XrefFact) -> String {
3356    match fact {
3357        XrefFact::EolMarkersComply => String::from("eolMarkersComply"),
3358        XrefFact::MalformedClassic => String::from("malformedClassic"),
3359        XrefFact::XrefStreamUnsupported => String::from("xrefStreamUnsupported"),
3360        XrefFact::XrefStreamParsed {
3361            entries,
3362            compressed_entries,
3363        } => format!("xrefStreamParsed entries={entries} compressedEntries={compressed_entries}"),
3364        XrefFact::PrevChain { offset } => format!("prevChain offset={offset}"),
3365        XrefFact::HybridReference { offset } => format!("hybridReference offset={offset}"),
3366        XrefFact::ObjectStreamParsed => String::from("objectStreamParsed"),
3367    }
3368}
3369
3370fn stream_fact_text(fact: &StreamFact) -> String {
3371    match fact {
3372        StreamFact::Length {
3373            declared,
3374            discovered,
3375        } => format!("length declared={declared} discovered={discovered}"),
3376        StreamFact::KeywordSpacing {
3377            stream_keyword_crlf_compliant,
3378            endstream_keyword_eol_compliant,
3379        } => format!(
3380            "keywordSpacing streamKeywordCRLFCompliant={stream_keyword_crlf_compliant} \
3381             endstreamKeywordEolCompliant={endstream_keyword_eol_compliant}"
3382        ),
3383        StreamFact::Decoded { bytes } => format!("decoded bytes={bytes}"),
3384        StreamFact::FilterDecoded {
3385            filter,
3386            input_bytes,
3387            output_bytes,
3388        } => format!(
3389            "filterDecoded filter={} inputBytes={input_bytes} outputBytes={output_bytes}",
3390            filter.as_str()
3391        ),
3392        StreamFact::FilterMetadataMode { filter, bytes } => {
3393            format!(
3394                "filterMetadataMode filter={} bytes={bytes}",
3395                filter.as_str()
3396            )
3397        }
3398    }
3399}
3400
3401fn xmp_fact_text(fact: &XmpFact) -> String {
3402    match fact {
3403        XmpFact::PacketParsed {
3404            bytes,
3405            namespaces,
3406            claims,
3407        } => format!("packetParsed bytes={bytes} namespaces={namespaces} claims={claims}"),
3408        XmpFact::MissingPacketWrapper => String::from("missingPacketWrapper"),
3409        XmpFact::FlavourClaim {
3410            family,
3411            display_flavour,
3412            namespace_uri,
3413        } => format!(
3414            "flavourClaim family={} displayFlavour={} namespaceUri={}",
3415            family.as_str(),
3416            display_flavour.as_str(),
3417            namespace_uri.as_str()
3418        ),
3419        XmpFact::Malformed { reason } => format!("malformed reason={}", reason.as_str()),
3420        XmpFact::HostileXmlRejected { reason } => {
3421            format!("hostileXmlRejected reason={}", reason.as_str())
3422        }
3423    }
3424}
3425
3426fn warning_text(warning: &ValidationWarning) -> String {
3427    warning.message_text().to_string()
3428}
3429
/// Converts a duration to whole milliseconds, clamping to `u64::MAX` when
/// the value does not fit in 64 bits.
fn duration_millis(duration: Duration) -> u64 {
    match u64::try_from(duration.as_millis()) {
        Ok(millis) => millis,
        Err(_) => u64::MAX,
    }
}
3433
3434fn write_error(source: std::io::Error) -> PdfvError {
3435    ReportError::Write { source }.into()
3436}
3437
/// Formats a path as the suffix ` at <path>`, or an empty string when no
/// path is given.
fn format_optional_path(path: Option<&PathBuf>) -> String {
    match path {
        Some(location) => format!(" at {}", location.display()),
        None => String::new(),
    }
}
3442
3443#[derive(Clone, Copy, Debug)]
3444struct XmlEscapedText<'a>(&'a str);
3445
3446impl<'a> XmlEscapedText<'a> {
3447    fn new(value: &'a str) -> Result<Self> {
3448        ensure_xml_text(value)?;
3449        Ok(Self(value))
3450    }
3451}
3452
3453impl fmt::Display for XmlEscapedText<'_> {
3454    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
3455        for character in self.0.chars() {
3456            match character {
3457                '&' => formatter.write_str("&amp;")?,
3458                '<' => formatter.write_str("&lt;")?,
3459                '>' => formatter.write_str("&gt;")?,
3460                '"' => formatter.write_str("&quot;")?,
3461                '\'' => formatter.write_str("&apos;")?,
3462                _ => formatter.write_str(character.encode_utf8(&mut [0; 4]))?,
3463            }
3464        }
3465        Ok(())
3466    }
3467}
3468
3469#[derive(Clone, Copy, Debug)]
3470struct XmlEscapedAttr<'a>(&'a str);
3471
3472impl<'a> XmlEscapedAttr<'a> {
3473    fn new(value: &'a str) -> Result<Self> {
3474        ensure_xml_text(value)?;
3475        Ok(Self(value))
3476    }
3477}
3478
3479impl fmt::Display for XmlEscapedAttr<'_> {
3480    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
3481        XmlEscapedText(self.0).fmt(formatter)
3482    }
3483}
3484
3485#[derive(Clone, Copy, Debug)]
3486struct HtmlEscapedText<'a>(&'a str);
3487
3488impl<'a> HtmlEscapedText<'a> {
3489    fn new(value: &'a str) -> Result<Self> {
3490        ensure_xml_text(value)?;
3491        Ok(Self(value))
3492    }
3493}
3494
3495impl fmt::Display for HtmlEscapedText<'_> {
3496    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
3497        XmlEscapedText(self.0).fmt(formatter)
3498    }
3499}
3500
3501fn ensure_xml_text(value: &str) -> Result<()> {
3502    if value.chars().all(is_xml_char) {
3503        return Ok(());
3504    }
3505    Err(ReportError::Xml {
3506        message: BoundedText::unchecked("text contains characters forbidden by XML 1.0"),
3507    }
3508    .into())
3509}
3510
/// Reports whether `character` is tab, line feed, carriage return, or falls in
/// one of the ranges U+0020..=U+D7FF, U+E000..=U+FFFD, U+10000..=U+10FFFF.
fn is_xml_char(character: char) -> bool {
    matches!(
        character,
        '\u{09}'
            | '\u{0A}'
            | '\u{0D}'
            | '\u{20}'..='\u{D7FF}'
            | '\u{E000}'..='\u{FFFD}'
            | '\u{10000}'..='\u{10FFFF}'
    )
}
3517
3518#[cfg(test)]
3519mod tests {
3520    use std::{
3521        collections::BTreeMap,
3522        error::Error as StdError,
3523        num::{NonZeroU32, NonZeroU64},
3524        path::PathBuf,
3525        time::Duration,
3526    };
3527
3528    use super::{
3529        Assertion, AssertionStatus, BatchReport, BoundedText, ErrorArgument, ExitCategory,
3530        FeatureObject, FeatureReport, FeatureValue, HtmlReportWriter, Identifier, InputKind,
3531        InputSummary, JsonReportWriter, MaxDisplayedFailures, MetadataRepairOptions,
3532        MetadataRepairer, ObjectLocation, ObjectTypeName, PdfVersion, PolicyReport,
3533        PolicyRuleResult, ProfileIdentity, ProfileReport, PropertyName, RawXmlReportWriter,
3534        RepairAction, RepairBatchReport, RepairRefusal, RepairReport, RepairStatus, ReportFormat,
3535        ReportWriter, RuleId, TextReportWriter, ValidationOptions, ValidationReport,
3536        ValidationStatus, XmlReportWriter,
3537    };
3538
3539    fn sample_report() -> std::result::Result<ValidationReport, Box<dyn StdError>> {
3540        let profile_id = Identifier::new("pdfa-1b")?;
3541        let rule_id = RuleId(Identifier::new("6.1.2-1")?);
3542        Ok(ValidationReport::builder()
3543            .engine_version("0.1.0".to_owned())
3544            .source(InputSummary::new(InputKind::Memory, None, Some(42)))
3545            .status(ValidationStatus::Invalid)
3546            .flavours(vec![super::ValidationFlavour::new(
3547                "pdfa",
3548                NonZeroU32::MIN,
3549                "b",
3550            )?])
3551            .profile_reports(vec![
3552                ProfileReport::builder()
3553                    .profile(ProfileIdentity {
3554                        id: profile_id.clone(),
3555                        name: BoundedText::new("PDF/A-1B", 64)?,
3556                        version: None,
3557                    })
3558                    .is_compliant(false)
3559                    .checks_executed(1)
3560                    .rules_executed(1)
3561                    .failed_rules(1)
3562                    .failed_assertions(vec![Assertion {
3563                        ordinal: NonZeroU64::MIN,
3564                        rule_id,
3565                        status: AssertionStatus::Failed,
3566                        description: BoundedText::new("Header must start at byte zero", 128)?,
3567                        location: ObjectLocation {
3568                            object: None,
3569                            offset: Some(0),
3570                            path: None,
3571                        },
3572                        object_context: None,
3573                        message: Some(BoundedText::new("Header offset is non-zero", 128)?),
3574                        error_arguments: vec![ErrorArgument {
3575                            name: Identifier::new("offset")?,
3576                            value: BoundedText::new("12", 16)?,
3577                        }],
3578                    }])
3579                    .passed_assertions(Vec::new())
3580                    .unsupported_rules(Vec::new())
3581                    .build(),
3582            ])
3583            .parse_facts(vec![super::ParseFact::Header {
3584                offset: 12,
3585                version: PdfVersion { major: 1, minor: 7 },
3586                had_leading_bytes: true,
3587            }])
3588            .warnings(Vec::new())
3589            .task_durations(Vec::new())
3590            .build())
3591    }
3592
3593    fn sample_feature_policy_report() -> std::result::Result<ValidationReport, Box<dyn StdError>> {
3594        let mut report = sample_report()?;
3595        let mut properties = BTreeMap::new();
3596        properties.insert(PropertyName::new("hasMetadata")?, FeatureValue::Bool(false));
3597        report.feature_report = Some(
3598            FeatureReport::builder()
3599                .objects(vec![
3600                    FeatureObject::builder()
3601                        .family(ObjectTypeName::new("catalog".to_owned())?)
3602                        .location(ObjectLocation {
3603                            object: None,
3604                            offset: None,
3605                            path: Some(BoundedText::new("root/catalog[0]", 64)?),
3606                        })
3607                        .context(BoundedText::new("root/catalog[0]", 64)?)
3608                        .properties(properties)
3609                        .build(),
3610                ])
3611                .visited_objects(1)
3612                .selected_families(vec![ObjectTypeName::new("catalog".to_owned())?])
3613                .truncated(false)
3614                .build(),
3615        );
3616        report.policy_report = Some(
3617            PolicyReport::builder()
3618                .name(Some(BoundedText::new("catalog-policy", 64)?))
3619                .is_compliant(true)
3620                .results(vec![
3621                    PolicyRuleResult::builder()
3622                        .id(Identifier::new("catalog-has-no-metadata")?)
3623                        .description(BoundedText::new("Catalog metadata is absent", 128)?)
3624                        .passed(true)
3625                        .matches(1)
3626                        .message(BoundedText::new(
3627                            "policy rule catalog-has-no-metadata passed with 1 matching feature \
3628                             objects",
3629                            128,
3630                        )?)
3631                        .build(),
3632                ])
3633                .build(),
3634        );
3635        Ok(report)
3636    }
3637
3638    fn sample_repair_report() -> RepairReport {
3639        RepairReport::builder()
3640            .engine_version("0.1.0".to_owned())
3641            .source(InputSummary::new(
3642                InputKind::File,
3643                Some(PathBuf::from("input.pdf")),
3644                Some(42),
3645            ))
3646            .output_path(Some(PathBuf::from("out/repaired-input.pdf")))
3647            .status(RepairStatus::NoAction)
3648            .actions(vec![RepairAction::CopiedUnchanged])
3649            .refusal(None)
3650            .warnings(Vec::new())
3651            .task_durations(Vec::new())
3652            .build()
3653    }
3654
3655    #[test]
3656    fn test_should_apply_validation_options_defaults() {
3657        let options = ValidationOptions::default();
3658
3659        assert!(options.report_parse_warnings);
3660        assert!(!options.record_passed_assertions);
3661        assert_eq!(options.max_failed_assertions_per_rule.get(), 1);
3662    }
3663
3664    #[test]
3665    fn test_should_reject_zero_max_displayed_failures() {
3666        let result = MaxDisplayedFailures::try_from(0);
3667
3668        assert!(result.is_err());
3669    }
3670
3671    #[test]
3672    fn test_should_reject_invalid_identifier() {
3673        let result = Identifier::new("bad identifier");
3674
3675        assert!(result.is_err());
3676    }
3677
3678    #[test]
3679    fn test_should_serialize_validation_report_as_camel_case_json()
3680    -> std::result::Result<(), Box<dyn StdError>> {
3681        let report = sample_report()?;
3682        let json = serde_json::to_string_pretty(&report)?;
3683        let expected = r#"{
3684  "engineVersion": "0.1.0",
3685  "source": {
3686    "kind": "memory",
3687    "bytes": 42
3688  },
3689  "status": "invalid",
3690  "flavours": [
3691    {
3692      "family": "pdfa",
3693      "part": 1,
3694      "conformance": "b"
3695    }
3696  ],
3697  "profileReports": [
3698    {
3699      "profile": {
3700        "id": "pdfa-1b",
3701        "name": "PDF/A-1B"
3702      },
3703      "isCompliant": false,
3704      "checksExecuted": 1,
3705      "rulesExecuted": 1,
3706      "failedRules": 1,
3707      "failedAssertions": [
3708        {
3709          "ordinal": 1,
3710          "ruleId": "6.1.2-1",
3711          "status": "failed",
3712          "description": "Header must start at byte zero",
3713          "location": {
3714            "offset": 0
3715          },
3716          "message": "Header offset is non-zero",
3717          "errorArguments": [
3718            {
3719              "name": "offset",
3720              "value": "12"
3721            }
3722          ]
3723        }
3724      ],
3725      "passedAssertions": [],
3726      "unsupportedRules": []
3727    }
3728  ],
3729  "parseFacts": [
3730    {
3731      "kind": "header",
3732      "offset": 12,
3733      "version": {
3734        "major": 1,
3735        "minor": 7
3736      },
3737      "hadLeadingBytes": true
3738    }
3739  ],
3740  "warnings": [],
3741  "taskDurations": []
3742}"#;
3743
3744        assert_eq!(json, expected);
3745        Ok(())
3746    }
3747
3748    #[test]
3749    fn test_should_write_compact_json_report() -> std::result::Result<(), Box<dyn StdError>> {
3750        let report = sample_report()?;
3751        let mut output = Vec::new();
3752
3753        JsonReportWriter::compact()
3754            .write_report(&report, &mut output)
3755            .map_err(Box::<dyn StdError>::from)?;
3756
3757        let json = String::from_utf8(output)?;
3758        assert!(json.contains("\"engineVersion\":\"0.1.0\""));
3759        Ok(())
3760    }
3761
3762    #[test]
3763    fn test_should_write_text_report() -> std::result::Result<(), Box<dyn StdError>> {
3764        let report = sample_report()?;
3765        let mut output = Vec::new();
3766
3767        TextReportWriter
3768            .write_report(&report, &mut output)
3769            .map_err(Box::<dyn StdError>::from)?;
3770
3771        let text = String::from_utf8(output)?;
3772        let expected = "\
3773<memory>: invalid
3774profiles: pdfa-1b
3775checks: 0 passed, 1 failed, 0 unsupported
3776first failures:
3777  6.1.2-1 at offset 0: Header offset is non-zero
3778";
3779        assert_eq!(text, expected);
3780        Ok(())
3781    }
3782
3783    #[test]
3784    fn test_should_write_xml_report() -> std::result::Result<(), Box<dyn StdError>> {
3785        let report = sample_report()?;
3786        let mut output = Vec::new();
3787
3788        XmlReportWriter
3789            .write_report(&report, &mut output)
3790            .map_err(Box::<dyn StdError>::from)?;
3791
3792        let xml = String::from_utf8(output)?;
3793        assert!(xml.contains(r#"<?xml version="1.0" encoding="utf-8"?>"#));
3794        assert!(xml.contains("<report>"));
3795        assert!(xml.contains(r#"<validationReport profileName="PDF/A-1B""#));
3796        assert!(xml.contains(r#"<details passedRules="0" failedRules="1""#));
3797        assert!(xml.contains(r#"<check ruleId="6.1.2-1" status="failed" location="offset 0">"#));
3798        assert!(xml.contains(r#"<batchSummary totalJobs="1""#));
3799        Ok(())
3800    }
3801
3802    #[test]
3803    fn test_should_write_raw_xml_report_with_feature_and_policy_sections()
3804    -> std::result::Result<(), Box<dyn StdError>> {
3805        let report = sample_feature_policy_report()?;
3806        let mut output = Vec::new();
3807
3808        RawXmlReportWriter
3809            .write_report(&report, &mut output)
3810            .map_err(Box::<dyn StdError>::from)?;
3811
3812        let xml = String::from_utf8(output)?;
3813        let expected = format!(
3814            r#"<?xml version="1.0" encoding="utf-8"?>
3815<rawReport engine="pdfv-core" version="{version}">
3816  <processorConfig tasks="validation,features,policy"></processorConfig>
3817  <processorResults>
3818    <processorResult status="invalid">
3819      <item size="42">
3820        <name>&lt;memory&gt;</name>
3821      </item>
3822      <validationReport profileName="PDF/A-1B" statement="PDF file is not compliant with Validation Profile requirements." isCompliant="false">
3823        <details passedRules="0" failedRules="1" passedChecks="0" failedChecks="1" unsupportedRules="0"></details>
3824        <failedChecks>
3825          <check ruleId="6.1.2-1" status="failed" location="offset 0">
3826            <description>Header must start at byte zero</description>
3827            <message>Header offset is non-zero</message>
3828            <errorArguments>
3829              <argument name="offset">12</argument>
3830            </errorArguments>
3831          </check>
3832        </failedChecks>
3833      </validationReport>
3834      <featureReport visitedObjects="1" extractedObjects="1" truncated="false">
3835        <featureObject family="catalog" location="root/catalog[0]">
3836          <property name="hasMetadata">
3837            <value type="bool">false</value>
3838          </property>
3839        </featureObject>
3840      </featureReport>
3841      <policyReport name="catalog-policy" isCompliant="true">
3842        <rule id="catalog-has-no-metadata" passed="true" matches="1">
3843          <description>Catalog metadata is absent</description>
3844          <message>policy rule catalog-has-no-metadata passed with 1 matching feature objects</message>
3845        </rule>
3846      </policyReport>
3847      <parseFacts>
3848        <header offset="12" version="1.7" hadLeadingBytes="true"></header>
3849      </parseFacts>
3850    </processorResult>
3851  </processorResults>
3852  <batchSummary totalJobs="1" failedToParse="0" encrypted="0" incomplete="0" internalErrors="0">
3853    <validationReports compliant="0" nonCompliant="1" failedJobs="0">1</validationReports>
3854    <duration elapsedMillis="0"></duration>
3855  </batchSummary>
3856</rawReport>
3857"#,
3858            version = super::ENGINE_VERSION,
3859        );
3860        assert_eq!(xml, expected);
3861        Ok(())
3862    }
3863
3864    #[test]
3865    fn test_should_write_static_html_report() -> std::result::Result<(), Box<dyn StdError>> {
3866        let report = sample_report()?;
3867        let mut output = Vec::new();
3868
3869        HtmlReportWriter
3870            .write_report(&report, &mut output)
3871            .map_err(Box::<dyn StdError>::from)?;
3872
3873        let html = String::from_utf8(output)?;
3874        let expected = "\
3875<!doctype html>
3876<html lang=\"en\"><head><meta charset=\"utf-8\"><title>pdfv validation \
3877                        report</title><style>body{font-family:system-ui,sans-serif;margin:2rem;\
3878                        color:#1f2937}table{border-collapse:collapse;width:100%}th,td{border:1px \
3879                        solid #d1d5db;padding:.4rem;text-align:left}th{background:#f3f4f6}</\
3880                        style></head><body>
3881<h1>Validation Report</h1>
3882<p>0 valid, 1 invalid, 0 parse failed, 0 encrypted, 0 incomplete.</p>
3883<table><thead><tr><th>Input</th><th>Status</th><th>Profiles</th><th>Features</th><th>Policy</th></\
3884                        tr></thead><tbody>
3885<tr><td>&lt;memory&gt;</td><td>invalid</td><td>pdfa-1b</td><td>-</td><td>-</td></tr>
3886</tbody></table>
3887</body></html>
3888";
3889        assert_eq!(html, expected);
3890        Ok(())
3891    }
3892
3893    #[test]
3894    fn test_should_serialize_repair_report_and_summary()
3895    -> std::result::Result<(), Box<dyn StdError>> {
3896        let report = sample_repair_report();
3897        let json = serde_json::to_string_pretty(&report)?;
3898        assert!(json.contains(r#""status": "noAction""#));
3899        assert!(json.contains(r#""kind": "copiedUnchanged""#));
3900
3901        let refused = RepairReport::builder()
3902            .engine_version("0.1.0".to_owned())
3903            .source(InputSummary::new(
3904                InputKind::File,
3905                Some(PathBuf::from("bad.pdf")),
3906                None,
3907            ))
3908            .output_path(None)
3909            .status(RepairStatus::Refused)
3910            .actions(Vec::new())
3911            .refusal(Some(RepairRefusal::Encrypted))
3912            .warnings(Vec::new())
3913            .task_durations(Vec::new())
3914            .build();
3915        let batch =
3916            RepairBatchReport::from_items(vec![report, refused], Vec::new(), Duration::ZERO);
3917
3918        assert_eq!(batch.summary.no_action, 1);
3919        assert_eq!(batch.summary.refused, 1);
3920        assert_eq!(
3921            batch.summary.worst_exit_category,
3922            ExitCategory::ProcessingFailed
3923        );
3924        Ok(())
3925    }
3926
3927    #[test]
3928    fn test_should_write_repair_raw_xml_and_html() -> std::result::Result<(), Box<dyn StdError>> {
3929        let report = sample_repair_report();
3930        let mut raw = Vec::new();
3931        let mut html = Vec::new();
3932
3933        ReportFormat::RawXml
3934            .write_repair_report(&report, &mut raw)
3935            .map_err(Box::<dyn StdError>::from)?;
3936        ReportFormat::Html
3937            .write_repair_report(&report, &mut html)
3938            .map_err(Box::<dyn StdError>::from)?;
3939
3940        let raw = String::from_utf8(raw)?;
3941        let html = String::from_utf8(html)?;
3942        let expected_raw = format!(
3943            r#"<?xml version="1.0" encoding="utf-8"?>
3944<rawRepairReport engine="pdfv-core" version="{version}">
3945  <processorConfig tasks="metadata"></processorConfig>
3946  <items>
3947    <repairItem status="no action">
3948      <item size="42">
3949        <name>input.pdf</name>
3950      </item>
3951      <output>out/repaired-input.pdf</output>
3952      <actions>
3953        <action kind="copiedUnchanged">copied unchanged</action>
3954      </actions>
3955    </repairItem>
3956  </items>
3957  <repairSummary totalJobs="1" succeeded="0" noAction="1" refused="0" failed="0" elapsedMillis="0"></repairSummary>
3958</rawRepairReport>
3959"#,
3960            version = super::ENGINE_VERSION,
3961        );
3962        let expected_html =
3963            "\
3964<!doctype html>
3965<html lang=\"en\"><head><meta charset=\"utf-8\"><title>pdfv metadata repair \
3966             report</title><style>body{font-family:system-ui,sans-serif;margin:2rem;color:#\
3967             1f2937}table{border-collapse:collapse;width:100%}th,td{border:1px solid \
3968             #d1d5db;padding:.4rem;text-align:left}th{background:#f3f4f6}</style></head><body>
3969<h1>Metadata Repair Report</h1>
3970<p>0 repaired, 1 unchanged, 0 refused, 0 failed.</p>
3971<table><thead><tr><th>Input</th><th>Status</th><th>Output</th><th>Reason</th></tr></thead><tbody>
3972<tr><td>input.pdf</td><td>no action</td><td>out/repaired-input.pdf</td><td></td></tr>
3973</tbody></table>
3974</body></html>
3975";
3976        assert_eq!(raw, expected_raw);
3977        assert_eq!(html, expected_html);
3978        Ok(())
3979    }
3980
3981    #[test]
3982    #[allow(
3983        clippy::disallowed_methods,
3984        reason = "unit test creates local repair files synchronously"
3985    )]
3986    fn test_should_refuse_repair_when_output_already_exists_without_removing_it()
3987    -> std::result::Result<(), Box<dyn StdError>> {
3988        let temp = tempfile::tempdir()?;
3989        let input = temp.path().join("input.pdf");
3990        let output_dir = temp.path().join("out");
3991        let output = output_dir.join("input.pdf");
3992        std::fs::create_dir(&output_dir)?;
3993        std::fs::write(&input, b"not a valid pdf")?;
3994        std::fs::write(&output, b"existing output")?;
3995        let repairer = MetadataRepairer::new(MetadataRepairOptions::new(
3996            ValidationOptions::default(),
3997            &output_dir,
3998            "",
3999        )?)?;
4000
4001        let report = repairer.repair_path(&input)?;
4002
4003        assert_eq!(report.status, RepairStatus::Refused);
4004        assert!(matches!(
4005            report.refusal,
4006            Some(RepairRefusal::InvalidOutputPath { .. })
4007        ));
4008        assert_eq!(std::fs::read(&output)?, b"existing output");
4009        Ok(())
4010    }
4011
4012    #[test]
4013    fn test_should_reject_xml_forbidden_text() -> std::result::Result<(), Box<dyn StdError>> {
4014        let mut report = sample_report()?;
4015        let Some(profile) = report.profile_reports.first_mut() else {
4016            return Err("sample report must contain profile".into());
4017        };
4018        profile.profile.name = BoundedText::unchecked("bad\u{1}profile");
4019        let mut output = Vec::new();
4020
4021        let result = XmlReportWriter.write_report(&report, &mut output);
4022
4023        assert!(matches!(
4024            result,
4025            Err(super::PdfvError::Report(super::ReportError::Xml { .. }))
4026        ));
4027        Ok(())
4028    }
4029
4030    #[test]
4031    fn test_should_dispatch_pretty_json_report_format() -> std::result::Result<(), Box<dyn StdError>>
4032    {
4033        let report = sample_report()?;
4034        let mut output = Vec::new();
4035
4036        ReportFormat::JsonPretty
4037            .write_report(&report, &mut output)
4038            .map_err(Box::<dyn StdError>::from)?;
4039
4040        let json = String::from_utf8(output)?;
4041        assert!(json.contains("\n  \"engineVersion\": \"0.1.0\""));
4042        Ok(())
4043    }
4044
4045    #[test]
4046    fn test_should_dispatch_xml_report_format() -> std::result::Result<(), Box<dyn StdError>> {
4047        let report = sample_report()?;
4048        let mut output = Vec::new();
4049
4050        ReportFormat::Xml
4051            .write_report(&report, &mut output)
4052            .map_err(Box::<dyn StdError>::from)?;
4053
4054        let xml = String::from_utf8(output)?;
4055        assert!(xml.contains("<validationReport"));
4056        Ok(())
4057    }
4058
4059    #[test]
4060    fn test_should_compute_batch_summary() -> std::result::Result<(), Box<dyn StdError>> {
4061        let valid = ValidationReport::builder()
4062            .engine_version("0.1.0".to_owned())
4063            .source(InputSummary::new(InputKind::Memory, None, Some(42)))
4064            .status(ValidationStatus::Valid)
4065            .flavours(Vec::new())
4066            .profile_reports(Vec::new())
4067            .parse_facts(Vec::new())
4068            .warnings(Vec::new())
4069            .task_durations(Vec::new())
4070            .build();
4071        let invalid = sample_report()?;
4072
4073        let batch = BatchReport::from_items(vec![valid, invalid], Vec::new(), Duration::ZERO);
4074
4075        assert_eq!(batch.summary.total_files, 2);
4076        assert_eq!(batch.summary.valid, 1);
4077        assert_eq!(batch.summary.invalid, 1);
4078        assert_eq!(
4079            batch.summary.worst_exit_category,
4080            ExitCategory::ValidationFailed
4081        );
4082        Ok(())
4083    }
4084}