#![forbid(unsafe_code)]
#![warn(rust_2024_compatibility, missing_docs, missing_debug_implementations)]
mod generated_profiles;
mod parser;
mod profile;
mod validation;
mod xmp;
use std::{
collections::BTreeMap,
fmt,
io::{self, Write},
num::{NonZeroU32, NonZeroU64},
path::{Path, PathBuf},
time::Duration,
};
pub use parser::{
CosObject, DecodeParams, DecoderRegistry, Dictionary, IndirectObject, ObjectStore,
ParseOptions, ParsedDocument, Parser, PdfName, PdfSource, PdfString, SourceStorage,
StreamDecoder, StreamObject, Trailer,
};
#[cfg(feature = "custom-profiles")]
pub use profile::CustomProfileRepository;
pub use profile::{
BinaryOp, BuiltinFunction, BuiltinProfileRepository, ErrorTemplate, ModelValue, ObjectTypeName,
ProfileCatalogEntry, ProfileImportSummary, ProfileRepository, PropertyName, PropertyPath, Rule,
RuleEvaluator, RuleExpr, RuleOutcome, UnaryOp, ValidationProfile, display_flavour,
import_verapdf_profile_xml,
};
use secrecy::{ExposeSecret, SecretString};
use serde::{Deserialize, Serialize};
use thiserror::Error;
use typed_builder::TypedBuilder;
pub use validation::{
AnnotationModel, CatalogModel, ContentStreamModel, FeatureSelection, FontModel, InputName,
LinkName, MetadataModel, ModelGraph, ModelObject, ModelObjectRef, ObjectIdentity,
OutputIntentModel, PageModel, Validator,
};
pub use xmp::{
DetectedFlavours, FlavourClaim, FlavourDetector, NamespaceBinding, XmpIdentificationKind,
XmpPacket, XmpParser,
};
/// Version of this crate, captured from `CARGO_PKG_VERSION` at compile time.
pub const ENGINE_VERSION: &str = env!("CARGO_PKG_VERSION");
// Byte ceiling enforced by `Identifier::new`.
const MAX_IDENTIFIER_BYTES: usize = 128;
// Byte ceiling applied when a `BoundedText` is built from a `String` (the serde path).
const MAX_TEXT_BYTES: usize = 4096;
// Limit used by `PasswordSecret::new` and as the default for `max_password_bytes`.
const DEFAULT_MAX_PASSWORD_BYTES: usize = 1024;
// Largest limit `PasswordSecret::new_with_limit` will accept from a caller.
const HARD_MAX_PASSWORD_BYTES: usize = 4096;
// Default for both `max_string_bytes` and `max_decrypted_string_bytes` (1 MiB).
const DEFAULT_MAX_STRING_BYTES: usize = 1_048_576;
// Default for both `max_stream_decode_bytes` and `max_decrypted_stream_bytes` (256 MiB).
const DEFAULT_MAX_STREAM_DECODE_BYTES: u64 = 256 * 1024 * 1024;
// Default for `max_encryption_dict_entries`.
const DEFAULT_MAX_ENCRYPTION_DICT_ENTRIES: u64 = 64;
// Default for `memory_source_threshold_bytes` (16 MiB).
const DEFAULT_MEMORY_SOURCE_THRESHOLD_BYTES: u64 = 16 * 1024 * 1024;
// Defaults for the `max_xmp_*` fields of `ResourceLimits`.
const DEFAULT_MAX_XMP_BYTES: u64 = 4 * 1024 * 1024;
const DEFAULT_MAX_XMP_ELEMENTS: u64 = 50_000;
const DEFAULT_MAX_XMP_DEPTH: u32 = 32;
const DEFAULT_MAX_XMP_ATTRIBUTES: usize = 64;
const DEFAULT_MAX_XMP_NAMESPACES: usize = 256;
const DEFAULT_MAX_XMP_TEXT_BYTES: usize = 4096;
/// Crate-wide result alias whose error type is [`PdfvError`].
pub type Result<T> = std::result::Result<T, PdfvError>;
/// Top-level error type carried by the crate's `Result` alias.
///
/// Each subsystem error converts into this enum through `From`; the `Io` variant is
/// built explicitly so the path involved can be attached.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum PdfvError {
/// I/O failure, optionally tagged with the path the operation was acting on.
// The `{path}` fragment is rendered by `format_optional_path`, defined outside this view.
#[error("I/O error{path}: {source}", path = format_optional_path(.path.as_ref()))]
Io {
/// Path associated with the failure, when known.
path: Option<PathBuf>,
/// Underlying I/O error.
#[source]
source: std::io::Error,
},
/// Wrapped [`ParseError`].
#[error("parse error: {0}")]
Parse(#[from] ParseError),
/// Wrapped [`ProfileError`].
#[error("profile error: {0}")]
Profile(#[from] ProfileError),
/// Wrapped [`ValidationError`].
#[error("validation error: {0}")]
Validation(#[from] ValidationError),
/// Wrapped [`PolicyError`].
#[error("policy error: {0}")]
Policy(#[from] PolicyError),
/// Wrapped [`RepairError`].
#[error("repair error: {0}")]
Repair(#[from] RepairError),
/// Wrapped [`ReportError`].
#[error("report error: {0}")]
Report(#[from] ReportError),
/// Wrapped [`ConfigError`].
#[error("configuration error: {0}")]
Configuration(#[from] ConfigError),
}
/// Errors reported while parsing PDF input.
#[derive(Debug, Error, Clone, Eq, PartialEq)]
#[non_exhaustive]
pub enum ParseError {
/// A resource limit was exceeded.
#[error("resource limit exceeded: {limit}")]
LimitExceeded {
/// Static name of the limit that was exceeded.
limit: &'static str,
},
/// Arithmetic overflowed during parsing.
#[error("arithmetic overflow while parsing {context}")]
ArithmeticOverflow {
/// Static description of what was being parsed.
context: &'static str,
},
/// The PDF syntax was malformed.
#[error("malformed PDF syntax: {message}")]
Malformed {
/// Description of the syntax problem.
message: BoundedText,
},
/// A PDF object was missing.
#[error("missing PDF object: {message}")]
MissingObject {
/// Description of the missing object.
message: BoundedText,
},
/// A stream used a filter that is not supported.
#[error("unsupported stream filter: {filter}")]
UnsupportedFilter {
/// Text identifying the unsupported filter.
filter: BoundedText,
},
/// Decoding a stream failed.
#[error("stream decode failed: {message}")]
StreamDecode {
/// Description of the decode failure.
message: BoundedText,
},
}
/// Errors reported while selecting, loading, or evaluating validation profiles.
#[derive(Debug, Error, Clone, Eq, PartialEq)]
#[non_exhaustive]
pub enum ProfileError {
/// The requested profile selection is not supported.
#[error("unsupported profile selection")]
UnsupportedSelection,
/// A profile field held an invalid value.
#[error("invalid profile field {field}: {reason}")]
InvalidField {
/// Static name of the offending field.
field: &'static str,
/// Why the value was rejected.
reason: BoundedText,
},
/// A rule evaluation budget was exceeded.
#[error("rule evaluation budget exceeded: {budget}")]
BudgetExceeded {
/// Static name of the exhausted budget.
budget: &'static str,
},
/// A rule referenced a model property that is not known.
#[error("unknown model property {property}")]
UnknownProperty {
/// Text identifying the unknown property.
property: BoundedText,
},
/// A rule expression had mismatched types.
#[error("rule expression type mismatch: {message}")]
TypeMismatch {
/// Description of the mismatch.
message: BoundedText,
},
/// A rule expression is not supported.
#[error("unsupported rule expression: {reason}")]
UnsupportedRule {
/// Why the expression is unsupported.
reason: BoundedText,
},
/// Profile XML was invalid.
#[error("invalid profile XML: {reason}")]
InvalidXml {
/// Why the XML was rejected.
reason: BoundedText,
},
}
/// Errors raised by the validation stage itself (as opposed to parse or profile errors).
#[derive(Debug, Error, Clone, Eq, PartialEq)]
#[non_exhaustive]
pub enum ValidationError {
/// A validation subsystem is unavailable.
#[error("validation subsystem is unavailable: {subsystem}")]
SubsystemUnavailable {
/// Static name of the unavailable subsystem.
subsystem: &'static str,
},
/// A traversal limit was exceeded.
#[error("validation traversal limit exceeded: {limit}")]
LimitExceeded {
/// Static name of the limit that was exceeded.
limit: &'static str,
},
}
/// Errors related to policy definitions and their evaluation.
#[derive(Debug, Error, Clone, Eq, PartialEq)]
#[non_exhaustive]
pub enum PolicyError {
/// A policy field held an invalid value (for example, see `PolicySet::validate`).
#[error("invalid policy field {field}: {reason}")]
InvalidField {
/// Static name of the offending field.
field: &'static str,
/// Why the value was rejected.
reason: BoundedText,
},
/// A policy rule could not be evaluated.
#[error("policy rule could not be evaluated: {reason}")]
Evaluation {
/// Why evaluation failed.
reason: BoundedText,
},
}
/// Errors related to metadata repair configuration and execution.
#[derive(Debug, Error, Clone, Eq, PartialEq)]
#[non_exhaustive]
pub enum RepairError {
/// A repair input (output directory, prefix, file name, ...) was invalid.
#[error("invalid repair field {field}: {reason}")]
InvalidField {
/// Static name of the offending field.
field: &'static str,
/// Why the value was rejected.
reason: BoundedText,
},
/// Metadata repair failed.
#[error("metadata repair failed: {reason}")]
Failed {
/// Why the repair failed.
reason: BoundedText,
},
}
/// Errors raised while serializing or writing a report.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum ReportError {
/// JSON serialization failed; converts from `serde_json::Error` via `From`.
#[error("JSON serialization failed")]
Json {
/// Underlying serializer error.
#[from]
source: serde_json::Error,
},
/// XML serialization failed.
#[error("XML serialization failed: {message}")]
Xml {
/// Description of the failure.
message: BoundedText,
},
/// Writing report output failed.
// No `#[from]` here, so plain `std::io::Error` values are not converted implicitly.
#[error("report output write failed")]
Write {
/// Underlying I/O error.
#[source]
source: std::io::Error,
},
}
/// Errors raised when a configuration or bounded value fails validation.
#[derive(Debug, Error, Clone, Eq, PartialEq)]
#[non_exhaustive]
pub enum ConfigError {
/// A value violated its constraints.
#[error("invalid configuration value {field}: {reason}")]
InvalidValue {
/// Static name of the offending field.
field: &'static str,
/// Why the value was rejected.
reason: BoundedText,
},
}
/// Owned text whose byte length is checked against a bound at construction.
///
/// Serde round-trips the value through `String`; deserialization goes through the
/// `TryFrom<String>` impl and therefore applies `MAX_TEXT_BYTES`.
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Deserialize, Serialize)]
#[serde(try_from = "String", into = "String")]
pub struct BoundedText(String);

impl BoundedText {
    /// Wraps `value` when its UTF-8 byte length does not exceed `max_bytes`.
    ///
    /// # Errors
    ///
    /// Returns [`ConfigError::InvalidValue`] with field `"text"` when the value is too long.
    pub fn new(
        value: impl Into<String>,
        max_bytes: usize,
    ) -> std::result::Result<Self, ConfigError> {
        let text: String = value.into();
        if text.len() <= max_bytes {
            Ok(Self(text))
        } else {
            Err(ConfigError::InvalidValue {
                field: "text",
                reason: Self::unchecked("value exceeds byte limit"),
            })
        }
    }

    /// Borrows the wrapped text.
    #[must_use]
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }

    /// Wraps `value` without any length check; crate-internal.
    pub(crate) fn unchecked(value: impl Into<String>) -> Self {
        let text: String = value.into();
        Self(text)
    }
}

impl fmt::Display for BoundedText {
    /// Writes the wrapped text verbatim.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        formatter.write_str(self.as_str())
    }
}

impl TryFrom<String> for BoundedText {
    type Error = ConfigError;

    /// Applies the `MAX_TEXT_BYTES` bound to `value`.
    fn try_from(value: String) -> std::result::Result<Self, Self::Error> {
        BoundedText::new(value, MAX_TEXT_BYTES)
    }
}

impl From<BoundedText> for String {
    /// Unwraps the inner `String`.
    fn from(value: BoundedText) -> Self {
        let BoundedText(inner) = value;
        inner
    }
}
/// Short ASCII identifier restricted to letters, digits, `-`, `_`, `.`, and `:`.
///
/// Serde round-trips the value through `String`; deserialization re-runs the checks in
/// [`Identifier::new`].
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Deserialize, Serialize)]
#[serde(try_from = "String", into = "String")]
pub struct Identifier(String);

impl Identifier {
    /// Validates `value` and wraps it.
    ///
    /// # Errors
    ///
    /// Returns [`ConfigError::InvalidValue`] with field `"identifier"` when the value is
    /// empty, longer than `MAX_IDENTIFIER_BYTES`, or contains a byte outside the allowed set.
    pub fn new(value: impl Into<String>) -> std::result::Result<Self, ConfigError> {
        let candidate: String = value.into();
        let length_ok = !candidate.is_empty() && candidate.len() <= MAX_IDENTIFIER_BYTES;
        let charset_ok = candidate.bytes().all(|byte| {
            matches!(
                byte,
                b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b':'
            )
        });
        if length_ok && charset_ok {
            return Ok(Self(candidate));
        }
        Err(ConfigError::InvalidValue {
            field: "identifier",
            reason: BoundedText::unchecked("identifier violates byte or charset policy"),
        })
    }

    /// Borrows the identifier text.
    #[must_use]
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }

    /// Wraps `value` without validation; crate-internal.
    pub(crate) fn unchecked(value: impl Into<String>) -> Self {
        let text: String = value.into();
        Self(text)
    }
}

impl TryFrom<String> for Identifier {
    type Error = ConfigError;

    /// Delegates to [`Identifier::new`].
    fn try_from(value: String) -> std::result::Result<Self, Self::Error> {
        Identifier::new(value)
    }
}

impl From<Identifier> for String {
    /// Unwraps the inner `String`.
    fn from(value: Identifier) -> Self {
        let Identifier(inner) = value;
        inner
    }
}
/// Password held in a `SecretString`; its `Debug` output is redacted.
#[derive(Clone)]
pub struct PasswordSecret(SecretString);

impl PasswordSecret {
    /// Builds a secret using `DEFAULT_MAX_PASSWORD_BYTES` as the length limit.
    ///
    /// # Errors
    ///
    /// See [`PasswordSecret::new_with_limit`].
    pub fn new(value: impl Into<String>) -> std::result::Result<Self, ConfigError> {
        Self::new_with_limit(value, DEFAULT_MAX_PASSWORD_BYTES)
    }

    /// Builds a secret whose byte length must not exceed `max_bytes`.
    ///
    /// # Errors
    ///
    /// Returns [`ConfigError::InvalidValue`] with field `"maxPasswordBytes"` when
    /// `max_bytes` is above `HARD_MAX_PASSWORD_BYTES`, or with field `"password"` when the
    /// value is longer than `max_bytes`.
    pub fn new_with_limit(
        value: impl Into<String>,
        max_bytes: usize,
    ) -> std::result::Result<Self, ConfigError> {
        let reject = |field: &'static str, reason: &'static str| ConfigError::InvalidValue {
            field,
            reason: BoundedText::unchecked(reason),
        };
        // The limit is checked before the value is converted.
        if max_bytes > HARD_MAX_PASSWORD_BYTES {
            return Err(reject("maxPasswordBytes", "value exceeds hard cap"));
        }
        let password: String = value.into();
        if password.len() > max_bytes {
            return Err(reject("password", "password exceeds byte limit"));
        }
        Ok(Self(SecretString::from(password)))
    }

    /// Exposes the raw password bytes; crate-internal.
    pub(crate) fn expose_secret_bytes(&self) -> &[u8] {
        let exposed = self.0.expose_secret();
        exposed.as_bytes()
    }
}

impl fmt::Debug for PasswordSecret {
    /// Writes a fixed placeholder instead of the secret.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        const REDACTED: &str = "PasswordSecret([REDACTED])";
        formatter.write_str(REDACTED)
    }
}
/// Options controlling a validation run.
///
/// Serialized with camelCase keys; unknown keys are rejected on deserialization.
/// Every field has a builder default, so `ValidationOptions::builder().build()` is valid
/// and is what `Default` returns.
#[derive(Clone, Debug, Deserialize, Serialize, TypedBuilder)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct ValidationOptions {
/// Flavour selection; builder default is `FlavourSelection::default()`.
#[builder(default)]
pub flavour: FlavourSelection,
/// Resource limits; builder default is `ResourceLimits::default()`.
#[builder(default)]
pub resource_limits: ResourceLimits,
/// Optional password. Never serialized or deserialized (`serde(skip)`).
#[builder(default)]
#[serde(skip, default)]
pub password: Option<PasswordSecret>,
/// Builder default is `MaxDisplayedFailures::default()`.
#[builder(default)]
pub max_failed_assertions_per_rule: MaxDisplayedFailures,
/// Builder default is `false`.
#[builder(default)]
pub record_passed_assertions: bool,
/// Builder default is `true`.
#[builder(default = true)]
pub report_parse_warnings: bool,
/// Builder default is `FeatureSelection::default()`.
#[builder(default)]
pub feature_selection: FeatureSelection,
/// Optional policy set; omitted from serialized output when `None`.
#[builder(default)]
#[serde(skip_serializing_if = "Option::is_none")]
pub policy: Option<PolicySet>,
}
impl Default for ValidationOptions {
/// Returns the options produced by the builder with every field left at its default.
fn default() -> Self {
Self::builder().build()
}
}
/// How the validation flavour (or a custom profile) is chosen.
///
/// Variant names are serialized in camelCase.
// NOTE(review): the enum-level `rename_all` is understood to rename variants only, so
// `profile_path` presumably serializes as `profile_path` rather than `profilePath` —
// confirm against the serde documentation and any stored configuration before changing.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub enum FlavourSelection {
/// Automatic selection with an optional default flavour.
Auto {
/// Optional default flavour.
default: Option<ValidationFlavour>,
},
/// A specific flavour chosen by the caller.
Explicit {
/// The chosen flavour.
flavour: ValidationFlavour,
},
/// A custom profile identified by a filesystem path.
CustomProfile {
/// Path of the custom profile.
profile_path: PathBuf,
},
}
impl Default for FlavourSelection {
fn default() -> Self {
Self::Auto {
default: Some(ValidationFlavour {
family: Identifier::unchecked("pdfa"),
part: NonZeroU32::MIN,
conformance: Identifier::unchecked("b"),
}),
}
}
}
/// A validation flavour described by family, part number, and conformance level.
///
/// Serialized with camelCase keys; unknown keys are rejected on deserialization.
#[derive(Clone, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct ValidationFlavour {
    /// Family identifier (the crate default uses `pdfa`).
    pub family: Identifier,
    /// Non-zero part number.
    pub part: NonZeroU32,
    /// Conformance identifier (the crate default uses `b`).
    pub conformance: Identifier,
}

impl ValidationFlavour {
    /// Builds a flavour, validating `family` and then `conformance` as identifiers.
    ///
    /// # Errors
    ///
    /// Returns the [`ConfigError`] from [`Identifier::new`] for the first invalid value.
    pub fn new(
        family: impl Into<String>,
        part: NonZeroU32,
        conformance: impl Into<String>,
    ) -> std::result::Result<Self, ConfigError> {
        let family = Identifier::new(family)?;
        let conformance = Identifier::new(conformance)?;
        Ok(Self {
            family,
            part,
            conformance,
        })
    }
}
/// Resource limits for parsing and validation.
///
/// Serialized with camelCase keys; unknown keys are rejected on deserialization.
/// Fields carrying `#[builder(default = ...)]` / `#[serde(default = ...)]` may be omitted
/// from the builder or the serialized form; the remaining fields are required there.
/// Enforcement of these limits happens outside this chunk — the field notes below only
/// restate the field names and the defaults visible in this file.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize, TypedBuilder)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct ResourceLimits {
/// File size limit in bytes.
pub max_file_bytes: u64,
/// Object count limit.
pub max_objects: u64,
/// Object nesting depth limit.
pub max_object_depth: u32,
/// Array length limit.
pub max_array_len: u64,
/// Dictionary entry count limit.
pub max_dict_entries: u64,
/// Name length limit in bytes.
pub max_name_bytes: usize,
/// String length limit in bytes.
pub max_string_bytes: usize,
/// Password length limit in bytes; defaults to `DEFAULT_MAX_PASSWORD_BYTES`.
#[builder(default = DEFAULT_MAX_PASSWORD_BYTES)]
#[serde(default = "default_max_password_bytes")]
pub max_password_bytes: usize,
/// Decrypted string length limit in bytes; defaults to `DEFAULT_MAX_STRING_BYTES`.
#[builder(default = DEFAULT_MAX_STRING_BYTES)]
#[serde(default = "default_max_decrypted_string_bytes")]
pub max_decrypted_string_bytes: usize,
/// Declared stream length limit in bytes.
pub max_stream_declared_bytes: u64,
/// Stream decode size limit in bytes.
pub max_stream_decode_bytes: u64,
/// Decrypted stream size limit in bytes; defaults to `DEFAULT_MAX_STREAM_DECODE_BYTES`.
#[builder(default = DEFAULT_MAX_STREAM_DECODE_BYTES)]
#[serde(default = "default_max_decrypted_stream_bytes")]
pub max_decrypted_stream_bytes: u64,
/// Encryption dictionary entry limit; defaults to `DEFAULT_MAX_ENCRYPTION_DICT_ENTRIES`.
#[builder(default = DEFAULT_MAX_ENCRYPTION_DICT_ENTRIES)]
#[serde(default = "default_max_encryption_dict_entries")]
pub max_encryption_dict_entries: u64,
/// Memory-source threshold in bytes; defaults to `DEFAULT_MEMORY_SOURCE_THRESHOLD_BYTES`.
#[builder(default = DEFAULT_MEMORY_SOURCE_THRESHOLD_BYTES)]
#[serde(default = "default_memory_source_threshold_bytes")]
pub memory_source_threshold_bytes: u64,
/// Parse fact count limit.
pub max_parse_facts: usize,
/// XMP size limit in bytes; defaults to `DEFAULT_MAX_XMP_BYTES`.
#[builder(default = DEFAULT_MAX_XMP_BYTES)]
#[serde(default = "default_max_xmp_bytes")]
pub max_xmp_bytes: u64,
/// XMP element count limit; defaults to `DEFAULT_MAX_XMP_ELEMENTS`.
#[builder(default = DEFAULT_MAX_XMP_ELEMENTS)]
#[serde(default = "default_max_xmp_elements")]
pub max_xmp_elements: u64,
/// XMP nesting depth limit; defaults to `DEFAULT_MAX_XMP_DEPTH`.
#[builder(default = DEFAULT_MAX_XMP_DEPTH)]
#[serde(default = "default_max_xmp_depth")]
pub max_xmp_depth: u32,
/// XMP attribute count limit; defaults to `DEFAULT_MAX_XMP_ATTRIBUTES`.
#[builder(default = DEFAULT_MAX_XMP_ATTRIBUTES)]
#[serde(default = "default_max_xmp_attributes")]
pub max_xmp_attributes: usize,
/// XMP namespace count limit; defaults to `DEFAULT_MAX_XMP_NAMESPACES`.
#[builder(default = DEFAULT_MAX_XMP_NAMESPACES)]
#[serde(default = "default_max_xmp_namespaces")]
pub max_xmp_namespaces: usize,
/// XMP text length limit in bytes; defaults to `DEFAULT_MAX_XMP_TEXT_BYTES`.
#[builder(default = DEFAULT_MAX_XMP_TEXT_BYTES)]
#[serde(default = "default_max_xmp_text_bytes")]
pub max_xmp_text_bytes: usize,
}
impl Default for ResourceLimits {
/// Returns the crate's default limits.
///
/// Fields that also have builder/serde defaults use the same `DEFAULT_*` constants here,
/// so all three construction paths agree on those values.
fn default() -> Self {
Self {
// 256 MiB.
max_file_bytes: 256 * 1024 * 1024,
max_objects: 1_000_000,
max_object_depth: 128,
max_array_len: 65_536,
max_dict_entries: 16_384,
max_name_bytes: 127,
max_string_bytes: DEFAULT_MAX_STRING_BYTES,
max_password_bytes: DEFAULT_MAX_PASSWORD_BYTES,
max_decrypted_string_bytes: DEFAULT_MAX_STRING_BYTES,
// 128 MiB.
max_stream_declared_bytes: 128 * 1024 * 1024,
max_stream_decode_bytes: DEFAULT_MAX_STREAM_DECODE_BYTES,
max_decrypted_stream_bytes: DEFAULT_MAX_STREAM_DECODE_BYTES,
max_encryption_dict_entries: DEFAULT_MAX_ENCRYPTION_DICT_ENTRIES,
memory_source_threshold_bytes: DEFAULT_MEMORY_SOURCE_THRESHOLD_BYTES,
max_parse_facts: 100_000,
max_xmp_bytes: DEFAULT_MAX_XMP_BYTES,
max_xmp_elements: DEFAULT_MAX_XMP_ELEMENTS,
max_xmp_depth: DEFAULT_MAX_XMP_DEPTH,
max_xmp_attributes: DEFAULT_MAX_XMP_ATTRIBUTES,
max_xmp_namespaces: DEFAULT_MAX_XMP_NAMESPACES,
max_xmp_text_bytes: DEFAULT_MAX_XMP_TEXT_BYTES,
}
}
}
// Serde `default = "..."` attributes take a function path, so each `ResourceLimits`
// default constant is exposed through a small function below.

// Serde default for `ResourceLimits::max_password_bytes`.
fn default_max_password_bytes() -> usize {
DEFAULT_MAX_PASSWORD_BYTES
}
// Serde default for `ResourceLimits::max_decrypted_string_bytes`.
fn default_max_decrypted_string_bytes() -> usize {
DEFAULT_MAX_STRING_BYTES
}
// Serde default for `ResourceLimits::max_decrypted_stream_bytes`.
fn default_max_decrypted_stream_bytes() -> u64 {
DEFAULT_MAX_STREAM_DECODE_BYTES
}
// Serde default for `ResourceLimits::max_encryption_dict_entries`.
fn default_max_encryption_dict_entries() -> u64 {
DEFAULT_MAX_ENCRYPTION_DICT_ENTRIES
}
// Serde default for `ResourceLimits::memory_source_threshold_bytes`.
fn default_memory_source_threshold_bytes() -> u64 {
DEFAULT_MEMORY_SOURCE_THRESHOLD_BYTES
}
// Serde default for `ResourceLimits::max_xmp_bytes`.
fn default_max_xmp_bytes() -> u64 {
DEFAULT_MAX_XMP_BYTES
}
// Serde default for `ResourceLimits::max_xmp_elements`.
fn default_max_xmp_elements() -> u64 {
DEFAULT_MAX_XMP_ELEMENTS
}
// Serde default for `ResourceLimits::max_xmp_depth`.
fn default_max_xmp_depth() -> u32 {
DEFAULT_MAX_XMP_DEPTH
}
// Serde default for `ResourceLimits::max_xmp_attributes`.
fn default_max_xmp_attributes() -> usize {
DEFAULT_MAX_XMP_ATTRIBUTES
}
// Serde default for `ResourceLimits::max_xmp_namespaces`.
fn default_max_xmp_namespaces() -> usize {
DEFAULT_MAX_XMP_NAMESPACES
}
// Serde default for `ResourceLimits::max_xmp_text_bytes`.
fn default_max_xmp_text_bytes() -> usize {
DEFAULT_MAX_XMP_TEXT_BYTES
}
/// Non-zero count wrapper; serde round-trips it through `u32` and rejects zero.
#[derive(Clone, Copy, Debug, Deserialize, Eq, Ord, PartialEq, PartialOrd, Serialize)]
#[serde(try_from = "u32", into = "u32")]
pub struct MaxDisplayedFailures(NonZeroU32);

impl MaxDisplayedFailures {
    /// Wraps an already non-zero value.
    #[must_use]
    pub fn new(value: NonZeroU32) -> Self {
        MaxDisplayedFailures(value)
    }

    /// Returns the wrapped value as a plain `u32`.
    #[must_use]
    pub fn get(self) -> u32 {
        let Self(inner) = self;
        inner.get()
    }
}

impl Default for MaxDisplayedFailures {
    /// Defaults to `NonZeroU32::MIN`, i.e. 1.
    fn default() -> Self {
        Self::new(NonZeroU32::MIN)
    }
}

impl TryFrom<u32> for MaxDisplayedFailures {
    type Error = ConfigError;

    /// Converts from `u32`, rejecting zero.
    ///
    /// # Errors
    ///
    /// Returns [`ConfigError::InvalidValue`] with field `"maxFailedAssertionsPerRule"`
    /// when `value` is zero.
    fn try_from(value: u32) -> std::result::Result<Self, Self::Error> {
        NonZeroU32::new(value)
            .map(Self)
            .ok_or_else(|| ConfigError::InvalidValue {
                field: "maxFailedAssertionsPerRule",
                reason: BoundedText::unchecked("value must be greater than zero"),
            })
    }
}

impl From<MaxDisplayedFailures> for u32 {
    /// Unwraps to the plain integer.
    fn from(value: MaxDisplayedFailures) -> Self {
        value.0.get()
    }
}
/// Serializable result of validating one input.
///
/// Serialized with camelCase keys; unknown keys are rejected on deserialization.
/// Constructed through the generated `TypedBuilder` builder.
#[derive(Clone, Debug, Deserialize, Serialize, TypedBuilder)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct ValidationReport {
/// Engine version string recorded in the report.
pub engine_version: String,
/// Summary of the input the report refers to.
pub source: InputSummary,
/// Overall validation status.
pub status: ValidationStatus,
/// Flavours associated with this report.
pub flavours: Vec<ValidationFlavour>,
/// Per-profile reports.
pub profile_reports: Vec<ProfileReport>,
/// Parse facts recorded for the input.
pub parse_facts: Vec<ParseFact>,
/// Warnings attached to the report.
pub warnings: Vec<ValidationWarning>,
/// Optional feature report; builder default `None`, omitted from output when `None`.
#[builder(default)]
#[serde(skip_serializing_if = "Option::is_none")]
pub feature_report: Option<FeatureReport>,
/// Optional policy report; builder default `None`, omitted from output when `None`.
#[builder(default)]
#[serde(skip_serializing_if = "Option::is_none")]
pub policy_report: Option<PolicyReport>,
/// Recorded task durations.
pub task_durations: Vec<TaskDuration>,
}
/// Serializable feature report (camelCase keys, unknown keys rejected).
#[derive(Clone, Debug, Deserialize, Serialize, TypedBuilder)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct FeatureReport {
/// Reported feature objects.
pub objects: Vec<FeatureObject>,
/// Count of visited objects.
pub visited_objects: u64,
/// Object families that were selected.
pub selected_families: Vec<ObjectTypeName>,
/// Truncation flag; the condition that sets it is outside this chunk.
pub truncated: bool,
}
/// One object entry in a [`FeatureReport`] (camelCase keys, unknown keys rejected).
#[derive(Clone, Debug, Deserialize, Serialize, TypedBuilder)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct FeatureObject {
/// Object family name.
pub family: ObjectTypeName,
/// Location of the object.
pub location: ObjectLocation,
/// Context text for the object.
pub context: BoundedText,
/// Property values keyed by property name; `BTreeMap` keeps keys in sorted order.
pub properties: BTreeMap<PropertyName, FeatureValue>,
}
/// Value of a feature property.
///
/// Serialized adjacently tagged: the variant name (camelCase) under `type` and the
/// payload under `value`.
#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", tag = "type", content = "value")]
pub enum FeatureValue {
/// No value.
Null,
/// Boolean value.
Bool(bool),
/// Floating-point number.
Number(f64),
/// Bounded text value.
String(BoundedText),
/// String whose content is withheld; only a byte count is carried.
RedactedString {
/// Byte count recorded for the redacted string.
bytes: u64,
},
/// Reference to an object by key.
ObjectKey(ObjectKey),
/// List of nested values.
List(Vec<FeatureValue>),
}
/// A named collection of policy rules (camelCase keys, unknown keys rejected).
///
/// The derived `Default` has no rules and therefore does not pass [`PolicySet::validate`].
#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct PolicySet {
    /// Optional policy name; may be absent on input and is omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub name: Option<BoundedText>,
    /// Rules in this policy.
    pub rules: Vec<PolicyRule>,
}

impl PolicySet {
    /// Checks that the policy holds between 1 and 1024 rules.
    ///
    /// # Errors
    ///
    /// Returns [`PolicyError::InvalidField`] with field `"rules"` when the rule list is
    /// empty or longer than the limit.
    pub fn validate(&self) -> std::result::Result<(), PolicyError> {
        const MAX_POLICY_RULES: usize = 1024;
        let problem = match self.rules.len() {
            0 => "policy must contain at least one rule",
            count if count > MAX_POLICY_RULES => "policy rule count exceeds limit",
            _ => return Ok(()),
        };
        Err(PolicyError::InvalidField {
            field: "rules",
            reason: BoundedText::unchecked(problem),
        })
    }
}
/// One policy rule (camelCase keys, unknown keys rejected).
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct PolicyRule {
/// Rule identifier.
pub id: Identifier,
/// Rule description.
pub description: BoundedText,
/// Object family named by the rule.
pub family: ObjectTypeName,
/// Property named by the rule.
pub field: PropertyName,
/// Operator used by the rule.
pub operator: PolicyOperator,
/// Optional operand; may be absent on input and is omitted from output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub value: Option<PolicyValue>,
}
/// Operator of a [`PolicyRule`]; serialized in camelCase.
// Evaluation semantics live outside this chunk; the variant notes only restate the names.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase")]
pub enum PolicyOperator {
/// `exists`.
Exists,
/// `absent`.
Absent,
/// `equals`.
Equals,
/// `notEquals`.
NotEquals,
/// `min`.
Min,
/// `max`.
Max,
}
/// Operand of a [`PolicyRule`].
///
/// Serialized adjacently tagged: the variant name (camelCase) under `type` and the
/// payload under `value`.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", tag = "type", content = "value")]
pub enum PolicyValue {
/// Boolean operand.
Bool(bool),
/// Signed 32-bit integer operand.
Number(i32),
/// Bounded text operand.
String(BoundedText),
}
/// Serializable policy report (camelCase keys, unknown keys rejected).
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize, TypedBuilder)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct PolicyReport {
/// Optional policy name; builder default `None`, omitted from output when `None`.
#[builder(default)]
#[serde(skip_serializing_if = "Option::is_none")]
pub name: Option<BoundedText>,
/// Compliance flag for the policy as a whole.
pub is_compliant: bool,
/// Per-rule results.
pub results: Vec<PolicyRuleResult>,
}
/// Result for a single policy rule (camelCase keys, unknown keys rejected).
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize, TypedBuilder)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct PolicyRuleResult {
/// Rule identifier.
pub id: Identifier,
/// Rule description.
pub description: BoundedText,
/// Pass/fail flag for the rule.
pub passed: bool,
/// Match count recorded for the rule.
pub matches: u64,
/// Message text for the result.
pub message: BoundedText,
}
/// Serializable result of one metadata repair attempt.
///
/// Serialized with camelCase keys; unknown keys are rejected on deserialization.
/// No field has a builder default, so every field must be set on the builder.
#[derive(Clone, Debug, Deserialize, Serialize, TypedBuilder)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct RepairReport {
/// Engine version string recorded in the report.
pub engine_version: String,
/// Summary of the input file.
pub source: InputSummary,
/// Output path, if any; omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub output_path: Option<PathBuf>,
/// Outcome of the repair attempt.
pub status: RepairStatus,
/// Actions that were performed.
pub actions: Vec<RepairAction>,
/// Refusal reason, if any; omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub refusal: Option<RepairRefusal>,
/// Warnings attached to the report.
pub warnings: Vec<ValidationWarning>,
/// Recorded task durations.
pub task_durations: Vec<TaskDuration>,
}
impl RepairReport {
    /// Returns `true` when the status is `Succeeded` or `NoAction` and an output path is set.
    #[must_use]
    pub fn wrote_output(&self) -> bool {
        let completed = match self.status {
            RepairStatus::Succeeded | RepairStatus::NoAction => true,
            RepairStatus::Refused | RepairStatus::Failed => false,
        };
        completed && self.output_path.is_some()
    }
}
/// Options for a [`MetadataRepairer`].
///
/// [`MetadataRepairOptions::new`] validates the output directory and prefix; the fields
/// are public, so values set directly do not go through those checks.
#[derive(Clone, Debug)]
pub struct MetadataRepairOptions {
    /// Options for the validator used during repair.
    pub validation_options: ValidationOptions,
    /// Output directory; canonicalized when built through `new`.
    pub output_dir: PathBuf,
    /// Prefix prepended to the input file name to form the output file name.
    pub prefix: String,
}

impl MetadataRepairOptions {
    /// Builds options after validating `output_dir` and then `prefix`.
    ///
    /// # Errors
    ///
    /// Returns an error when the directory cannot be inspected or canonicalized, is not a
    /// directory, or when the prefix violates the prefix policy.
    pub fn new(
        validation_options: ValidationOptions,
        output_dir: impl AsRef<Path>,
        prefix: impl Into<String>,
    ) -> Result<Self> {
        let output_dir = validate_repair_output_dir(output_dir.as_ref())?;
        let prefix = validate_repair_prefix(&prefix.into())?;
        Ok(Self {
            validation_options,
            output_dir,
            prefix,
        })
    }
}
#[derive(Debug)]
pub struct MetadataRepairer {
validator: Validator,
output_dir: PathBuf,
prefix: String,
}
impl MetadataRepairer {
pub fn new(options: MetadataRepairOptions) -> Result<Self> {
Ok(Self {
validator: Validator::new(options.validation_options)?,
output_dir: options.output_dir,
prefix: options.prefix,
})
}
pub fn repair_path(&self, path: impl AsRef<Path>) -> Result<RepairReport> {
repair_metadata_path(
&self.validator,
path.as_ref(),
&self.output_dir,
&self.prefix,
)
}
}
/// Serializable report for a batch of repair attempts (camelCase keys, unknown keys
/// rejected).
#[derive(Clone, Debug, Deserialize, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct RepairBatchReport {
    /// Per-file reports.
    pub items: Vec<RepairReport>,
    /// Aggregate computed from `items`.
    pub summary: RepairBatchSummary,
    /// Batch-level warnings.
    pub warnings: Vec<ValidationWarning>,
}

impl RepairBatchReport {
    /// Builds a batch report, deriving the summary from `items` and `elapsed`.
    #[must_use]
    pub fn from_items(
        items: Vec<RepairReport>,
        warnings: Vec<ValidationWarning>,
        elapsed: Duration,
    ) -> Self {
        // The summary borrows `items` before the vector is moved into the report.
        Self {
            summary: RepairBatchSummary::from_items(&items, elapsed),
            items,
            warnings,
        }
    }
}
/// Aggregate counts for a repair batch (camelCase keys, unknown keys rejected).
#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize, TypedBuilder)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct RepairBatchSummary {
    /// Number of items in the batch.
    pub total_files: u64,
    /// Items with status `Succeeded`.
    pub succeeded: u64,
    /// Items with status `NoAction`.
    pub no_action: u64,
    /// Items with status `Refused`.
    pub refused: u64,
    /// Items with status `Failed`.
    pub failed: u64,
    /// Elapsed time in milliseconds.
    pub elapsed_millis: u64,
    /// Most severe exit category across the batch.
    pub worst_exit_category: ExitCategory,
}

impl RepairBatchSummary {
    /// Tallies `items` by status and derives the worst exit category.
    ///
    /// Any failed item yields `InternalError`; otherwise any refused item yields
    /// `ProcessingFailed`; otherwise the result is `Success`. Counters saturate.
    #[must_use]
    pub fn from_items(items: &[RepairReport], elapsed: Duration) -> Self {
        let mut summary = Self {
            total_files: u64::try_from(items.len()).unwrap_or(u64::MAX),
            elapsed_millis: duration_millis(elapsed),
            ..Self::default()
        };
        for report in items {
            // Pick the counter for this status, then bump it.
            let counter = match report.status {
                RepairStatus::Succeeded => &mut summary.succeeded,
                RepairStatus::NoAction => &mut summary.no_action,
                RepairStatus::Refused => &mut summary.refused,
                RepairStatus::Failed => &mut summary.failed,
            };
            *counter = counter.saturating_add(1);
        }
        summary.worst_exit_category = match (summary.failed, summary.refused) {
            (failed, _) if failed > 0 => ExitCategory::InternalError,
            (_, refused) if refused > 0 => ExitCategory::ProcessingFailed,
            _ => ExitCategory::Success,
        };
        summary
    }
}
/// Outcome of a repair attempt; serialized in camelCase.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase")]
pub enum RepairStatus {
/// Repair succeeded.
// NOTE(review): nothing in this chunk produces `Succeeded`; presumably used elsewhere.
Succeeded,
/// No repair was needed; in this file the input is copied unchanged to the output.
NoAction,
/// Repair was refused; the report carries a `RepairRefusal`.
Refused,
/// Repair was attempted and failed.
Failed,
}
/// Action recorded in a [`RepairReport`].
///
/// Serialized internally tagged: the variant name (camelCase) is stored under `kind`.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", tag = "kind")]
pub enum RepairAction {
/// The input was copied to the output without changes.
CopiedUnchanged,
/// Metadata was rewritten.
MetadataRewritten {
/// Description of the rewrite.
description: BoundedText,
},
}
/// Reason a repair was refused.
///
/// Serialized internally tagged: the variant name (camelCase) is stored under `kind`.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", tag = "kind")]
pub enum RepairRefusal {
/// Validation reported `ParseFailed`.
ParseFailed {
/// Text of the first validation warning, or a generic fallback.
reason: BoundedText,
},
/// Validation reported `Encrypted`.
Encrypted,
/// The number of selected flavours/profiles was not exactly one.
AmbiguousFlavour {
/// How many were selected.
selected: u64,
},
/// Validation finished with a status other than `Valid`.
UnsupportedValidationStatus {
/// The status that was reported.
status: ValidationStatus,
},
/// The computed output path equals the canonical input path.
OutputWouldModifyInput,
/// The output path is unusable (for example, it already exists).
InvalidOutputPath {
/// Why the path was rejected.
reason: BoundedText,
},
}
// Validates `path` with `validator` and, when the file is `Valid` under exactly one
// flavour/profile, copies it unchanged to `<output_dir>/<prefix><file name>`.
//
// Returns `Ok` with a `Refused` report when the output would overwrite the input, the
// output already exists, parsing failed, the file is encrypted, the flavour selection is
// not exactly one, or the status is not `Valid`. Returns `Ok` with a `Failed` report when
// the copy fails. Returns `Err` for I/O errors while inspecting the input, for output
// name validation errors, for validator errors, and for cleanup errors.
#[allow(
clippy::disallowed_methods,
reason = "metadata repair is an explicit synchronous file rewrite API, not an async service \
path"
)]
fn repair_metadata_path(
validator: &Validator,
path: &Path,
output_dir: &Path,
prefix: &str,
) -> Result<RepairReport> {
let source = input_summary_for_path(path)?;
let output_path = repair_output_path(path, output_dir, prefix)?;
let input_canonical = std::fs::canonicalize(path).map_err(|source| PdfvError::Io {
path: Some(path.to_path_buf()),
source,
})?;
// The equality test is only meaningful when `output_dir` is canonical, which
// `MetadataRepairOptions::new` ensures.
if input_canonical == output_path {
return Ok(refused_repair_report(
source,
RepairRefusal::OutputWouldModifyInput,
));
}
// NOTE(review): this existence check and the later persist in `atomic_copy` are separate
// steps; a file created in between would presumably be replaced — confirm against the
// tempfile documentation.
if output_path.exists() {
return Ok(refused_repair_report(
source,
RepairRefusal::InvalidOutputPath {
reason: BoundedText::unchecked("output path already exists"),
},
));
}
// Timing starts here, so it covers validation and the copy but not the path checks above.
let started = std::time::Instant::now();
let validation = validator.validate_path(path)?;
if matches!(validation.status, ValidationStatus::ParseFailed) {
return Ok(refused_repair_report(
source,
RepairRefusal::ParseFailed {
reason: validation
.warnings
.first()
.map_or_else(default_parse_failed_text, ValidationWarning::message_text),
},
));
}
if matches!(validation.status, ValidationStatus::Encrypted) {
return Ok(refused_repair_report(source, RepairRefusal::Encrypted));
}
// Count flavours when any were reported; otherwise fall back to the profile reports.
let selected_profiles = if validation.flavours.is_empty() {
validation.profile_reports.len()
} else {
validation.flavours.len()
};
if selected_profiles != 1 {
return Ok(refused_repair_report(
source,
RepairRefusal::AmbiguousFlavour {
selected: u64::try_from(selected_profiles).unwrap_or(u64::MAX),
},
));
}
if !matches!(validation.status, ValidationStatus::Valid) {
return Ok(refused_repair_report(
source,
RepairRefusal::UnsupportedValidationStatus {
status: validation.status,
},
));
}
// Only a valid file reaches this point, so the "repair" is an unchanged copy.
match atomic_copy(path, &output_path) {
Ok(()) => Ok(RepairReport::builder()
.engine_version(ENGINE_VERSION.to_owned())
.source(source)
.output_path(Some(output_path))
.status(RepairStatus::NoAction)
.actions(vec![RepairAction::CopiedUnchanged])
.refusal(None)
.warnings(Vec::new())
.task_durations(vec![TaskDuration::from_duration(
Identifier::new("repairMetadata")?,
started.elapsed(),
)])
.build()),
Err(error) => {
// NOTE(review): `atomic_copy` only creates `output_path` through its final persist
// step, so a file found here after a failure was presumably not written by this call —
// confirm that removing it is intended.
remove_failed_output(&output_path)?;
Ok(failed_repair_report(
source,
Some(output_path),
&error.to_string(),
))
}
}
}
/// Builds a file-kind [`InputSummary`] for `path`, recording its size from metadata.
///
/// # Errors
///
/// Returns [`PdfvError::Io`] tagged with `path` when the metadata cannot be read.
#[allow(
    clippy::disallowed_methods,
    reason = "metadata repair reports filesystem input size synchronously"
)]
fn input_summary_for_path(path: &Path) -> Result<InputSummary> {
    let size = match std::fs::metadata(path) {
        Ok(metadata) => metadata.len(),
        Err(source) => {
            return Err(PdfvError::Io {
                path: Some(path.to_path_buf()),
                source,
            });
        }
    };
    Ok(InputSummary::new(
        InputKind::File,
        Some(path.to_path_buf()),
        Some(size),
    ))
}
#[allow(
clippy::disallowed_methods,
reason = "metadata repair validates a caller-selected filesystem output directory"
)]
fn validate_repair_output_dir(path: &Path) -> Result<PathBuf> {
let metadata = std::fs::metadata(path).map_err(|source| PdfvError::Io {
path: Some(path.to_path_buf()),
source,
})?;
if !metadata.is_dir() {
return Err(RepairError::InvalidField {
field: "outputDir",
reason: BoundedText::unchecked("output directory is not a directory"),
}
.into());
}
std::fs::canonicalize(path).map_err(|source| PdfvError::Io {
path: Some(path.to_path_buf()),
source,
})
}
/// Checks the output file-name prefix and returns an owned copy.
///
/// A prefix is accepted when it is at most 64 bytes long and made only of ASCII letters,
/// digits, `-`, `_`, and `.`; the empty prefix is accepted.
///
/// # Errors
///
/// Returns [`RepairError::InvalidField`] with field `"prefix"` otherwise.
fn validate_repair_prefix(prefix: &str) -> Result<String> {
    const MAX_REPAIR_PREFIX_BYTES: usize = 64;
    let too_long = prefix.len() > MAX_REPAIR_PREFIX_BYTES;
    let has_bad_byte = prefix
        .bytes()
        .any(|byte| !(byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'_' | b'.')));
    if too_long || has_bad_byte {
        let rejection = RepairError::InvalidField {
            field: "prefix",
            reason: BoundedText::unchecked(
                "prefix must be ASCII letters, digits, dot, dash, or underscore and at most 64 \
                 bytes",
            ),
        };
        return Err(rejection.into());
    }
    Ok(prefix.to_owned())
}
/// Computes `<output_dir>/<prefix><input file name>`.
///
/// Both the bare input file name and the prefixed name are checked with
/// `validate_output_filename` before the path is joined.
///
/// # Errors
///
/// Returns [`RepairError::InvalidField`] with field `"paths"` when `path` has no UTF-8
/// file name, or the filename validation error for either name.
fn repair_output_path(path: &Path, output_dir: &Path, prefix: &str) -> Result<PathBuf> {
    let Some(file_name) = path.file_name().and_then(|name| name.to_str()) else {
        let rejection = RepairError::InvalidField {
            field: "paths",
            reason: BoundedText::unchecked("input path must have a UTF-8 file name"),
        };
        return Err(rejection.into());
    };
    validate_output_filename(file_name)?;
    let output_name = [prefix, file_name].concat();
    validate_output_filename(&output_name)?;
    Ok(output_dir.join(output_name))
}
/// Checks that `name` is usable as a single output file name.
///
/// Rejects names that are empty, longer than 255 bytes, contain `..` anywhere, or contain
/// a NUL byte, `/`, or `\`.
///
/// # Errors
///
/// Returns [`RepairError::InvalidField`] with field `"output"` when the name is rejected.
fn validate_output_filename(name: &str) -> Result<()> {
    const MAX_OUTPUT_FILENAME_BYTES: usize = 255;
    let has_forbidden_byte = name
        .bytes()
        .any(|byte| matches!(byte, b'\0' | b'/' | b'\\'));
    let rejected = name.is_empty()
        || name.len() > MAX_OUTPUT_FILENAME_BYTES
        || name.contains("..")
        || has_forbidden_byte;
    if rejected {
        let rejection = RepairError::InvalidField {
            field: "output",
            reason: BoundedText::unchecked("output filename is invalid"),
        };
        return Err(rejection.into());
    }
    Ok(())
}
/// Copies `input` to `output_path` through a temporary file in the same directory.
///
/// The data is written to a `NamedTempFile` created next to the destination, flushed,
/// synced to storage, and only then persisted under `output_path`. Syncing before the
/// persist step keeps a crash shortly after the call from leaving a destination entry
/// whose contents never reached the disk.
///
/// # Errors
///
/// Returns [`RepairError::InvalidField`] with field `"outputDir"` when `output_path` has
/// no parent, and [`PdfvError::Io`] when opening the input, creating the temporary file,
/// copying, flushing, syncing, or persisting fails.
#[allow(
    clippy::disallowed_methods,
    clippy::disallowed_types,
    reason = "metadata repair performs synchronous atomic file output by design"
)]
fn atomic_copy(input: &Path, output_path: &Path) -> Result<()> {
    let Some(parent) = output_path.parent() else {
        return Err(RepairError::InvalidField {
            field: "outputDir",
            reason: BoundedText::unchecked("output path has no parent"),
        }
        .into());
    };
    let mut source = std::fs::File::open(input).map_err(|source| PdfvError::Io {
        path: Some(input.to_path_buf()),
        source,
    })?;
    // The temporary file lives in the destination directory so the final persist step
    // stays on one filesystem.
    let mut temp = tempfile::NamedTempFile::new_in(parent).map_err(|source| PdfvError::Io {
        path: Some(parent.to_path_buf()),
        source,
    })?;
    io::copy(&mut source, &mut temp).map_err(|source| PdfvError::Io {
        path: Some(input.to_path_buf()),
        source,
    })?;
    temp.flush().map_err(|source| PdfvError::Io {
        path: Some(output_path.to_path_buf()),
        source,
    })?;
    // `flush` only drains user-space buffers; `sync_all` asks the OS to write the file
    // contents and metadata to storage before the file becomes visible at its final name.
    temp.as_file().sync_all().map_err(|source| PdfvError::Io {
        path: Some(output_path.to_path_buf()),
        source,
    })?;
    temp.persist(output_path).map_err(|error| PdfvError::Io {
        path: Some(output_path.to_path_buf()),
        source: error.error,
    })?;
    Ok(())
}
/// Removes `output_path`, treating "not found" as success.
///
/// # Errors
///
/// Returns [`PdfvError::Io`] tagged with `output_path` for any other removal failure.
#[allow(
    clippy::disallowed_methods,
    reason = "metadata repair removes failed synchronous output artifacts"
)]
fn remove_failed_output(output_path: &Path) -> Result<()> {
    let Err(error) = std::fs::remove_file(output_path) else {
        return Ok(());
    };
    if error.kind() == std::io::ErrorKind::NotFound {
        Ok(())
    } else {
        Err(PdfvError::Io {
            path: Some(output_path.to_path_buf()),
            source: error,
        })
    }
}
/// Builds a `Refused` report for `source` carrying `refusal`.
///
/// The report has no output path, no actions, no warnings, and no task durations.
fn refused_repair_report(source: InputSummary, refusal: RepairRefusal) -> RepairReport {
    let engine_version = ENGINE_VERSION.to_owned();
    RepairReport::builder()
        .status(RepairStatus::Refused)
        .refusal(Some(refusal))
        .engine_version(engine_version)
        .source(source)
        .output_path(None)
        .actions(Vec::new())
        .warnings(Vec::new())
        .task_durations(Vec::new())
        .build()
}
/// Builds a `Failed` report for `source` with `reason` attached as a general warning.
///
/// The warning text is limited to 512 bytes. A longer `reason` is truncated at the
/// nearest preceding UTF-8 character boundary rather than being discarded, so the start
/// of the diagnostic (which often includes a long path) is preserved. An empty `reason`
/// is replaced by the generic text `"metadata repair failed"`.
fn failed_repair_report(
    source: InputSummary,
    output_path: Option<PathBuf>,
    reason: &str,
) -> RepairReport {
    const MAX_REASON_BYTES: usize = 512;
    // Largest cut point that is within the limit and on a char boundary; index 0 always
    // qualifies, so the search cannot come up empty.
    let cut = (0..=reason.len().min(MAX_REASON_BYTES))
        .rev()
        .find(|&index| reason.is_char_boundary(index))
        .unwrap_or(0);
    let kept = reason.get(..cut).unwrap_or_default();
    let message = if kept.is_empty() {
        BoundedText::unchecked("metadata repair failed")
    } else {
        BoundedText::unchecked(kept)
    };
    RepairReport::builder()
        .engine_version(ENGINE_VERSION.to_owned())
        .source(source)
        .output_path(output_path)
        .status(RepairStatus::Failed)
        .actions(Vec::new())
        .refusal(None)
        .warnings(vec![ValidationWarning::General { message }])
        .task_durations(Vec::new())
        .build()
}
/// Fallback refusal text used when a parse failure carries no warning.
fn default_parse_failed_text() -> BoundedText {
    const PARSE_FAILED: &str = "parse failed";
    BoundedText::unchecked(PARSE_FAILED)
}
/// Summary of an input recorded in reports (camelCase keys, unknown keys rejected).
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct InputSummary {
/// Kind of input.
pub kind: InputKind,
/// Input path, when there is one; omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub path: Option<PathBuf>,
/// Input size in bytes, when known; omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub bytes: Option<u64>,
}
impl InputSummary {
/// Builds a summary from its three parts without further checks.
#[must_use]
pub fn new(kind: InputKind, path: Option<PathBuf>, bytes: Option<u64>) -> Self {
Self { kind, path, bytes }
}
}
/// Kind of input a report refers to; serialized in camelCase.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase")]
pub enum InputKind {
/// Input read from a file.
File,
/// Input held in memory.
Memory,
}
/// Overall status of a validation run; serialized in camelCase.
// The rules that assign each status live outside this chunk.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase")]
pub enum ValidationStatus {
/// `valid` — the only status for which metadata repair proceeds in this file.
Valid,
/// `invalid`.
Invalid,
/// `encrypted` — metadata repair refuses such inputs.
Encrypted,
/// `incomplete`.
Incomplete,
/// `parseFailed` — metadata repair refuses such inputs.
ParseFailed,
}
/// Serializable report for one profile (camelCase keys, unknown keys rejected).
#[derive(Clone, Debug, Deserialize, Serialize, TypedBuilder)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct ProfileReport {
/// Identity of the profile.
pub profile: ProfileIdentity,
/// Compliance flag for this profile.
pub is_compliant: bool,
/// Count of executed checks.
pub checks_executed: u64,
/// Count of executed rules.
pub rules_executed: u64,
/// Count of failed rules.
pub failed_rules: u64,
/// Recorded failed assertions.
pub failed_assertions: Vec<Assertion>,
/// Recorded passed assertions.
pub passed_assertions: Vec<Assertion>,
/// Rules reported as unsupported.
pub unsupported_rules: Vec<UnsupportedRule>,
}
/// Identity of a validation profile (camelCase keys, unknown keys rejected).
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct ProfileIdentity {
/// Profile identifier.
pub id: Identifier,
/// Profile name.
pub name: BoundedText,
/// Optional profile version; omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub version: Option<Identifier>,
}
/// One recorded assertion (camelCase keys, unknown keys rejected).
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct Assertion {
/// Non-zero ordinal of the assertion.
pub ordinal: NonZeroU64,
/// Identifier of the rule the assertion belongs to.
pub rule_id: RuleId,
/// Pass/fail status.
pub status: AssertionStatus,
/// Description text.
pub description: BoundedText,
/// Location the assertion refers to.
pub location: ObjectLocation,
/// Optional object context; omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub object_context: Option<BoundedText>,
/// Optional message; omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub message: Option<BoundedText>,
/// Named arguments attached to the assertion.
pub error_arguments: Vec<ErrorArgument>,
}
/// Status of an [`Assertion`]; serialized in camelCase.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase")]
pub enum AssertionStatus {
/// The assertion passed.
Passed,
/// The assertion failed.
Failed,
}
/// Rule identifier; serialized transparently as the inner [`Identifier`].
#[derive(Clone, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
#[serde(transparent)]
pub struct RuleId(pub Identifier);
/// Location information with all parts optional (camelCase keys, unknown keys rejected).
///
/// Each field is omitted from serialized output when `None`.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct ObjectLocation {
/// Object key, when known.
#[serde(skip_serializing_if = "Option::is_none")]
pub object: Option<ObjectKey>,
/// Offset, when known.
#[serde(skip_serializing_if = "Option::is_none")]
pub offset: Option<u64>,
/// Path text, when known.
#[serde(skip_serializing_if = "Option::is_none")]
pub path: Option<BoundedText>,
}
/// Object key made of a non-zero object number and a generation number.
///
/// The derived ordering compares `number` first, then `generation`.
#[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct ObjectKey {
/// Non-zero object number.
pub number: NonZeroU32,
/// Generation number.
pub generation: u16,
}
impl ObjectKey {
/// Builds a key from its two parts.
#[must_use]
pub fn new(number: NonZeroU32, generation: u16) -> Self {
Self { number, generation }
}
}
/// A named argument attached to an assertion.
///
/// The XML writers emit it as `<argument name="…">value</argument>`.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct ErrorArgument {
/// Argument name.
pub name: Identifier,
/// Argument value.
pub value: BoundedText,
}
/// A rule that was reported as unsupported, with the reason why.
///
/// Serialized with camelCase keys; unknown keys are rejected when deserializing.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct UnsupportedRule {
/// Identifier of the profile the rule belongs to.
pub profile_id: Identifier,
/// Identifier of the unsupported rule.
pub rule_id: RuleId,
/// Optional expression fragment; left out of serialized output when `None`
/// and emitted as `<expression>` in XML when present.
#[serde(skip_serializing_if = "Option::is_none")]
pub expression_fragment: Option<BoundedText>,
/// Reason text; emitted as `<reason>` in XML.
pub reason: BoundedText,
/// Specification references; the text writer shows only the first one.
pub references: Vec<SpecReference>,
}
/// Reference to a clause of a specification.
///
/// Rendered as ` [specification clause]` by the text writer and as a
/// `<reference specification="…" clause="…">` element by the XML writers.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct SpecReference {
/// Name of the specification.
pub specification: BoundedText,
/// Clause within the specification.
pub clause: BoundedText,
}
/// A fact recorded about the document while parsing.
///
/// Serialized as an internally tagged enum: the variant name goes into a
/// `kind` key in camelCase. The enum-level `rename_all` renames variants only,
/// which is why `had_leading_bytes` carries its own explicit rename.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", tag = "kind")]
pub enum ParseFact {
/// Fact about the file header.
Header {
/// Offset of the header.
offset: u64,
/// Version stated by the header.
version: PdfVersion,
/// Whether bytes preceded the header; serialized as `hadLeadingBytes`.
#[serde(rename = "hadLeadingBytes")]
had_leading_bytes: bool,
},
/// Data found after the end-of-file marker.
PostEofData {
/// Number of such bytes.
bytes: u64,
},
/// Fact about a cross-reference section.
Xref {
/// Location of the section.
section: ObjectLocation,
/// The fact itself.
fact: XrefFact,
},
/// Fact about a stream object.
Stream {
/// Key of the stream object.
object: ObjectKey,
/// The fact itself.
fact: StreamFact,
},
/// Fact about encryption; optional fields are left out of serialized
/// output when `None`.
Encryption {
/// Whether the document is encrypted.
encrypted: bool,
/// Security handler, if recorded.
#[serde(skip_serializing_if = "Option::is_none")]
handler: Option<Identifier>,
/// Encryption version, if recorded.
#[serde(skip_serializing_if = "Option::is_none")]
version: Option<u8>,
/// Encryption revision, if recorded.
#[serde(skip_serializing_if = "Option::is_none")]
revision: Option<u8>,
/// Algorithm, if recorded.
#[serde(skip_serializing_if = "Option::is_none")]
algorithm: Option<Identifier>,
/// Whether the document was decrypted.
decrypted: bool,
},
/// Fact about an XMP metadata object.
Xmp {
/// Key of the object holding the metadata.
object: ObjectKey,
/// The fact itself.
fact: XmpFact,
},
}
/// A two-part version number (`major`, `minor`).
///
/// Serialized with camelCase keys; unknown keys are rejected when deserializing.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct PdfVersion {
/// Major version component.
pub major: u8,
/// Minor version component.
pub minor: u8,
}
/// Facts recorded about cross-reference data.
///
/// Variant names serialize in camelCase. The enum-level `rename_all` renames
/// variants only and does not reach fields inside struct variants, so
/// multi-word fields carry an explicit camelCase rename. The former snake_case
/// key is kept as a deserialization alias so previously written reports still load.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase")]
pub enum XrefFact {
    /// End-of-line markers comply.
    EolMarkersComply,
    /// A classic cross-reference table is malformed.
    MalformedClassic,
    /// A cross-reference stream was encountered but is unsupported.
    XrefStreamUnsupported,
    /// A cross-reference stream was parsed.
    XrefStreamParsed {
        /// Number of entries.
        entries: u64,
        /// Number of compressed entries; serialized as `compressedEntries`.
        #[serde(rename = "compressedEntries", alias = "compressed_entries")]
        compressed_entries: u64,
    },
    /// A previous-section link was followed.
    PrevChain {
        /// Offset recorded for the link.
        offset: u64,
    },
    /// A hybrid-reference link was found.
    HybridReference {
        /// Offset recorded for the link.
        offset: u64,
    },
    /// An object stream was parsed.
    ObjectStreamParsed,
}
/// Facts recorded about a stream object.
///
/// Serialized as an internally tagged enum with the variant name in a `fact`
/// key (camelCase). The enum-level `rename_all` renames variants only and does
/// not reach fields inside struct variants, so multi-word fields carry an
/// explicit rename. Fields that used to serialize in snake_case keep the old
/// key as a deserialization alias so previously written reports still load.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields, tag = "fact")]
pub enum StreamFact {
    /// Declared versus discovered stream length.
    Length {
        /// Length declared for the stream.
        declared: u64,
        /// Length actually discovered.
        discovered: u64,
    },
    /// Compliance of the spacing around the stream keywords.
    KeywordSpacing {
        /// Serialized as `streamKeywordCRLFCompliant`.
        #[serde(rename = "streamKeywordCRLFCompliant")]
        stream_keyword_crlf_compliant: bool,
        /// Serialized as `endstreamKeywordEolCompliant`.
        #[serde(rename = "endstreamKeywordEolCompliant")]
        endstream_keyword_eol_compliant: bool,
    },
    /// The stream was decoded.
    Decoded {
        /// Number of bytes.
        bytes: u64,
    },
    /// A single filter was decoded.
    FilterDecoded {
        /// Name of the filter.
        filter: Identifier,
        /// Bytes fed into the filter; serialized as `inputBytes`.
        #[serde(rename = "inputBytes", alias = "input_bytes")]
        input_bytes: u64,
        /// Bytes produced by the filter; serialized as `outputBytes`.
        #[serde(rename = "outputBytes", alias = "output_bytes")]
        output_bytes: u64,
    },
    /// A filter was handled in metadata mode.
    FilterMetadataMode {
        /// Name of the filter.
        filter: Identifier,
        /// Number of bytes.
        bytes: u64,
    },
}
/// Facts recorded about XMP metadata.
///
/// Serialized as an internally tagged enum with the variant name in a `fact`
/// key (camelCase). The enum-level `rename_all` renames variants only and does
/// not reach fields inside struct variants, so multi-word fields carry an
/// explicit camelCase rename. The former snake_case keys are kept as
/// deserialization aliases so previously written reports still load.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields, tag = "fact")]
pub enum XmpFact {
    /// A packet was parsed.
    PacketParsed {
        /// Number of bytes.
        bytes: u64,
        /// Number of namespaces.
        namespaces: u64,
        /// Number of claims.
        claims: u64,
    },
    /// The packet wrapper is missing.
    MissingPacketWrapper,
    /// A flavour claim was found.
    FlavourClaim {
        /// Flavour family.
        family: Identifier,
        /// Display form of the flavour; serialized as `displayFlavour`.
        #[serde(rename = "displayFlavour", alias = "display_flavour")]
        display_flavour: BoundedText,
        /// Namespace URI; serialized as `namespaceUri`.
        #[serde(rename = "namespaceUri", alias = "namespace_uri")]
        namespace_uri: BoundedText,
    },
    /// The metadata is malformed.
    Malformed {
        /// Why it was considered malformed.
        reason: BoundedText,
    },
    /// The XML was rejected as hostile.
    HostileXmlRejected {
        /// Why it was rejected.
        reason: BoundedText,
    },
}
/// Non-fatal warning attached to a report.
///
/// Serialized as an internally tagged enum with the variant name in a `kind`
/// key (camelCase). The enum-level `rename_all` renames variants only and does
/// not reach fields inside struct variants, so `profile_id` carries an explicit
/// camelCase rename. The former snake_case key is kept as a deserialization
/// alias so previously written reports still load.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", tag = "kind")]
pub enum ValidationWarning {
    /// The cap on recorded parse facts was reached.
    ParseFactCapReached {
        /// The cap value.
        cap: usize,
    },
    /// A profile was found incompatible.
    IncompatibleProfile {
        /// Identifier of the profile; serialized as `profileId`.
        #[serde(rename = "profileId", alias = "profile_id")]
        profile_id: Identifier,
        /// Why the profile is incompatible.
        reason: BoundedText,
    },
    /// Warning raised by auto detection.
    AutoDetection {
        /// Warning text.
        message: BoundedText,
    },
    /// Any other warning.
    General {
        /// Warning text.
        message: BoundedText,
    },
}
impl ValidationWarning {
    /// Renders the warning as one human-readable line.
    ///
    /// `General` warnings return a clone of their message unchanged; every
    /// other variant is formatted with a fixed prefix naming the warning kind.
    #[must_use]
    pub fn message_text(&self) -> BoundedText {
        let rendered = match self {
            // Already a finished message: hand back a copy as-is.
            Self::General { message } => return message.clone(),
            Self::ParseFactCapReached { cap } => format!("parse fact cap reached: {cap}"),
            Self::IncompatibleProfile { profile_id, reason } => format!(
                "incompatible profile {}: {}",
                profile_id.as_str(),
                reason.as_str()
            ),
            Self::AutoDetection { message } => {
                format!("auto detection: {}", message.as_str())
            }
        };
        // NOTE(review): the prefix is added on top of an existing `BoundedText`
        // and wrapped via `unchecked` — confirm the combined length is acceptable.
        BoundedText::unchecked(rendered)
    }
}
/// Time spent on one named task.
///
/// Serialized with camelCase keys; unknown keys are rejected when deserializing.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct TaskDuration {
/// Name of the task.
pub task: Identifier,
/// Duration in whole milliseconds.
pub millis: u64,
}
impl TaskDuration {
    /// Records how long `task` took, in whole milliseconds.
    ///
    /// A millisecond count that does not fit in a `u64` is clamped to `u64::MAX`.
    #[must_use]
    pub fn from_duration(task: Identifier, duration: Duration) -> Self {
        let millis = match u64::try_from(duration.as_millis()) {
            Ok(value) => value,
            Err(_) => u64::MAX,
        };
        Self { task, millis }
    }
}
/// Report for a batch of validated inputs.
///
/// Serialized with camelCase keys; unknown keys are rejected when deserializing.
#[derive(Clone, Debug, Deserialize, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct BatchReport {
/// One validation report per item.
pub items: Vec<ValidationReport>,
/// Aggregated counts; the constructors compute it from `items`.
pub summary: BatchSummary,
/// Batch-level warnings.
pub warnings: Vec<ValidationWarning>,
}
impl BatchReport {
    /// Builds a batch report, computing the summary from `items` and `elapsed`.
    #[must_use]
    pub fn from_items(
        items: Vec<ValidationReport>,
        warnings: Vec<ValidationWarning>,
        elapsed: Duration,
    ) -> Self {
        // Field initialisers run top to bottom, so the summary borrows `items`
        // before the vector is moved into the struct.
        Self {
            summary: BatchSummary::from_items(&items, elapsed),
            items,
            warnings,
        }
    }
    /// Builds a batch report whose summary also accounts for `internal_errors`
    /// failures that have no item of their own.
    #[must_use]
    pub fn from_items_with_internal_errors(
        items: Vec<ValidationReport>,
        warnings: Vec<ValidationWarning>,
        elapsed: Duration,
        internal_errors: u64,
    ) -> Self {
        Self {
            summary: BatchSummary::from_items_with_internal_errors(
                &items,
                elapsed,
                internal_errors,
            ),
            items,
            warnings,
        }
    }
}
/// Aggregated counts for a batch.
///
/// Serialized with camelCase keys; unknown keys are rejected when deserializing.
#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize, TypedBuilder)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct BatchSummary {
/// Number of items, plus internal errors when built with
/// `from_items_with_internal_errors`.
pub total_files: u64,
/// Items whose status is `Valid`.
pub valid: u64,
/// Items whose status is `Invalid`.
pub invalid: u64,
/// Items whose status is `ParseFailed`.
pub parse_failures: u64,
/// Items whose status is `Encrypted`.
pub encrypted: u64,
/// Items whose status is `Incomplete`.
pub incomplete: u64,
/// Internal errors supplied by the caller.
pub internal_errors: u64,
/// Elapsed time in milliseconds.
pub elapsed_millis: u64,
/// Most severe exit category derived from the counts above.
pub worst_exit_category: ExitCategory,
}
impl BatchSummary {
    /// Summarises `items` for a batch that hit no internal errors.
    #[must_use]
    pub fn from_items(items: &[ValidationReport], elapsed: Duration) -> Self {
        // Same as the general constructor with zero internal errors: the total
        // is the item count and `internal_errors` stays at its default of 0.
        Self::from_items_with_internal_errors(items, elapsed, 0)
    }
    /// Summarises `items` and additionally accounts for `internal_errors`.
    ///
    /// `total_files` is the item count plus `internal_errors`, saturating at
    /// `u64::MAX`.
    #[must_use]
    pub fn from_items_with_internal_errors(
        items: &[ValidationReport],
        elapsed: Duration,
        internal_errors: u64,
    ) -> Self {
        let reported = u64::try_from(items.len()).unwrap_or(u64::MAX);
        let mut summary = Self {
            total_files: reported.saturating_add(internal_errors),
            elapsed_millis: duration_millis(elapsed),
            internal_errors,
            ..Self::default()
        };
        summary.apply_items(items);
        summary.finish()
    }
    /// Adds one (saturating) to the counter matching each report's status.
    fn apply_items(&mut self, items: &[ValidationReport]) {
        for report in items {
            let counter = match report.status {
                ValidationStatus::Valid => &mut self.valid,
                ValidationStatus::Invalid => &mut self.invalid,
                ValidationStatus::ParseFailed => &mut self.parse_failures,
                ValidationStatus::Encrypted => &mut self.encrypted,
                ValidationStatus::Incomplete => &mut self.incomplete,
            };
            *counter = counter.saturating_add(1);
        }
    }
    /// Derives `worst_exit_category` from the counters and returns the summary.
    fn finish(mut self) -> Self {
        // NOTE(review): internal errors map to `ProcessingFailed`, not
        // `ExitCategory::InternalError` — confirm this is intended.
        let processing_failed = [
            self.parse_failures,
            self.encrypted,
            self.incomplete,
            self.internal_errors,
        ]
        .iter()
        .any(|&count| count > 0);
        self.worst_exit_category = match (processing_failed, self.invalid) {
            (true, _) => ExitCategory::ProcessingFailed,
            (false, 0) => ExitCategory::Success,
            (false, _) => ExitCategory::ValidationFailed,
        };
        self
    }
}
/// Coarse outcome category; variants serialize in camelCase.
#[derive(Clone, Copy, Debug, Default, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase")]
pub enum ExitCategory {
/// Default category; `BatchSummary` uses it when no failure counter is set.
#[default]
Success,
/// `BatchSummary` uses it when only the `invalid` counter is non-zero.
ValidationFailed,
/// `BatchSummary` uses it when any parse-failure, encrypted, incomplete or
/// internal-error counter is non-zero.
ProcessingFailed,
/// Internal error category.
InternalError,
}
/// Output format selector for reports; variants serialize in camelCase.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase")]
pub enum ReportFormat {
/// Compact JSON, written by `JsonReportWriter::compact()`.
Json,
/// Pretty-printed JSON, written by `JsonReportWriter::pretty()`.
JsonPretty,
/// Plain text, written by `TextReportWriter`.
Text,
/// XML, written by `XmlReportWriter`.
Xml,
/// Raw XML, written by `RawXmlReportWriter`.
RawXml,
/// HTML, written by `HtmlReportWriter`.
Html,
}
impl ReportFormat {
    /// Writes a single validation report to `out` using the writer for this format.
    pub fn write_report<W: Write>(&self, report: &ValidationReport, out: W) -> Result<()> {
        match *self {
            Self::Json => JsonReportWriter::compact().write_report(report, out),
            Self::JsonPretty => JsonReportWriter::pretty().write_report(report, out),
            Self::Text => TextReportWriter.write_report(report, out),
            Self::Xml => XmlReportWriter.write_report(report, out),
            Self::RawXml => RawXmlReportWriter.write_report(report, out),
            Self::Html => HtmlReportWriter.write_report(report, out),
        }
    }
    /// Writes a batch validation report to `out` using the writer for this format.
    pub fn write_batch<W: Write>(&self, report: &BatchReport, out: W) -> Result<()> {
        match *self {
            Self::Json => JsonReportWriter::compact().write_batch(report, out),
            Self::JsonPretty => JsonReportWriter::pretty().write_batch(report, out),
            Self::Text => TextReportWriter.write_batch(report, out),
            Self::Xml => XmlReportWriter.write_batch(report, out),
            Self::RawXml => RawXmlReportWriter.write_batch(report, out),
            Self::Html => HtmlReportWriter.write_batch(report, out),
        }
    }
    /// Writes a single repair report to `out` using the writer for this format.
    pub fn write_repair_report<W: Write>(&self, report: &RepairReport, out: W) -> Result<()> {
        match *self {
            Self::Json => JsonReportWriter::compact().write_repair_report(report, out),
            Self::JsonPretty => JsonReportWriter::pretty().write_repair_report(report, out),
            Self::Text => TextReportWriter.write_repair_report(report, out),
            Self::Xml => XmlReportWriter.write_repair_report(report, out),
            Self::RawXml => RawXmlReportWriter.write_repair_report(report, out),
            Self::Html => HtmlReportWriter.write_repair_report(report, out),
        }
    }
    /// Writes a batch repair report to `out` using the writer for this format.
    pub fn write_repair_batch<W: Write>(&self, report: &RepairBatchReport, out: W) -> Result<()> {
        match *self {
            Self::Json => JsonReportWriter::compact().write_repair_batch(report, out),
            Self::JsonPretty => JsonReportWriter::pretty().write_repair_batch(report, out),
            Self::Text => TextReportWriter.write_repair_batch(report, out),
            Self::Xml => XmlReportWriter.write_repair_batch(report, out),
            Self::RawXml => RawXmlReportWriter.write_repair_batch(report, out),
            Self::Html => HtmlReportWriter.write_repair_batch(report, out),
        }
    }
}
/// Serializes the four report shapes to any [`Write`] sink.
///
/// Each method takes the sink by value and returns the crate's `Result`.
pub trait ReportWriter {
/// Writes a single validation report.
fn write_report<W: Write>(&self, report: &ValidationReport, out: W) -> Result<()>;
/// Writes a batch of validation reports.
fn write_batch<W: Write>(&self, report: &BatchReport, out: W) -> Result<()>;
/// Writes a single repair report.
fn write_repair_report<W: Write>(&self, report: &RepairReport, out: W) -> Result<()>;
/// Writes a batch of repair reports.
fn write_repair_batch<W: Write>(&self, report: &RepairBatchReport, out: W) -> Result<()>;
}
/// Report writer that serializes reports as JSON, compact or pretty-printed.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct JsonReportWriter {
    /// When `true`, output is pretty-printed.
    pretty: bool,
}
impl JsonReportWriter {
    /// Writer producing compact JSON.
    #[must_use]
    pub fn compact() -> Self {
        JsonReportWriter { pretty: false }
    }
    /// Writer producing pretty-printed JSON.
    #[must_use]
    pub fn pretty() -> Self {
        JsonReportWriter { pretty: true }
    }
}
impl ReportWriter for JsonReportWriter {
    /// Serializes a single validation report as JSON.
    fn write_report<W: Write>(&self, report: &ValidationReport, out: W) -> Result<()> {
        let Self { pretty } = *self;
        write_json(out, report, pretty)
    }
    /// Serializes a batch validation report as JSON.
    fn write_batch<W: Write>(&self, report: &BatchReport, out: W) -> Result<()> {
        let Self { pretty } = *self;
        write_json(out, report, pretty)
    }
    /// Serializes a single repair report as JSON.
    fn write_repair_report<W: Write>(&self, report: &RepairReport, out: W) -> Result<()> {
        let Self { pretty } = *self;
        write_json(out, report, pretty)
    }
    /// Serializes a batch repair report as JSON.
    fn write_repair_batch<W: Write>(&self, report: &RepairBatchReport, out: W) -> Result<()> {
        let Self { pretty } = *self;
        write_json(out, report, pretty)
    }
}
/// Report writer producing short plain-text summaries.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct TextReportWriter;
impl ReportWriter for TextReportWriter {
    /// Writes the text summary of a single validation report.
    fn write_report<W: Write>(&self, report: &ValidationReport, mut out: W) -> Result<()> {
        write_text_report(report, &mut out)
    }
    /// Writes the batch verdict, the counters, an optional warning count and
    /// one `name: status` line per item.
    fn write_batch<W: Write>(&self, report: &BatchReport, mut out: W) -> Result<()> {
        let summary = &report.summary;
        let verdict = exit_category_text(summary.worst_exit_category);
        writeln!(out, "batch: {}", verdict).map_err(write_error)?;
        writeln!(out, "files: {}", summary.total_files).map_err(write_error)?;
        writeln!(
            out,
            "summary: {} valid, {} invalid, {} parse failed, {} encrypted, {} incomplete, {} \
             internal errors",
            summary.valid,
            summary.invalid,
            summary.parse_failures,
            summary.encrypted,
            summary.incomplete,
            summary.internal_errors,
        )
        .map_err(write_error)?;
        let warning_count = report.warnings.len();
        if warning_count != 0 {
            writeln!(out, "warnings: {}", warning_count).map_err(write_error)?;
        }
        writeln!(out, "items:").map_err(write_error)?;
        for entry in &report.items {
            let name = source_name(&entry.source);
            let status = status_text(entry.status);
            writeln!(out, " {}: {}", name, status).map_err(write_error)?;
        }
        Ok(())
    }
    /// Writes the text summary of a single repair report.
    fn write_repair_report<W: Write>(&self, report: &RepairReport, mut out: W) -> Result<()> {
        write_text_repair_report(report, &mut out)
    }
    /// Writes the repair batch verdict, the counters and one `name: status`
    /// line per item.
    fn write_repair_batch<W: Write>(&self, report: &RepairBatchReport, mut out: W) -> Result<()> {
        let summary = &report.summary;
        let verdict = exit_category_text(summary.worst_exit_category);
        writeln!(out, "repair batch: {}", verdict).map_err(write_error)?;
        writeln!(out, "files: {}", summary.total_files).map_err(write_error)?;
        writeln!(
            out,
            "summary: {} repaired, {} unchanged, {} refused, {} failed",
            summary.succeeded, summary.no_action, summary.refused, summary.failed,
        )
        .map_err(write_error)?;
        writeln!(out, "items:").map_err(write_error)?;
        for entry in &report.items {
            let name = source_name(&entry.source);
            let status = repair_status_text(entry.status);
            writeln!(out, " {}: {}", name, status).map_err(write_error)?;
        }
        Ok(())
    }
}
/// Report writer producing the `<report>` / `<repairReport>` XML documents.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct XmlReportWriter;
impl ReportWriter for XmlReportWriter {
    /// Writes one report by wrapping a clone of it in a single-item batch with
    /// no batch warnings and zero elapsed time.
    fn write_report<W: Write>(&self, report: &ValidationReport, mut out: W) -> Result<()> {
        let single = vec![report.clone()];
        let wrapped = BatchReport::from_items(single, Vec::new(), Duration::ZERO);
        write_xml_batch(&wrapped, &mut out)
    }
    /// Writes a batch report as XML.
    fn write_batch<W: Write>(&self, report: &BatchReport, mut out: W) -> Result<()> {
        write_xml_batch(report, &mut out)
    }
    /// Writes one repair report by wrapping a clone of it in a single-item batch.
    fn write_repair_report<W: Write>(&self, report: &RepairReport, mut out: W) -> Result<()> {
        let single = vec![report.clone()];
        let wrapped = RepairBatchReport::from_items(single, Vec::new(), Duration::ZERO);
        write_xml_repair_batch(&wrapped, &mut out, "repairReport")
    }
    /// Writes a repair batch under the `repairReport` root element.
    fn write_repair_batch<W: Write>(&self, report: &RepairBatchReport, mut out: W) -> Result<()> {
        write_xml_repair_batch(report, &mut out, "repairReport")
    }
}
/// Report writer producing the `<rawReport>` / `<rawRepairReport>` XML documents.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct RawXmlReportWriter;
impl ReportWriter for RawXmlReportWriter {
    /// Writes one report by wrapping a clone of it in a single-item batch with
    /// no batch warnings and zero elapsed time.
    fn write_report<W: Write>(&self, report: &ValidationReport, mut out: W) -> Result<()> {
        let single = vec![report.clone()];
        let wrapped = BatchReport::from_items(single, Vec::new(), Duration::ZERO);
        write_raw_xml_batch(&wrapped, &mut out)
    }
    /// Writes a batch report as raw XML.
    fn write_batch<W: Write>(&self, report: &BatchReport, mut out: W) -> Result<()> {
        write_raw_xml_batch(report, &mut out)
    }
    /// Writes one repair report by wrapping a clone of it in a single-item batch.
    fn write_repair_report<W: Write>(&self, report: &RepairReport, mut out: W) -> Result<()> {
        let single = vec![report.clone()];
        let wrapped = RepairBatchReport::from_items(single, Vec::new(), Duration::ZERO);
        write_xml_repair_batch(&wrapped, &mut out, "rawRepairReport")
    }
    /// Writes a repair batch under the `rawRepairReport` root element.
    fn write_repair_batch<W: Write>(&self, report: &RepairBatchReport, mut out: W) -> Result<()> {
        write_xml_repair_batch(report, &mut out, "rawRepairReport")
    }
}
/// Report writer producing self-contained HTML pages.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct HtmlReportWriter;
impl ReportWriter for HtmlReportWriter {
    /// Writes one report by wrapping a clone of it in a single-item batch with
    /// no batch warnings and zero elapsed time.
    fn write_report<W: Write>(&self, report: &ValidationReport, mut out: W) -> Result<()> {
        let single = vec![report.clone()];
        let wrapped = BatchReport::from_items(single, Vec::new(), Duration::ZERO);
        write_html_batch(&wrapped, &mut out)
    }
    /// Writes a batch report as HTML.
    fn write_batch<W: Write>(&self, report: &BatchReport, mut out: W) -> Result<()> {
        write_html_batch(report, &mut out)
    }
    /// Writes one repair report by wrapping a clone of it in a single-item batch.
    fn write_repair_report<W: Write>(&self, report: &RepairReport, mut out: W) -> Result<()> {
        let single = vec![report.clone()];
        let wrapped = RepairBatchReport::from_items(single, Vec::new(), Duration::ZERO);
        write_html_repair_batch(&wrapped, &mut out)
    }
    /// Writes a repair batch as HTML.
    fn write_repair_batch<W: Write>(&self, report: &RepairBatchReport, mut out: W) -> Result<()> {
        write_html_repair_batch(report, &mut out)
    }
}
/// Serializes `value` as JSON into `out`, pretty-printed when `pretty` is set.
///
/// Serialization failures are converted through `ReportError`.
fn write_json<W, T>(out: W, value: &T, pretty: bool) -> Result<()>
where
    W: Write,
    T: Serialize,
{
    let outcome = if pretty {
        serde_json::to_writer_pretty(out, value)
    } else {
        serde_json::to_writer(out, value)
    };
    outcome.map_err(ReportError::from)?;
    Ok(())
}
/// Writes the plain-text summary of one validation report.
///
/// Emits the source name and status, the profile list, the check counts, up to
/// five failed assertions, up to five unsupported rules, and — when present —
/// the warning count, the feature object count and the policy verdict.
fn write_text_report<W: Write>(report: &ValidationReport, out: &mut W) -> Result<()> {
    let name = source_name(&report.source);
    let status = status_text(report.status);
    writeln!(out, "{}: {}", name, status).map_err(write_error)?;
    writeln!(out, "profiles: {}", profile_list(report)).map_err(write_error)?;
    let checks = check_counts(report);
    writeln!(
        out,
        "checks: {} passed, {} failed, {} unsupported",
        checks.passed, checks.failed, checks.unsupported,
    )
    .map_err(write_error)?;

    // Only the first five failures across all profiles are listed.
    let mut failures = report
        .profile_reports
        .iter()
        .flat_map(|profile| profile.failed_assertions.iter())
        .take(5)
        .peekable();
    if failures.peek().is_some() {
        writeln!(out, "first failures:").map_err(write_error)?;
        for failure in failures {
            writeln!(
                out,
                " {} at {}: {}",
                failure.rule_id.0.as_str(),
                location_text(&failure.location),
                assertion_message(failure),
            )
            .map_err(write_error)?;
        }
    }

    // Likewise, only the first five unsupported rules are listed.
    let mut unsupported = report
        .profile_reports
        .iter()
        .flat_map(|profile| profile.unsupported_rules.iter())
        .take(5)
        .peekable();
    if unsupported.peek().is_some() {
        writeln!(out, "unsupported rules:").map_err(write_error)?;
        for rule in unsupported {
            writeln!(
                out,
                " {}: {}{}",
                rule.rule_id.0.as_str(),
                rule.reason.as_str(),
                reference_suffix(&rule.references),
            )
            .map_err(write_error)?;
        }
    }

    let warning_count = report.warnings.len();
    if warning_count != 0 {
        writeln!(out, "warnings: {}", warning_count).map_err(write_error)?;
    }
    if let Some(features) = &report.feature_report {
        writeln!(out, "features: {} objects", features.objects.len()).map_err(write_error)?;
    }
    if let Some(policy) = &report.policy_report {
        let verdict = if policy.is_compliant {
            "compliant"
        } else {
            "non-compliant"
        };
        writeln!(out, "policy: {}", verdict).map_err(write_error)?;
    }
    Ok(())
}
/// Writes the plain-text summary of one repair report.
///
/// Emits the source name and status, then — each only when present — the
/// output path, the actions, the refusal and the warning count.
fn write_text_repair_report<W: Write>(report: &RepairReport, out: &mut W) -> Result<()> {
    let name = source_name(&report.source);
    let status = repair_status_text(report.status);
    writeln!(out, "{}: {}", name, status).map_err(write_error)?;
    if let Some(path) = &report.output_path {
        writeln!(out, "output: {}", path.display()).map_err(write_error)?;
    }
    let action_count = report.actions.len();
    if action_count != 0 {
        writeln!(out, "actions: {}", action_count).map_err(write_error)?;
        for step in &report.actions {
            writeln!(out, " {}", repair_action_text(step)).map_err(write_error)?;
        }
    }
    if let Some(refusal) = &report.refusal {
        writeln!(out, "refusal: {}", repair_refusal_text(refusal)).map_err(write_error)?;
    }
    let warning_count = report.warnings.len();
    if warning_count != 0 {
        writeln!(out, "warnings: {}", warning_count).map_err(write_error)?;
    }
    Ok(())
}
/// Writes a batch of validation reports as the `<report>` XML document.
///
/// Layout: XML declaration, `<buildInformation>` carrying the engine version,
/// one job per item inside `<jobs>`, then the batch summary and the
/// batch-level warnings.
fn write_xml_batch<W: Write>(report: &BatchReport, out: &mut W) -> Result<()> {
    writeln!(out, r#"<?xml version="1.0" encoding="utf-8"?>"#).map_err(write_error)?;
    writeln!(out, "<report>").map_err(write_error)?;
    writeln!(out, " <buildInformation>").map_err(write_error)?;
    let engine_version = XmlEscapedAttr::new(ENGINE_VERSION)?;
    writeln!(
        out,
        r#" <releaseDetails id="pdfv-core" version="{}"></releaseDetails>"#,
        engine_version,
    )
    .map_err(write_error)?;
    writeln!(out, " </buildInformation>").map_err(write_error)?;

    writeln!(out, " <jobs>").map_err(write_error)?;
    for job in &report.items {
        write_xml_job(job, out)?;
    }
    writeln!(out, " </jobs>").map_err(write_error)?;

    write_xml_batch_summary(&report.summary, out)?;
    write_xml_warnings(&report.warnings, out, 2)?;
    writeln!(out, "</report>").map_err(write_error)?;
    Ok(())
}
/// Writes a batch of validation reports as the `<rawReport>` XML document.
///
/// Layout: XML declaration, the root element carrying the engine version, a
/// `<processorConfig>` listing the tasks present in the batch, one
/// `<processorResult>` per item (source, profile reports, optional feature and
/// policy reports, parse facts, item warnings), then the batch summary and the
/// batch-level warnings.
fn write_raw_xml_batch<W: Write>(report: &BatchReport, out: &mut W) -> Result<()> {
    writeln!(out, r#"<?xml version="1.0" encoding="utf-8"?>"#).map_err(write_error)?;
    writeln!(
        out,
        r#"<rawReport engine="pdfv-core" version="{}">"#,
        XmlEscapedAttr::new(ENGINE_VERSION)?,
    )
    .map_err(write_error)?;
    writeln!(
        out,
        r#" <processorConfig tasks="{}"></processorConfig>"#,
        XmlEscapedAttr::new(&raw_validation_tasks(report))?,
    )
    .map_err(write_error)?;
    writeln!(out, " <processorResults>").map_err(write_error)?;
    for item in &report.items {
        writeln!(
            out,
            r#" <processorResult status="{}">"#,
            status_text(item.status),
        )
        .map_err(write_error)?;
        write_xml_item(&item.source, out)?;
        for profile in &item.profile_reports {
            write_xml_validation_report(item.status, profile, out)?;
        }
        if let Some(feature_report) = &item.feature_report {
            write_xml_feature_report(feature_report, out)?;
        }
        if let Some(policy_report) = &item.policy_report {
            write_xml_policy_report(policy_report, out)?;
        }
        write_xml_parse_facts(&item.parse_facts, out)?;
        write_xml_warnings(&item.warnings, out, 6)?;
        writeln!(out, " </processorResult>").map_err(write_error)?;
    }
    writeln!(out, " </processorResults>").map_err(write_error)?;
    write_xml_batch_summary(&report.summary, out)?;
    // Batch-level warnings were previously dropped from the raw report; emit
    // them in the same position and with the same indent as `write_xml_batch`.
    write_xml_warnings(&report.warnings, out, 2)?;
    writeln!(out, "</rawReport>").map_err(write_error)?;
    Ok(())
}
/// Writes a repair batch as an XML document whose root element is `root`.
///
/// When `root` is `rawRepairReport` a fixed `<processorConfig>` line is added
/// after the opening tag. The items, the repair summary and the batch-level
/// warnings follow.
fn write_xml_repair_batch<W: Write>(
    report: &RepairBatchReport,
    out: &mut W,
    root: &str,
) -> Result<()> {
    writeln!(out, r#"<?xml version="1.0" encoding="utf-8"?>"#).map_err(write_error)?;
    let engine_version = XmlEscapedAttr::new(ENGINE_VERSION)?;
    writeln!(
        out,
        r#"<{root} engine="pdfv-core" version="{}">"#,
        engine_version,
    )
    .map_err(write_error)?;
    let is_raw = root == "rawRepairReport";
    if is_raw {
        writeln!(
            out,
            r#" <processorConfig tasks="metadata"></processorConfig>"#,
        )
        .map_err(write_error)?;
    }
    writeln!(out, " <items>").map_err(write_error)?;
    for entry in &report.items {
        write_xml_repair_item(entry, out)?;
    }
    writeln!(out, " </items>").map_err(write_error)?;
    write_xml_repair_summary(&report.summary, out)?;
    write_xml_warnings(&report.warnings, out, 2)?;
    writeln!(out, "</{root}>").map_err(write_error)?;
    Ok(())
}
/// Writes one `<repairItem>` element.
///
/// Contains the source item and — each only when present — the output path,
/// the actions and the refusal, followed by the item's warnings.
fn write_xml_repair_item<W: Write>(report: &RepairReport, out: &mut W) -> Result<()> {
    let status = repair_status_text(report.status);
    writeln!(out, r#" <repairItem status="{}">"#, status).map_err(write_error)?;
    write_xml_item(&report.source, out)?;
    if let Some(path) = &report.output_path {
        let shown = path.display().to_string();
        let escaped = XmlEscapedText::new(&shown)?;
        writeln!(out, " <output>{}</output>", escaped).map_err(write_error)?;
    }
    if !report.actions.is_empty() {
        writeln!(out, " <actions>").map_err(write_error)?;
        for step in &report.actions {
            let kind = repair_action_kind(step);
            let text = repair_action_text(step);
            let escaped = XmlEscapedText::new(&text)?;
            writeln!(out, r#" <action kind="{}">{}</action>"#, kind, escaped)
                .map_err(write_error)?;
        }
        writeln!(out, " </actions>").map_err(write_error)?;
    }
    if let Some(refusal) = &report.refusal {
        let kind = repair_refusal_kind(refusal);
        let text = repair_refusal_text(refusal);
        let escaped = XmlEscapedText::new(&text)?;
        writeln!(out, r#" <refusal kind="{}">{}</refusal>"#, kind, escaped)
            .map_err(write_error)?;
    }
    write_xml_warnings(&report.warnings, out, 6)?;
    writeln!(out, " </repairItem>").map_err(write_error)?;
    Ok(())
}
/// Writes the `<repairSummary>` element carrying the repair batch counters.
fn write_xml_repair_summary<W: Write>(summary: &RepairBatchSummary, out: &mut W) -> Result<()> {
    let written = writeln!(
        out,
        r#" <repairSummary totalJobs="{}" succeeded="{}" noAction="{}" refused="{}" failed="{}" elapsedMillis="{}"></repairSummary>"#,
        summary.total_files,
        summary.succeeded,
        summary.no_action,
        summary.refused,
        summary.failed,
        summary.elapsed_millis,
    );
    written.map_err(write_error)?;
    Ok(())
}
/// Builds the comma-separated task list for the raw report's `processorConfig`.
///
/// Always starts with `validation`; `features` and `policy` are appended, in
/// that order, when at least one item carries the corresponding report.
fn raw_validation_tasks(report: &BatchReport) -> String {
    let items = &report.items;
    let mut tasks = String::from("validation");
    if items.iter().any(|item| item.feature_report.is_some()) {
        tasks.push_str(",features");
    }
    if items.iter().any(|item| item.policy_report.is_some()) {
        tasks.push_str(",policy");
    }
    tasks
}
/// Writes a batch of validation reports as an HTML page.
///
/// The page has a heading, one paragraph with the batch counters and a table
/// with one row per item: input, status, profiles, feature object count and
/// policy verdict. `-` stands in for a missing feature or policy report.
fn write_html_batch<W: Write>(report: &BatchReport, out: &mut W) -> Result<()> {
    write_html_start(out, "pdfv validation report")?;
    writeln!(out, "<h1>Validation Report</h1>").map_err(write_error)?;
    let summary = &report.summary;
    writeln!(
        out,
        "<p>{} valid, {} invalid, {} parse failed, {} encrypted, {} incomplete.</p>",
        summary.valid,
        summary.invalid,
        summary.parse_failures,
        summary.encrypted,
        summary.incomplete,
    )
    .map_err(write_error)?;
    writeln!(
        out,
        "<table><thead><tr><th>Input</th><th>Status</th><th>Profiles</th><th>Features</\
         th><th>Policy</th></tr></thead><tbody>"
    )
    .map_err(write_error)?;
    for row in &report.items {
        let features = match &row.feature_report {
            Some(found) => found.objects.len().to_string(),
            None => String::from("-"),
        };
        let policy = match &row.policy_report {
            Some(found) if found.is_compliant => "compliant",
            Some(_) => "non-compliant",
            None => "-",
        };
        // Only the free-text cells go through HTML escaping.
        let input = source_name(&row.source);
        let input_cell = HtmlEscapedText::new(&input)?;
        let status = status_text(row.status);
        let profiles = profile_list(row);
        let profiles_cell = HtmlEscapedText::new(&profiles)?;
        writeln!(
            out,
            "<tr><td>{}</td><td>{}</td><td>{}</td><td>{}</td><td>{}</td></tr>",
            input_cell, status, profiles_cell, features, policy,
        )
        .map_err(write_error)?;
    }
    writeln!(out, "</tbody></table>").map_err(write_error)?;
    write_html_end(out)
}
fn write_html_repair_batch<W: Write>(report: &RepairBatchReport, out: &mut W) -> Result<()> {
write_html_start(out, "pdfv metadata repair report")?;
writeln!(out, "<h1>Metadata Repair Report</h1>").map_err(write_error)?;
writeln!(
out,
"<p>{} repaired, {} unchanged, {} refused, {} failed.</p>",
report.summary.succeeded,
report.summary.no_action,
report.summary.refused,
report.summary.failed,
)
.map_err(write_error)?;
writeln!(
out,
"<table><thead><tr><th>Input</th><th>Status</th><th>Output</th><th>Reason</th></tr></\
thead><tbody>"
)
.map_err(write_error)?;
for item in &report.items {
let output = item
.output_path
.as_ref()
.map_or_else(String::new, |path| path.display().to_string());
let reason = item
.refusal
.as_ref()
.map_or_else(String::new, repair_refusal_text);
writeln!(
out,
"<tr><td>{}</td><td>{}</td><td>{}</td><td>{}</td></tr>",
HtmlEscapedText::new(&source_name(&item.source))?,
repair_status_text(item.status),
HtmlEscapedText::new(&output)?,
HtmlEscapedText::new(&reason)?,
)
.map_err(write_error)?;
}
writeln!(out, "</tbody></table>").map_err(write_error)?;
write_html_end(out)
}
/// Writes the doctype, the `<head>` (with the escaped `title` and the inline
/// stylesheet) and the opening `<body>` tag.
fn write_html_start<W: Write>(out: &mut W, title: &str) -> Result<()> {
    writeln!(out, "<!doctype html>").map_err(write_error)?;
    let escaped_title = HtmlEscapedText::new(title)?;
    writeln!(
        out,
        r#"<html lang="en"><head><meta charset="utf-8"><title>{}</title><style>body{{font-family:system-ui,sans-serif;margin:2rem;color:#1f2937}}table{{border-collapse:collapse;width:100%}}th,td{{border:1px solid #d1d5db;padding:.4rem;text-align:left}}th{{background:#f3f4f6}}</style></head><body>"#,
        escaped_title,
    )
    .map_err(write_error)?;
    Ok(())
}
/// Writes the closing `</body></html>` tags.
fn write_html_end<W: Write>(out: &mut W) -> Result<()> {
    let written = writeln!(out, "</body></html>");
    written.map_err(write_error)?;
    Ok(())
}
/// Writes one `<job>` element for a validation report.
///
/// Contains the source item, one `<validationReport>` per profile report (or a
/// single non-compliant, all-zero placeholder when there are none), the parse
/// facts, the optional feature and policy reports, and the report's warnings.
fn write_xml_job<W: Write>(report: &ValidationReport, out: &mut W) -> Result<()> {
    writeln!(out, " <job>").map_err(write_error)?;
    write_xml_item(&report.source, out)?;
    if report.profile_reports.is_empty() {
        // No profile ran: emit a placeholder so the job still has a report.
        let statement = XmlEscapedAttr::new(status_statement(report.status))?;
        writeln!(
            out,
            r#" <validationReport profileName="" statement="{}" isCompliant="false">"#,
            statement,
        )
        .map_err(write_error)?;
        writeln!(
            out,
            r#" <details passedRules="0" failedRules="0" passedChecks="0" failedChecks="0" unsupportedRules="0"></details>"#,
        )
        .map_err(write_error)?;
        writeln!(out, " </validationReport>").map_err(write_error)?;
    } else {
        for profile in &report.profile_reports {
            write_xml_validation_report(report.status, profile, out)?;
        }
    }
    write_xml_parse_facts(&report.parse_facts, out)?;
    if let Some(features) = &report.feature_report {
        write_xml_feature_report(features, out)?;
    }
    if let Some(policy) = &report.policy_report {
        write_xml_policy_report(policy, out)?;
    }
    write_xml_warnings(&report.warnings, out, 6)?;
    writeln!(out, " </job>").map_err(write_error)?;
    Ok(())
}
/// Writes the `<item>` element naming an input.
///
/// The `size` attribute is present only when the byte count is known.
fn write_xml_item<W: Write>(source: &InputSummary, out: &mut W) -> Result<()> {
    let size = match source.bytes {
        Some(bytes) => format!(r#" size="{bytes}""#),
        None => String::new(),
    };
    writeln!(out, " <item{size}>").map_err(write_error)?;
    let label = source_name(source);
    let escaped = XmlEscapedText::new(&label)?;
    writeln!(out, " <name>{}</name>", escaped).map_err(write_error)?;
    writeln!(out, " </item>").map_err(write_error)?;
    Ok(())
}
/// Writes one `<validationReport>` element for a profile report.
///
/// The `statement` attribute is derived from the report-level `status`. The
/// `<details>` counters are computed here: passed checks are the executed
/// checks minus the recorded failed assertions, and passed rules are the
/// executed rules minus the failed rules (both saturating at zero).
fn write_xml_validation_report<W: Write>(
    status: ValidationStatus,
    profile: &ProfileReport,
    out: &mut W,
) -> Result<()> {
    let profile_name = XmlEscapedAttr::new(profile.profile.name.as_str())?;
    let statement = XmlEscapedAttr::new(status_statement(status))?;
    writeln!(
        out,
        r#" <validationReport profileName="{}" statement="{}" isCompliant="{}">"#,
        profile_name, statement, profile.is_compliant,
    )
    .map_err(write_error)?;

    // Lengths are clamped rather than allowed to fail the conversion.
    let clamp = |len: usize| u64::try_from(len).unwrap_or(u64::MAX);
    let failed_checks = clamp(profile.failed_assertions.len());
    let unsupported_rules = clamp(profile.unsupported_rules.len());
    let passed_checks = profile.checks_executed.saturating_sub(failed_checks);
    let passed_rules = profile.rules_executed.saturating_sub(profile.failed_rules);
    writeln!(
        out,
        r#" <details passedRules="{passed_rules}" failedRules="{}" passedChecks="{passed_checks}" failedChecks="{failed_checks}" unsupportedRules="{unsupported_rules}"></details>"#,
        profile.failed_rules,
    )
    .map_err(write_error)?;

    write_xml_assertions("failedChecks", &profile.failed_assertions, out)?;
    write_xml_assertions("passedChecks", &profile.passed_assertions, out)?;
    write_xml_unsupported_rules(&profile.unsupported_rules, out)?;
    writeln!(out, " </validationReport>").map_err(write_error)?;
    Ok(())
}
/// Writes `assertions` as `<check>` elements wrapped in an `<element>` tag.
///
/// Nothing is written when `assertions` is empty. Each check carries its rule
/// id, status and location as attributes, a description, and — when present —
/// a message and the error arguments.
fn write_xml_assertions<W: Write>(
    element: &str,
    assertions: &[Assertion],
    out: &mut W,
) -> Result<()> {
    if assertions.is_empty() {
        return Ok(());
    }
    writeln!(out, " <{element}>").map_err(write_error)?;
    for check in assertions {
        let rule_id = XmlEscapedAttr::new(check.rule_id.0.as_str())?;
        let status = assertion_status_text(check.status);
        let location = location_text(&check.location);
        let location_attr = XmlEscapedAttr::new(&location)?;
        writeln!(
            out,
            r#" <check ruleId="{}" status="{}" location="{}">"#,
            rule_id, status, location_attr,
        )
        .map_err(write_error)?;
        let description = XmlEscapedText::new(check.description.as_str())?;
        writeln!(out, " <description>{}</description>", description).map_err(write_error)?;
        if let Some(message) = &check.message {
            let escaped = XmlEscapedText::new(message.as_str())?;
            writeln!(out, " <message>{}</message>", escaped).map_err(write_error)?;
        }
        if !check.error_arguments.is_empty() {
            writeln!(out, " <errorArguments>").map_err(write_error)?;
            for argument in &check.error_arguments {
                let name = XmlEscapedAttr::new(argument.name.as_str())?;
                let value = XmlEscapedText::new(argument.value.as_str())?;
                writeln!(out, r#" <argument name="{}">{}</argument>"#, name, value)
                    .map_err(write_error)?;
            }
            writeln!(out, " </errorArguments>").map_err(write_error)?;
        }
        writeln!(out, " </check>").map_err(write_error)?;
    }
    writeln!(out, " </{element}>").map_err(write_error)?;
    Ok(())
}
/// Writes the `<unsupportedRules>` element.
///
/// Nothing is written when `rules` is empty. Each rule carries its profile and
/// rule ids as attributes, an optional expression fragment, the reason, and —
/// when present — its specification references.
fn write_xml_unsupported_rules<W: Write>(rules: &[UnsupportedRule], out: &mut W) -> Result<()> {
    if rules.is_empty() {
        return Ok(());
    }
    writeln!(out, " <unsupportedRules>").map_err(write_error)?;
    for entry in rules {
        let profile_id = XmlEscapedAttr::new(entry.profile_id.as_str())?;
        let rule_id = XmlEscapedAttr::new(entry.rule_id.0.as_str())?;
        writeln!(
            out,
            r#" <rule profileId="{}" ruleId="{}">"#,
            profile_id, rule_id,
        )
        .map_err(write_error)?;
        if let Some(fragment) = &entry.expression_fragment {
            let escaped = XmlEscapedText::new(fragment.as_str())?;
            writeln!(out, " <expression>{}</expression>", escaped).map_err(write_error)?;
        }
        let reason = XmlEscapedText::new(entry.reason.as_str())?;
        writeln!(out, " <reason>{}</reason>", reason).map_err(write_error)?;
        if !entry.references.is_empty() {
            writeln!(out, " <references>").map_err(write_error)?;
            for cited in &entry.references {
                let specification = XmlEscapedAttr::new(cited.specification.as_str())?;
                let clause = XmlEscapedAttr::new(cited.clause.as_str())?;
                writeln!(
                    out,
                    r#" <reference specification="{}" clause="{}"></reference>"#,
                    specification, clause,
                )
                .map_err(write_error)?;
            }
            writeln!(out, " </references>").map_err(write_error)?;
        }
        writeln!(out, " </rule>").map_err(write_error)?;
    }
    writeln!(out, " </unsupportedRules>").map_err(write_error)?;
    Ok(())
}
/// Writes the `<featureReport>` element.
///
/// The root carries the visited and extracted object counts and the
/// `truncated` flag; each feature object lists its properties, with values
/// written by `write_xml_feature_value` at indent 12.
fn write_xml_feature_report<W: Write>(report: &FeatureReport, out: &mut W) -> Result<()> {
    let extracted = report.objects.len();
    writeln!(
        out,
        r#" <featureReport visitedObjects="{}" extractedObjects="{}" truncated="{}">"#,
        report.visited_objects, extracted, report.truncated,
    )
    .map_err(write_error)?;
    for feature in &report.objects {
        let family = XmlEscapedAttr::new(feature.family.as_str())?;
        let location = location_text(&feature.location);
        let location_attr = XmlEscapedAttr::new(&location)?;
        writeln!(
            out,
            r#" <featureObject family="{}" location="{}">"#,
            family, location_attr,
        )
        .map_err(write_error)?;
        for (name, value) in &feature.properties {
            let property_name = XmlEscapedAttr::new(name.as_str())?;
            writeln!(out, r#" <property name="{}">"#, property_name).map_err(write_error)?;
            write_xml_feature_value(value, out, 12)?;
            writeln!(out, " </property>").map_err(write_error)?;
        }
        writeln!(out, " </featureObject>").map_err(write_error)?;
    }
    writeln!(out, " </featureReport>").map_err(write_error)?;
    Ok(())
}
/// Writes the `<policyReport>` element with one `<rule>` child per policy result.
///
/// A missing policy name is rendered as an empty `name` attribute.
///
/// # Errors
///
/// Fails when a value is rejected by the XML escaping wrappers or when writing to `out`
/// fails.
fn write_xml_policy_report<W: Write>(report: &PolicyReport, out: &mut W) -> Result<()> {
    let raw_name = match report.name.as_ref() {
        Some(name) => name.as_str(),
        None => "",
    };
    let policy_name = XmlEscapedAttr::new(raw_name)?;
    writeln!(
        out,
        r#" <policyReport name="{policy_name}" isCompliant="{}">"#,
        report.is_compliant,
    )
    .map_err(write_error)?;
    for outcome in &report.results {
        let id = XmlEscapedAttr::new(outcome.id.as_str())?;
        writeln!(
            out,
            r#" <rule id="{id}" passed="{}" matches="{}">"#,
            outcome.passed, outcome.matches,
        )
        .map_err(write_error)?;
        let description = XmlEscapedText::new(outcome.description.as_str())?;
        writeln!(out, " <description>{description}</description>").map_err(write_error)?;
        let message = XmlEscapedText::new(outcome.message.as_str())?;
        writeln!(out, " <message>{message}</message>").map_err(write_error)?;
        writeln!(out, " </rule>").map_err(write_error)?;
    }
    writeln!(out, " </policyReport>").map_err(write_error)?;
    Ok(())
}
/// Formats the first specification reference as a ` [specification clause]` suffix.
///
/// Returns an empty string when `references` is empty; later references are ignored.
fn reference_suffix(references: &[SpecReference]) -> String {
    match references.first() {
        Some(first) => format!(
            " [{} {}]",
            first.specification.as_str(),
            first.clause.as_str()
        ),
        None => String::new(),
    }
}
/// Writes one feature value as a `<value type="…">` element prefixed by `indent` copies of
/// the padding string.
///
/// Lists are written recursively, with each item's indent increased by two (saturating).
///
/// # Errors
///
/// Fails when a string value is rejected by `XmlEscapedText::new` or when writing to `out`
/// fails.
fn write_xml_feature_value<W: Write>(
    value: &FeatureValue,
    out: &mut W,
    indent: usize,
) -> Result<()> {
    let pad = " ".repeat(indent);
    // Every arm yields the I/O result of its final write; it is converted once below.
    let written = match value {
        FeatureValue::Null => writeln!(out, r#"{pad}<value type="null"></value>"#),
        FeatureValue::Bool(flag) => {
            writeln!(out, r#"{pad}<value type="bool">{flag}</value>"#)
        }
        FeatureValue::Number(number) => {
            writeln!(out, r#"{pad}<value type="number">{number}</value>"#)
        }
        FeatureValue::String(text) => {
            let escaped = XmlEscapedText::new(text.as_str())?;
            writeln!(out, r#"{pad}<value type="string">{escaped}</value>"#)
        }
        FeatureValue::RedactedString { bytes } => writeln!(
            out,
            r#"{pad}<value type="redactedString" bytes="{bytes}"></value>"#
        ),
        FeatureValue::ObjectKey(key) => writeln!(
            out,
            r#"{pad}<value type="objectKey" number="{}" generation="{}"></value>"#,
            key.number, key.generation,
        ),
        FeatureValue::List(items) => {
            writeln!(out, r#"{pad}<value type="list">"#).map_err(write_error)?;
            let nested = indent.saturating_add(2);
            for item in items {
                write_xml_feature_value(item, out, nested)?;
            }
            writeln!(out, "{pad}</value>")
        }
    };
    written.map_err(write_error)?;
    Ok(())
}
/// Writes the `<parseFacts>` section, one child element per recorded parse fact.
///
/// Nothing is emitted when `facts` is empty. Absent encryption handler, version, revision
/// and algorithm values are rendered as empty attributes.
///
/// # Errors
///
/// Fails when a value is rejected by `XmlEscapedAttr::new` or when writing to `out` fails.
fn write_xml_parse_facts<W: Write>(facts: &[ParseFact], out: &mut W) -> Result<()> {
    if facts.is_empty() {
        return Ok(());
    }
    writeln!(out, " <parseFacts>").map_err(write_error)?;
    for parse_fact in facts {
        // Each arm yields the I/O result of its write; it is converted after the match.
        let written = match parse_fact {
            ParseFact::Header {
                offset,
                version,
                had_leading_bytes,
            } => writeln!(
                out,
                r#" <header offset="{offset}" version="{}.{}" hadLeadingBytes="{had_leading_bytes}"></header>"#,
                version.major, version.minor,
            ),
            ParseFact::PostEofData { bytes } => {
                writeln!(out, r#" <postEofData bytes="{bytes}"></postEofData>"#)
            }
            ParseFact::Xref { section, fact } => {
                let location = location_text(section);
                let detail = xref_fact_text(fact);
                writeln!(
                    out,
                    r#" <xref location="{}" fact="{}"></xref>"#,
                    XmlEscapedAttr::new(&location)?,
                    XmlEscapedAttr::new(&detail)?,
                )
            }
            ParseFact::Stream { object, fact } => {
                let detail = stream_fact_text(fact);
                writeln!(
                    out,
                    r#" <stream object="{} {}" fact="{}"></stream>"#,
                    object.number,
                    object.generation,
                    XmlEscapedAttr::new(&detail)?,
                )
            }
            ParseFact::Encryption {
                encrypted,
                handler,
                version,
                revision,
                algorithm,
                decrypted,
            } => {
                let handler_name = handler.as_ref().map(Identifier::as_str).unwrap_or("");
                let algorithm_name = algorithm.as_ref().map(Identifier::as_str).unwrap_or("");
                writeln!(
                    out,
                    r#" <encryption encrypted="{encrypted}" handler="{}" version="{}" revision="{}" algorithm="{}" decrypted="{decrypted}"></encryption>"#,
                    XmlEscapedAttr::new(handler_name)?,
                    version.map_or_else(String::new, |value| value.to_string()),
                    revision.map_or_else(String::new, |value| value.to_string()),
                    XmlEscapedAttr::new(algorithm_name)?,
                )
            }
            ParseFact::Xmp { object, fact } => {
                let detail = xmp_fact_text(fact);
                writeln!(
                    out,
                    r#" <xmp object="{} {}" fact="{}"></xmp>"#,
                    object.number,
                    object.generation,
                    XmlEscapedAttr::new(&detail)?,
                )
            }
        };
        written.map_err(write_error)?;
    }
    writeln!(out, " </parseFacts>").map_err(write_error)?;
    Ok(())
}
/// Writes a `<warnings>` element containing one `<warning>` per entry.
///
/// Nothing is emitted when `warnings` is empty. Every line is prefixed by `indent` copies
/// of the padding string.
///
/// # Errors
///
/// Fails when a warning text is rejected by `XmlEscapedText::new` or when writing to `out`
/// fails.
fn write_xml_warnings<W: Write>(
    warnings: &[ValidationWarning],
    out: &mut W,
    indent: usize,
) -> Result<()> {
    if !warnings.is_empty() {
        let pad = " ".repeat(indent);
        writeln!(out, "{pad}<warnings>").map_err(write_error)?;
        for entry in warnings {
            let message = warning_text(entry);
            let escaped = XmlEscapedText::new(&message)?;
            writeln!(out, "{pad} <warning>{escaped}</warning>").map_err(write_error)?;
        }
        writeln!(out, "{pad}</warnings>").map_err(write_error)?;
    }
    Ok(())
}
/// Writes the `<batchSummary>` element with per-status job counts and the elapsed time.
///
/// `failedJobs` is the saturating sum of parse failures, encrypted, incomplete and
/// internal-error counts; the `<validationReports>` text is the saturating sum of valid
/// and invalid counts.
///
/// # Errors
///
/// Fails when writing to `out` fails.
fn write_xml_batch_summary<W: Write>(summary: &BatchSummary, out: &mut W) -> Result<()> {
    let failed_jobs = summary
        .parse_failures
        .saturating_add(summary.encrypted)
        .saturating_add(summary.incomplete)
        .saturating_add(summary.internal_errors);
    let validated = summary.valid.saturating_add(summary.invalid);
    writeln!(
        out,
        r#" <batchSummary totalJobs="{}" failedToParse="{}" encrypted="{}" incomplete="{}" internalErrors="{}">"#,
        summary.total_files,
        summary.parse_failures,
        summary.encrypted,
        summary.incomplete,
        summary.internal_errors,
    )
    .map_err(write_error)?;
    writeln!(
        out,
        r#" <validationReports compliant="{}" nonCompliant="{}" failedJobs="{failed_jobs}">{validated}</validationReports>"#,
        summary.valid, summary.invalid,
    )
    .map_err(write_error)?;
    writeln!(
        out,
        r#" <duration elapsedMillis="{}"></duration>"#,
        summary.elapsed_millis,
    )
    .map_err(write_error)?;
    writeln!(out, " </batchSummary>").map_err(write_error)?;
    Ok(())
}
/// Check totals accumulated over the profile reports of one validation report.
///
/// Produced by `check_counts`; all counters start at zero via `Default`.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
struct CheckCounts {
// Sum over profiles of executed checks minus failed rules (saturating).
passed: u64,
// Sum over profiles of the failed-rule counts (saturating).
failed: u64,
// Sum over profiles of the number of unsupported rules (saturating).
unsupported: u64,
}
/// Totals passed, failed and unsupported counts across all profile reports.
///
/// All arithmetic saturates. A profile contributes `checks_executed - failed_rules` to the
/// passed total, and an unsupported-rule count that does not fit in `u64` is treated as
/// `u64::MAX`.
fn check_counts(report: &ValidationReport) -> CheckCounts {
    let mut totals = CheckCounts::default();
    for profile in &report.profile_reports {
        let failed = profile.failed_rules;
        let unsupported = u64::try_from(profile.unsupported_rules.len()).unwrap_or(u64::MAX);
        let passed = profile.checks_executed.saturating_sub(failed);
        totals.failed = totals.failed.saturating_add(failed);
        totals.unsupported = totals.unsupported.saturating_add(unsupported);
        totals.passed = totals.passed.saturating_add(passed);
    }
    totals
}
/// Joins the profile identifiers of a report with `", "`, or returns `"-"` when the report
/// has no profile reports.
fn profile_list(report: &ValidationReport) -> String {
    if report.profile_reports.is_empty() {
        return String::from("-");
    }
    let ids: Vec<&str> = report
        .profile_reports
        .iter()
        .map(|entry| entry.profile.id.as_str())
        .collect();
    ids.join(", ")
}
/// Returns the display form of the input path, or `<memory>` when the input has no path.
fn source_name(source: &InputSummary) -> String {
    match source.path.as_ref() {
        Some(path) => path.display().to_string(),
        None => String::from("<memory>"),
    }
}
fn status_text(status: ValidationStatus) -> &'static str {
match status {
ValidationStatus::Valid => "valid",
ValidationStatus::Invalid => "invalid",
ValidationStatus::Encrypted => "encrypted",
ValidationStatus::Incomplete => "incomplete",
ValidationStatus::ParseFailed => "parse failed",
}
}
fn repair_status_text(status: RepairStatus) -> &'static str {
match status {
RepairStatus::Succeeded => "succeeded",
RepairStatus::NoAction => "no action",
RepairStatus::Refused => "refused",
RepairStatus::Failed => "failed",
}
}
/// Returns the camelCase kind tag for a repair action.
fn repair_action_kind(action: &RepairAction) -> &'static str {
    if matches!(action, RepairAction::CopiedUnchanged) {
        return "copiedUnchanged";
    }
    match action {
        RepairAction::MetadataRewritten { .. } => "metadataRewritten",
        RepairAction::CopiedUnchanged => "copiedUnchanged",
    }
}
/// Returns the human-readable text for a repair action.
///
/// A metadata rewrite reports its own description; an unchanged copy uses a fixed label.
fn repair_action_text(action: &RepairAction) -> String {
    match action {
        RepairAction::MetadataRewritten { description } => String::from(description.as_str()),
        RepairAction::CopiedUnchanged => "copied unchanged".to_owned(),
    }
}
fn repair_refusal_kind(refusal: &RepairRefusal) -> &'static str {
match refusal {
RepairRefusal::ParseFailed { .. } => "parseFailed",
RepairRefusal::Encrypted => "encrypted",
RepairRefusal::AmbiguousFlavour { .. } => "ambiguousFlavour",
RepairRefusal::UnsupportedValidationStatus { .. } => "unsupportedValidationStatus",
RepairRefusal::OutputWouldModifyInput => "outputWouldModifyInput",
RepairRefusal::InvalidOutputPath { .. } => "invalidOutputPath",
}
}
/// Builds the human-readable explanation for a repair refusal.
///
/// Variants that carry a reason or detail embed it in the message; the others use a fixed
/// sentence.
fn repair_refusal_text(refusal: &RepairRefusal) -> String {
    match refusal {
        RepairRefusal::Encrypted => "encrypted inputs are not repaired".to_owned(),
        RepairRefusal::OutputWouldModifyInput => {
            "output path would modify input in place".to_owned()
        }
        RepairRefusal::ParseFailed { reason } => {
            let detail = reason.as_str();
            format!("input could not be parsed: {detail}")
        }
        RepairRefusal::AmbiguousFlavour { selected } => {
            format!("repair requires exactly one selected flavour, got {selected}")
        }
        RepairRefusal::UnsupportedValidationStatus { status } => {
            let label = status_text(*status);
            format!("metadata repair is unsupported for {label} inputs")
        }
        RepairRefusal::InvalidOutputPath { reason } => String::from(reason.as_str()),
    }
}
fn exit_category_text(category: ExitCategory) -> &'static str {
match category {
ExitCategory::Success => "success",
ExitCategory::ValidationFailed => "validation failed",
ExitCategory::ProcessingFailed => "processing failed",
ExitCategory::InternalError => "internal error",
}
}
/// Renders an object location as text.
///
/// Preference order: the path, then `object <number> <generation>`, then
/// `offset <offset>`, and finally `unknown` when none of them is set.
fn location_text(location: &ObjectLocation) -> String {
    match (&location.path, location.object, location.offset) {
        (Some(path), _, _) => path.to_string(),
        (None, Some(key), _) => format!("object {} {}", key.number, key.generation),
        (None, None, Some(offset)) => format!("offset {offset}"),
        (None, None, None) => String::from("unknown"),
    }
}
/// Returns the assertion's message, falling back to its description when no message is set.
fn assertion_message(assertion: &Assertion) -> &str {
    match assertion.message.as_ref() {
        Some(message) => message.as_str(),
        None => assertion.description.as_str(),
    }
}
fn assertion_status_text(status: AssertionStatus) -> &'static str {
match status {
AssertionStatus::Passed => "passed",
AssertionStatus::Failed => "failed",
}
}
fn status_statement(status: ValidationStatus) -> &'static str {
match status {
ValidationStatus::Valid => "PDF file is compliant with Validation Profile requirements.",
ValidationStatus::Invalid => {
"PDF file is not compliant with Validation Profile requirements."
}
ValidationStatus::Encrypted => "PDF file is encrypted and could not be validated.",
ValidationStatus::Incomplete => "Validation did not complete for all required rules.",
ValidationStatus::ParseFailed => "PDF file could not be parsed.",
}
}
/// Renders a cross-reference fact as a camelCase tag, followed by `key=value` details for
/// variants that carry data.
fn xref_fact_text(fact: &XrefFact) -> String {
    match fact {
        XrefFact::XrefStreamParsed {
            entries,
            compressed_entries,
        } => format!("xrefStreamParsed entries={entries} compressedEntries={compressed_entries}"),
        XrefFact::PrevChain { offset } => format!("prevChain offset={offset}"),
        XrefFact::HybridReference { offset } => format!("hybridReference offset={offset}"),
        XrefFact::EolMarkersComply => "eolMarkersComply".to_owned(),
        XrefFact::MalformedClassic => "malformedClassic".to_owned(),
        XrefFact::XrefStreamUnsupported => "xrefStreamUnsupported".to_owned(),
        XrefFact::ObjectStreamParsed => "objectStreamParsed".to_owned(),
    }
}
/// Renders a stream fact as a camelCase tag followed by its `key=value` details.
fn stream_fact_text(fact: &StreamFact) -> String {
    match fact {
        StreamFact::Length {
            declared,
            discovered,
        } => format!("length declared={declared} discovered={discovered}"),
        StreamFact::KeywordSpacing {
            stream_keyword_crlf_compliant: stream_ok,
            endstream_keyword_eol_compliant: endstream_ok,
        } => format!(
            "keywordSpacing streamKeywordCRLFCompliant={stream_ok} \
             endstreamKeywordEolCompliant={endstream_ok}"
        ),
        StreamFact::Decoded { bytes } => format!("decoded bytes={bytes}"),
        StreamFact::FilterDecoded {
            filter,
            input_bytes,
            output_bytes,
        } => {
            let name = filter.as_str();
            format!("filterDecoded filter={name} inputBytes={input_bytes} outputBytes={output_bytes}")
        }
        StreamFact::FilterMetadataMode { filter, bytes } => {
            let name = filter.as_str();
            format!("filterMetadataMode filter={name} bytes={bytes}")
        }
    }
}
/// Renders an XMP fact as a camelCase tag followed by its `key=value` details.
fn xmp_fact_text(fact: &XmpFact) -> String {
    match fact {
        XmpFact::MissingPacketWrapper => "missingPacketWrapper".to_owned(),
        XmpFact::PacketParsed {
            bytes,
            namespaces,
            claims,
        } => format!("packetParsed bytes={bytes} namespaces={namespaces} claims={claims}"),
        XmpFact::FlavourClaim {
            family,
            display_flavour,
            namespace_uri,
        } => {
            let family = family.as_str();
            let flavour = display_flavour.as_str();
            let uri = namespace_uri.as_str();
            format!("flavourClaim family={family} displayFlavour={flavour} namespaceUri={uri}")
        }
        XmpFact::Malformed { reason } => {
            let detail = reason.as_str();
            format!("malformed reason={detail}")
        }
        XmpFact::HostileXmlRejected { reason } => {
            let detail = reason.as_str();
            format!("hostileXmlRejected reason={detail}")
        }
    }
}
/// Returns the warning's message text as an owned `String`.
fn warning_text(warning: &ValidationWarning) -> String {
    let message = warning.message_text();
    message.to_string()
}
/// Converts a duration to whole milliseconds, clamping to `u64::MAX` when the value does
/// not fit in 64 bits.
fn duration_millis(duration: Duration) -> u64 {
    match u64::try_from(duration.as_millis()) {
        Ok(millis) => millis,
        Err(_) => u64::MAX,
    }
}
/// Wraps an I/O failure from a report writer in `ReportError::Write` and converts it into
/// the crate error type.
fn write_error(source: std::io::Error) -> PdfvError {
    let failure = ReportError::Write { source };
    failure.into()
}
/// Formats an optional path as an ` at <path>` suffix, or an empty string when absent.
fn format_optional_path(path: Option<&PathBuf>) -> String {
    match path {
        Some(location) => format!(" at {}", location.display()),
        None => String::new(),
    }
}
/// Borrowed text that has passed `ensure_xml_text` and is rendered through its `Display`
/// impl as XML character data.
#[derive(Clone, Copy, Debug)]
struct XmlEscapedText<'a>(&'a str);

impl<'a> XmlEscapedText<'a> {
    /// Wraps `value` after checking it with `ensure_xml_text`.
    ///
    /// # Errors
    ///
    /// Propagates the error from `ensure_xml_text` when the check fails.
    fn new(value: &'a str) -> Result<Self> {
        ensure_xml_text(value).map(|()| Self(value))
    }
}
impl fmt::Display for XmlEscapedText<'_> {
fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
for character in self.0.chars() {
match character {
'&' => formatter.write_str("&")?,
'<' => formatter.write_str("<")?,
'>' => formatter.write_str(">")?,
'"' => formatter.write_str(""")?,
'\'' => formatter.write_str("'")?,
_ => formatter.write_str(character.encode_utf8(&mut [0; 4]))?,
}
}
Ok(())
}
}
/// Borrowed text that has passed `ensure_xml_text` and is rendered through its `Display`
/// impl inside XML attribute values.
#[derive(Clone, Copy, Debug)]
struct XmlEscapedAttr<'a>(&'a str);

impl<'a> XmlEscapedAttr<'a> {
    /// Wraps `value` after checking it with `ensure_xml_text`.
    ///
    /// # Errors
    ///
    /// Propagates the error from `ensure_xml_text` when the check fails.
    fn new(value: &'a str) -> Result<Self> {
        ensure_xml_text(value).map(|()| Self(value))
    }
}
impl fmt::Display for XmlEscapedAttr<'_> {
fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
XmlEscapedText(self.0).fmt(formatter)
}
}
/// Borrowed text that has passed `ensure_xml_text` and is rendered through its `Display`
/// impl inside HTML output.
#[derive(Clone, Copy, Debug)]
struct HtmlEscapedText<'a>(&'a str);

impl<'a> HtmlEscapedText<'a> {
    /// Wraps `value` after checking it with `ensure_xml_text`.
    ///
    /// # Errors
    ///
    /// Propagates the error from `ensure_xml_text` when the check fails.
    fn new(value: &'a str) -> Result<Self> {
        ensure_xml_text(value).map(|()| Self(value))
    }
}
impl fmt::Display for HtmlEscapedText<'_> {
fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
XmlEscapedText(self.0).fmt(formatter)
}
}
fn ensure_xml_text(value: &str) -> Result<()> {
if value.chars().all(is_xml_char) {
return Ok(());
}
Err(ReportError::Xml {
message: BoundedText::unchecked("text contains characters forbidden by XML 1.0"),
}
.into())
}
/// Reports whether `character` is tab, line feed, carriage return, or lies in one of the
/// ranges U+0020–U+D7FF, U+E000–U+FFFD, U+10000–U+10FFFF.
fn is_xml_char(character: char) -> bool {
    matches!(
        character,
        '\u{09}'
            | '\u{0A}'
            | '\u{0D}'
            | '\u{20}'..='\u{D7FF}'
            | '\u{E000}'..='\u{FFFD}'
            | '\u{10000}'..='\u{10FFFF}'
    )
}
#[cfg(test)]
mod tests {
use std::{
collections::BTreeMap,
error::Error as StdError,
num::{NonZeroU32, NonZeroU64},
path::PathBuf,
time::Duration,
};
use super::{
Assertion, AssertionStatus, BatchReport, BoundedText, ErrorArgument, ExitCategory,
FeatureObject, FeatureReport, FeatureValue, HtmlReportWriter, Identifier, InputKind,
InputSummary, JsonReportWriter, MaxDisplayedFailures, MetadataRepairOptions,
MetadataRepairer, ObjectLocation, ObjectTypeName, PdfVersion, PolicyReport,
PolicyRuleResult, ProfileIdentity, ProfileReport, PropertyName, RawXmlReportWriter,
RepairAction, RepairBatchReport, RepairRefusal, RepairReport, RepairStatus, ReportFormat,
ReportWriter, RuleId, TextReportWriter, ValidationOptions, ValidationReport,
ValidationStatus, XmlReportWriter,
};
// Builds the shared fixture: an in-memory, 42-byte input reported as invalid against the
// `pdfa-1b` profile, with one failed assertion for rule `6.1.2-1` and one header parse fact.
fn sample_report() -> std::result::Result<ValidationReport, Box<dyn StdError>> {
let profile_id = Identifier::new("pdfa-1b")?;
let rule_id = RuleId(Identifier::new("6.1.2-1")?);
Ok(ValidationReport::builder()
.engine_version("0.1.0".to_owned())
.source(InputSummary::new(InputKind::Memory, None, Some(42)))
.status(ValidationStatus::Invalid)
.flavours(vec![super::ValidationFlavour::new(
"pdfa",
NonZeroU32::MIN,
"b",
)?])
.profile_reports(vec![
ProfileReport::builder()
.profile(ProfileIdentity {
id: profile_id.clone(),
name: BoundedText::new("PDF/A-1B", 64)?,
version: None,
})
.is_compliant(false)
.checks_executed(1)
.rules_executed(1)
.failed_rules(1)
// The single failed assertion carries a message and one error argument.
.failed_assertions(vec![Assertion {
ordinal: NonZeroU64::MIN,
rule_id,
status: AssertionStatus::Failed,
description: BoundedText::new("Header must start at byte zero", 128)?,
location: ObjectLocation {
object: None,
offset: Some(0),
path: None,
},
object_context: None,
message: Some(BoundedText::new("Header offset is non-zero", 128)?),
error_arguments: vec![ErrorArgument {
name: Identifier::new("offset")?,
value: BoundedText::new("12", 16)?,
}],
}])
.passed_assertions(Vec::new())
.unsupported_rules(Vec::new())
.build(),
])
.parse_facts(vec![super::ParseFact::Header {
offset: 12,
version: PdfVersion { major: 1, minor: 7 },
had_leading_bytes: true,
}])
.warnings(Vec::new())
.task_durations(Vec::new())
.build())
}
// Extends `sample_report` with a feature report (one `catalog` object with a single
// `hasMetadata` property) and a compliant policy report holding one passing rule.
fn sample_feature_policy_report() -> std::result::Result<ValidationReport, Box<dyn StdError>> {
let mut report = sample_report()?;
let mut properties = BTreeMap::new();
properties.insert(PropertyName::new("hasMetadata")?, FeatureValue::Bool(false));
report.feature_report = Some(
FeatureReport::builder()
.objects(vec![
FeatureObject::builder()
.family(ObjectTypeName::new("catalog".to_owned())?)
.location(ObjectLocation {
object: None,
offset: None,
path: Some(BoundedText::new("root/catalog[0]", 64)?),
})
.context(BoundedText::new("root/catalog[0]", 64)?)
.properties(properties)
.build(),
])
.visited_objects(1)
.selected_families(vec![ObjectTypeName::new("catalog".to_owned())?])
.truncated(false)
.build(),
);
report.policy_report = Some(
PolicyReport::builder()
.name(Some(BoundedText::new("catalog-policy", 64)?))
.is_compliant(true)
.results(vec![
PolicyRuleResult::builder()
.id(Identifier::new("catalog-has-no-metadata")?)
.description(BoundedText::new("Catalog metadata is absent", 128)?)
.passed(true)
.matches(1)
.message(BoundedText::new(
"policy rule catalog-has-no-metadata passed with 1 matching feature \
objects",
128,
)?)
.build(),
])
.build(),
);
Ok(report)
}
/// Builds the shared repair fixture: a 42-byte file input copied unchanged to
/// `out/repaired-input.pdf` with status `NoAction`.
fn sample_repair_report() -> RepairReport {
    let engine_version = "0.1.0".to_owned();
    let source = InputSummary::new(InputKind::File, Some(PathBuf::from("input.pdf")), Some(42));
    let output_path = Some(PathBuf::from("out/repaired-input.pdf"));
    RepairReport::builder()
        .engine_version(engine_version)
        .source(source)
        .output_path(output_path)
        .status(RepairStatus::NoAction)
        .actions(vec![RepairAction::CopiedUnchanged])
        .refusal(None)
        .warnings(Vec::new())
        .task_durations(Vec::new())
        .build()
}
/// Default options report parse warnings, skip passed assertions, and keep one failed
/// assertion per rule.
#[test]
fn test_should_apply_validation_options_defaults() {
    let defaults = ValidationOptions::default();
    assert!(defaults.report_parse_warnings);
    assert!(!defaults.record_passed_assertions);
    let per_rule_limit = defaults.max_failed_assertions_per_rule.get();
    assert_eq!(per_rule_limit, 1);
}
/// A zero display limit must be rejected by the `TryFrom` conversion.
#[test]
fn test_should_reject_zero_max_displayed_failures() {
    assert!(MaxDisplayedFailures::try_from(0).is_err());
}
/// An identifier containing a space must be rejected.
#[test]
fn test_should_reject_invalid_identifier() {
    assert!(Identifier::new("bad identifier").is_err());
}
// Golden test: pretty-printed serde JSON of the sample report must equal `expected`
// exactly, including camelCase keys.
#[test]
fn test_should_serialize_validation_report_as_camel_case_json()
-> std::result::Result<(), Box<dyn StdError>> {
let report = sample_report()?;
let json = serde_json::to_string_pretty(&report)?;
let expected = r#"{
"engineVersion": "0.1.0",
"source": {
"kind": "memory",
"bytes": 42
},
"status": "invalid",
"flavours": [
{
"family": "pdfa",
"part": 1,
"conformance": "b"
}
],
"profileReports": [
{
"profile": {
"id": "pdfa-1b",
"name": "PDF/A-1B"
},
"isCompliant": false,
"checksExecuted": 1,
"rulesExecuted": 1,
"failedRules": 1,
"failedAssertions": [
{
"ordinal": 1,
"ruleId": "6.1.2-1",
"status": "failed",
"description": "Header must start at byte zero",
"location": {
"offset": 0
},
"message": "Header offset is non-zero",
"errorArguments": [
{
"name": "offset",
"value": "12"
}
]
}
],
"passedAssertions": [],
"unsupportedRules": []
}
],
"parseFacts": [
{
"kind": "header",
"offset": 12,
"version": {
"major": 1,
"minor": 7
},
"hadLeadingBytes": true
}
],
"warnings": [],
"taskDurations": []
}"#;
assert_eq!(json, expected);
Ok(())
}
/// The compact JSON writer must emit the engine version without whitespace between key
/// and value.
#[test]
fn test_should_write_compact_json_report() -> std::result::Result<(), Box<dyn StdError>> {
    let report = sample_report()?;
    let mut buffer = Vec::new();
    let writer = JsonReportWriter::compact();
    writer
        .write_report(&report, &mut buffer)
        .map_err(Box::<dyn StdError>::from)?;
    let rendered = String::from_utf8(buffer)?;
    assert!(rendered.contains("\"engineVersion\":\"0.1.0\""));
    Ok(())
}
// Golden test: the text writer's output for the sample report must equal `expected`
// exactly.
#[test]
fn test_should_write_text_report() -> std::result::Result<(), Box<dyn StdError>> {
let report = sample_report()?;
let mut output = Vec::new();
TextReportWriter
.write_report(&report, &mut output)
.map_err(Box::<dyn StdError>::from)?;
let text = String::from_utf8(output)?;
let expected = "\
<memory>: invalid
profiles: pdfa-1b
checks: 0 passed, 1 failed, 0 unsupported
first failures:
6.1.2-1 at offset 0: Header offset is non-zero
";
assert_eq!(text, expected);
Ok(())
}
/// The XML writer must emit the declaration, the report root, and the key validation
/// elements for the sample report.
#[test]
fn test_should_write_xml_report() -> std::result::Result<(), Box<dyn StdError>> {
    let report = sample_report()?;
    let mut buffer = Vec::new();
    XmlReportWriter
        .write_report(&report, &mut buffer)
        .map_err(Box::<dyn StdError>::from)?;
    let xml = String::from_utf8(buffer)?;
    let expected_fragments = [
        r#"<?xml version="1.0" encoding="utf-8"?>"#,
        "<report>",
        r#"<validationReport profileName="PDF/A-1B""#,
        r#"<details passedRules="0" failedRules="1""#,
        r#"<check ruleId="6.1.2-1" status="failed" location="offset 0">"#,
        r#"<batchSummary totalJobs="1""#,
    ];
    for fragment in expected_fragments {
        assert!(xml.contains(fragment));
    }
    Ok(())
}
// Golden test: the raw XML writer's output for the feature/policy sample report must
// equal `expected` exactly. The in-memory source name contains `<` and `>`, so it must
// appear entity-escaped inside `<name>` for the document to stay well-formed.
#[test]
fn test_should_write_raw_xml_report_with_feature_and_policy_sections()
-> std::result::Result<(), Box<dyn StdError>> {
let report = sample_feature_policy_report()?;
let mut output = Vec::new();
RawXmlReportWriter
.write_report(&report, &mut output)
.map_err(Box::<dyn StdError>::from)?;
let xml = String::from_utf8(output)?;
let expected = format!(
r#"<?xml version="1.0" encoding="utf-8"?>
<rawReport engine="pdfv-core" version="{version}">
<processorConfig tasks="validation,features,policy"></processorConfig>
<processorResults>
<processorResult status="invalid">
<item size="42">
<name>&lt;memory&gt;</name>
</item>
<validationReport profileName="PDF/A-1B" statement="PDF file is not compliant with Validation Profile requirements." isCompliant="false">
<details passedRules="0" failedRules="1" passedChecks="0" failedChecks="1" unsupportedRules="0"></details>
<failedChecks>
<check ruleId="6.1.2-1" status="failed" location="offset 0">
<description>Header must start at byte zero</description>
<message>Header offset is non-zero</message>
<errorArguments>
<argument name="offset">12</argument>
</errorArguments>
</check>
</failedChecks>
</validationReport>
<featureReport visitedObjects="1" extractedObjects="1" truncated="false">
<featureObject family="catalog" location="root/catalog[0]">
<property name="hasMetadata">
<value type="bool">false</value>
</property>
</featureObject>
</featureReport>
<policyReport name="catalog-policy" isCompliant="true">
<rule id="catalog-has-no-metadata" passed="true" matches="1">
<description>Catalog metadata is absent</description>
<message>policy rule catalog-has-no-metadata passed with 1 matching feature objects</message>
</rule>
</policyReport>
<parseFacts>
<header offset="12" version="1.7" hadLeadingBytes="true"></header>
</parseFacts>
</processorResult>
</processorResults>
<batchSummary totalJobs="1" failedToParse="0" encrypted="0" incomplete="0" internalErrors="0">
<validationReports compliant="0" nonCompliant="1" failedJobs="0">1</validationReports>
<duration elapsedMillis="0"></duration>
</batchSummary>
</rawReport>
"#,
version = super::ENGINE_VERSION,
);
assert_eq!(xml, expected);
Ok(())
}
// Golden test: the HTML writer's output for the sample report must equal `expected`
// exactly. The in-memory source name contains `<` and `>`, so it must appear
// entity-escaped inside the table cell rather than as a stray tag.
#[test]
fn test_should_write_static_html_report() -> std::result::Result<(), Box<dyn StdError>> {
let report = sample_report()?;
let mut output = Vec::new();
HtmlReportWriter
.write_report(&report, &mut output)
.map_err(Box::<dyn StdError>::from)?;
let html = String::from_utf8(output)?;
let expected = "\
<!doctype html>
<html lang=\"en\"><head><meta charset=\"utf-8\"><title>pdfv validation \
report</title><style>body{font-family:system-ui,sans-serif;margin:2rem;\
color:#1f2937}table{border-collapse:collapse;width:100%}th,td{border:1px \
solid #d1d5db;padding:.4rem;text-align:left}th{background:#f3f4f6}</\
style></head><body>
<h1>Validation Report</h1>
<p>0 valid, 1 invalid, 0 parse failed, 0 encrypted, 0 incomplete.</p>
<table><thead><tr><th>Input</th><th>Status</th><th>Profiles</th><th>Features</th><th>Policy</th></\
tr></thead><tbody>
<tr><td>&lt;memory&gt;</td><td>invalid</td><td>pdfa-1b</td><td>-</td><td>-</td></tr>
</tbody></table>
</body></html>
";
assert_eq!(html, expected);
Ok(())
}
/// Repair reports serialize with camelCase status and action kinds, and a batch of one
/// unchanged plus one refused item summarizes as processing-failed.
#[test]
fn test_should_serialize_repair_report_and_summary()
-> std::result::Result<(), Box<dyn StdError>> {
    let unchanged = sample_repair_report();
    let rendered = serde_json::to_string_pretty(&unchanged)?;
    assert!(rendered.contains(r#""status": "noAction""#));
    assert!(rendered.contains(r#""kind": "copiedUnchanged""#));

    let refused_source = InputSummary::new(InputKind::File, Some(PathBuf::from("bad.pdf")), None);
    let refused = RepairReport::builder()
        .engine_version("0.1.0".to_owned())
        .source(refused_source)
        .output_path(None)
        .status(RepairStatus::Refused)
        .actions(Vec::new())
        .refusal(Some(RepairRefusal::Encrypted))
        .warnings(Vec::new())
        .task_durations(Vec::new())
        .build();

    let items = vec![unchanged, refused];
    let batch = RepairBatchReport::from_items(items, Vec::new(), Duration::ZERO);
    let summary = &batch.summary;
    assert_eq!(summary.no_action, 1);
    assert_eq!(summary.refused, 1);
    assert_eq!(summary.worst_exit_category, ExitCategory::ProcessingFailed);
    Ok(())
}
// Golden test: the raw XML and HTML renderings of the sample repair report, produced via
// `ReportFormat::write_repair_report`, must equal the expected documents exactly.
#[test]
fn test_should_write_repair_raw_xml_and_html() -> std::result::Result<(), Box<dyn StdError>> {
let report = sample_repair_report();
let mut raw = Vec::new();
let mut html = Vec::new();
ReportFormat::RawXml
.write_repair_report(&report, &mut raw)
.map_err(Box::<dyn StdError>::from)?;
ReportFormat::Html
.write_repair_report(&report, &mut html)
.map_err(Box::<dyn StdError>::from)?;
let raw = String::from_utf8(raw)?;
let html = String::from_utf8(html)?;
let expected_raw = format!(
r#"<?xml version="1.0" encoding="utf-8"?>
<rawRepairReport engine="pdfv-core" version="{version}">
<processorConfig tasks="metadata"></processorConfig>
<items>
<repairItem status="no action">
<item size="42">
<name>input.pdf</name>
</item>
<output>out/repaired-input.pdf</output>
<actions>
<action kind="copiedUnchanged">copied unchanged</action>
</actions>
</repairItem>
</items>
<repairSummary totalJobs="1" succeeded="0" noAction="1" refused="0" failed="0" elapsedMillis="0"></repairSummary>
</rawRepairReport>
"#,
version = super::ENGINE_VERSION,
);
let expected_html =
"\
<!doctype html>
<html lang=\"en\"><head><meta charset=\"utf-8\"><title>pdfv metadata repair \
report</title><style>body{font-family:system-ui,sans-serif;margin:2rem;color:#\
1f2937}table{border-collapse:collapse;width:100%}th,td{border:1px solid \
#d1d5db;padding:.4rem;text-align:left}th{background:#f3f4f6}</style></head><body>
<h1>Metadata Repair Report</h1>
<p>0 repaired, 1 unchanged, 0 refused, 0 failed.</p>
<table><thead><tr><th>Input</th><th>Status</th><th>Output</th><th>Reason</th></tr></thead><tbody>
<tr><td>input.pdf</td><td>no action</td><td>out/repaired-input.pdf</td><td></td></tr>
</tbody></table>
</body></html>
";
assert_eq!(raw, expected_raw);
assert_eq!(html, expected_html);
Ok(())
}
/// Repairing into a directory that already holds a same-named file must be refused with
/// `InvalidOutputPath`, and the existing file must keep its contents.
#[test]
#[allow(
    clippy::disallowed_methods,
    reason = "unit test creates local repair files synchronously"
)]
fn test_should_refuse_repair_when_output_already_exists_without_removing_it()
-> std::result::Result<(), Box<dyn StdError>> {
    let workspace = tempfile::tempdir()?;
    let input_path = workspace.path().join("input.pdf");
    let output_dir = workspace.path().join("out");
    let existing_output = output_dir.join("input.pdf");
    std::fs::create_dir(&output_dir)?;
    std::fs::write(&input_path, b"not a valid pdf")?;
    std::fs::write(&existing_output, b"existing output")?;

    let options = MetadataRepairOptions::new(ValidationOptions::default(), &output_dir, "")?;
    let repairer = MetadataRepairer::new(options)?;
    let report = repairer.repair_path(&input_path)?;

    assert_eq!(report.status, RepairStatus::Refused);
    assert!(matches!(
        report.refusal,
        Some(RepairRefusal::InvalidOutputPath { .. })
    ));
    // The pre-existing output must be left byte-for-byte intact.
    assert_eq!(std::fs::read(&existing_output)?, b"existing output");
    Ok(())
}
/// A profile name containing U+0001 must make the XML writer fail with `ReportError::Xml`.
#[test]
fn test_should_reject_xml_forbidden_text() -> std::result::Result<(), Box<dyn StdError>> {
    let mut report = sample_report()?;
    let profile = report
        .profile_reports
        .first_mut()
        .ok_or("sample report must contain profile")?;
    profile.profile.name = BoundedText::unchecked("bad\u{1}profile");
    let mut buffer = Vec::new();
    let outcome = XmlReportWriter.write_report(&report, &mut buffer);
    assert!(matches!(
        outcome,
        Err(super::PdfvError::Report(super::ReportError::Xml { .. }))
    ));
    Ok(())
}
/// `ReportFormat::JsonPretty` must produce indented JSON containing the engine version.
#[test]
fn test_should_dispatch_pretty_json_report_format() -> std::result::Result<(), Box<dyn StdError>>
{
    let report = sample_report()?;
    let mut buffer = Vec::new();
    let format = ReportFormat::JsonPretty;
    format
        .write_report(&report, &mut buffer)
        .map_err(Box::<dyn StdError>::from)?;
    let rendered = String::from_utf8(buffer)?;
    assert!(rendered.contains("\n \"engineVersion\": \"0.1.0\""));
    Ok(())
}
/// `ReportFormat::Xml` must produce output containing a `<validationReport` element.
#[test]
fn test_should_dispatch_xml_report_format() -> std::result::Result<(), Box<dyn StdError>> {
    let report = sample_report()?;
    let mut buffer = Vec::new();
    let format = ReportFormat::Xml;
    format
        .write_report(&report, &mut buffer)
        .map_err(Box::<dyn StdError>::from)?;
    let rendered = String::from_utf8(buffer)?;
    assert!(rendered.contains("<validationReport"));
    Ok(())
}
/// A batch of one valid and one invalid report must count both and report
/// `ValidationFailed` as the worst exit category.
#[test]
fn test_should_compute_batch_summary() -> std::result::Result<(), Box<dyn StdError>> {
    let valid_source = InputSummary::new(InputKind::Memory, None, Some(42));
    let valid = ValidationReport::builder()
        .engine_version("0.1.0".to_owned())
        .source(valid_source)
        .status(ValidationStatus::Valid)
        .flavours(Vec::new())
        .profile_reports(Vec::new())
        .parse_facts(Vec::new())
        .warnings(Vec::new())
        .task_durations(Vec::new())
        .build();
    let invalid = sample_report()?;

    let batch = BatchReport::from_items(vec![valid, invalid], Vec::new(), Duration::ZERO);
    let summary = &batch.summary;
    assert_eq!(summary.total_files, 2);
    assert_eq!(summary.valid, 1);
    assert_eq!(summary.invalid, 1);
    assert_eq!(summary.worst_exit_category, ExitCategory::ValidationFailed);
    Ok(())
}
}