use super::types::{ComplianceWarning, WarningCode};
use crate::document::PdfDocument;
use crate::error::Result;
use crate::object::Object;
use std::fmt;
/// Conformance level that a validation run targets.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PdfUaLevel {
    /// First conformance level.
    Ua1,
    /// Second conformance level.
    Ua2,
}
impl fmt::Display for PdfUaLevel {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
PdfUaLevel::Ua1 => write!(f, "PDF/UA-1"),
PdfUaLevel::Ua2 => write!(f, "PDF/UA-2"),
}
}
}
impl PdfUaLevel {
pub fn xmp_part(&self) -> &'static str {
match self {
PdfUaLevel::Ua1 => "1",
PdfUaLevel::Ua2 => "2",
}
}
}
/// Outcome of a validation run: the verdict plus everything that was found.
#[derive(Debug, Clone)]
pub struct UaValidationResult {
    /// `true` while no error has been recorded.
    pub is_compliant: bool,
    /// Level the document was validated against.
    pub level: PdfUaLevel,
    /// Recorded compliance errors.
    pub errors: Vec<UaComplianceError>,
    /// Recorded warnings; these do not affect `is_compliant`.
    pub warnings: Vec<ComplianceWarning>,
    /// Counters describing how much was inspected.
    pub stats: UaValidationStats,
}
impl Default for UaValidationResult {
    /// Builds an empty, compliant result at level `Ua1` with default statistics.
    fn default() -> Self {
        let errors = Vec::new();
        let warnings = Vec::new();
        let stats = UaValidationStats::default();
        Self {
            is_compliant: true,
            level: PdfUaLevel::Ua1,
            errors,
            warnings,
            stats,
        }
    }
}
impl UaValidationResult {
    /// Creates an empty result for `level`; every other field takes its default.
    pub fn new(level: PdfUaLevel) -> Self {
        Self {
            level,
            ..Self::default()
        }
    }

    /// Records `error` and marks the result as non-compliant.
    pub fn add_error(&mut self, error: UaComplianceError) {
        self.is_compliant = false;
        self.errors.push(error);
    }

    /// Records `warning`; the compliance flag is left untouched.
    pub fn add_warning(&mut self, warning: ComplianceWarning) {
        self.warnings.push(warning);
    }

    /// Returns `true` when at least one error has been recorded.
    pub fn has_errors(&self) -> bool {
        let none_recorded = self.errors.is_empty();
        !none_recorded
    }

    /// Returns `true` when at least one warning has been recorded.
    pub fn has_warnings(&self) -> bool {
        let none_recorded = self.warnings.is_empty();
        !none_recorded
    }
}
/// Counters collected during validation; all of them start at zero.
#[derive(Debug, Clone, Default)]
pub struct UaValidationStats {
    /// Counter for structure elements.
    pub structure_elements_checked: usize,
    /// Counter for images.
    pub images_checked: usize,
    /// Counter for images that carry alternate text.
    pub images_with_alt: usize,
    /// Counter for tables.
    pub tables_checked: usize,
    /// Counter for form fields.
    pub form_fields_checked: usize,
    /// Counter for annotations.
    pub annotations_checked: usize,
    /// Counter for pages.
    pub pages_checked: usize,
}
/// A single compliance violation.
#[derive(Debug, Clone)]
pub struct UaComplianceError {
    /// Machine-readable category of the violation.
    pub code: UaErrorCode,
    /// Human-readable description.
    pub message: String,
    /// Optional free-form location of the violation.
    pub location: Option<String>,
    /// Optional WCAG reference.
    pub wcag_ref: Option<String>,
    /// Optional clause reference.
    pub clause: Option<String>,
}
impl UaComplianceError {
    /// Creates an error with the given code and message and no optional details.
    pub fn new(code: UaErrorCode, message: impl Into<String>) -> Self {
        let message = message.into();
        Self {
            code,
            message,
            location: None,
            wcag_ref: None,
            clause: None,
        }
    }

    /// Returns the error with its location set to `location`.
    pub fn with_location(self, location: impl Into<String>) -> Self {
        Self {
            location: Some(location.into()),
            ..self
        }
    }

    /// Returns the error with its WCAG reference set to `wcag_ref`.
    pub fn with_wcag(self, wcag_ref: impl Into<String>) -> Self {
        Self {
            wcag_ref: Some(wcag_ref.into()),
            ..self
        }
    }

    /// Returns the error with its clause reference set to `clause`.
    pub fn with_clause(self, clause: impl Into<String>) -> Self {
        Self {
            clause: Some(clause.into()),
            ..self
        }
    }
}
/// Renders as `[CODE] message`, followed by ` (at LOCATION)` and
/// ` [WCAG REF]` when those details are present. The clause is not rendered.
impl fmt::Display for UaComplianceError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "[{}] {}", self.code, self.message)?;
        if let Some(place) = self.location.as_deref() {
            write!(f, " (at {})", place)?;
        }
        if let Some(reference) = self.wcag_ref.as_deref() {
            write!(f, " [WCAG {}]", reference)?;
        }
        Ok(())
    }
}
/// Categories of compliance violations, grouped by area.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum UaErrorCode {
    // Document-level requirements.
    NotTaggedPdf,
    MissingLanguage,
    MissingTitle,
    TitleNotDisplayed,
    MissingPdfuaId,
    InvalidPdfuaId,

    // Structure tree.
    ContentNotTagged,
    InvalidStructureType,
    MissingRoleMapping,
    InvalidStructureNesting,
    HeadingLevelSkipped,

    // Figures.
    FigureMissingAlt,
    DecorativeNotArtifact,
    FigureCaptionNotAssociated,

    // Tables.
    TableMissingHeaders,
    TableHeaderNotTh,
    TableDataNotTd,
    TableHeadersNotAssociated,
    TableScopeMissing,
    ComplexTableNoIds,

    // Forms.
    FormFieldMissingName,
    FormFieldMissingTooltip,
    RequiredFieldNotIndicated,
    FormNoSubmitButton,

    // Links.
    LinkTextNotDescriptive,
    LinkNoDestination,

    // Lists.
    ListItemsNotMarked,
    NestedListInvalid,

    // Annotations.
    AnnotationNotTagged,
    AnnotationMissingContents,
    WidgetMissingRole,

    // Text and fonts.
    FontNotEmbedded,
    MissingUnicodeMapping,
    MissingActualText,

    // Color.
    InsufficientContrast,
    ColorOnlyInformation,

    // Everything else.
    JavaScriptNoAlternative,
    MultimediaNoCaptions,
    ReadingOrderInvalid,
    BookmarksMismatch,
}
impl fmt::Display for UaErrorCode {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let code = match self {
UaErrorCode::NotTaggedPdf => "UA-DOC-001",
UaErrorCode::MissingLanguage => "UA-DOC-002",
UaErrorCode::MissingTitle => "UA-DOC-003",
UaErrorCode::TitleNotDisplayed => "UA-DOC-004",
UaErrorCode::MissingPdfuaId => "UA-DOC-005",
UaErrorCode::InvalidPdfuaId => "UA-DOC-006",
UaErrorCode::ContentNotTagged => "UA-STRUCT-001",
UaErrorCode::InvalidStructureType => "UA-STRUCT-002",
UaErrorCode::MissingRoleMapping => "UA-STRUCT-003",
UaErrorCode::InvalidStructureNesting => "UA-STRUCT-004",
UaErrorCode::HeadingLevelSkipped => "UA-STRUCT-005",
UaErrorCode::FigureMissingAlt => "UA-FIG-001",
UaErrorCode::DecorativeNotArtifact => "UA-FIG-002",
UaErrorCode::FigureCaptionNotAssociated => "UA-FIG-003",
UaErrorCode::TableMissingHeaders => "UA-TBL-001",
UaErrorCode::TableHeaderNotTh => "UA-TBL-002",
UaErrorCode::TableDataNotTd => "UA-TBL-003",
UaErrorCode::TableHeadersNotAssociated => "UA-TBL-004",
UaErrorCode::TableScopeMissing => "UA-TBL-005",
UaErrorCode::ComplexTableNoIds => "UA-TBL-006",
UaErrorCode::FormFieldMissingName => "UA-FORM-001",
UaErrorCode::FormFieldMissingTooltip => "UA-FORM-002",
UaErrorCode::RequiredFieldNotIndicated => "UA-FORM-003",
UaErrorCode::FormNoSubmitButton => "UA-FORM-004",
UaErrorCode::LinkTextNotDescriptive => "UA-LINK-001",
UaErrorCode::LinkNoDestination => "UA-LINK-002",
UaErrorCode::ListItemsNotMarked => "UA-LIST-001",
UaErrorCode::NestedListInvalid => "UA-LIST-002",
UaErrorCode::AnnotationNotTagged => "UA-ANNOT-001",
UaErrorCode::AnnotationMissingContents => "UA-ANNOT-002",
UaErrorCode::WidgetMissingRole => "UA-ANNOT-003",
UaErrorCode::FontNotEmbedded => "UA-TEXT-001",
UaErrorCode::MissingUnicodeMapping => "UA-TEXT-002",
UaErrorCode::MissingActualText => "UA-TEXT-003",
UaErrorCode::InsufficientContrast => "UA-COLOR-001",
UaErrorCode::ColorOnlyInformation => "UA-COLOR-002",
UaErrorCode::JavaScriptNoAlternative => "UA-OTHER-001",
UaErrorCode::MultimediaNoCaptions => "UA-OTHER-002",
UaErrorCode::ReadingOrderInvalid => "UA-OTHER-003",
UaErrorCode::BookmarksMismatch => "UA-OTHER-004",
};
write!(f, "{}", code)
}
}
/// Configurable validator; options are set through its builder-style methods.
///
/// Derives `Debug` and `Clone` so a configured validator can be logged and
/// reused.
#[derive(Debug, Clone)]
pub struct PdfUaValidator {
    /// Whether the heading-sequence check is requested.
    check_heading_sequence: bool,
    /// Whether the color-contrast check is requested.
    check_color_contrast: bool,
    /// Names of custom structure types the caller wants accepted.
    allowed_custom_types: Vec<String>,
}
impl Default for PdfUaValidator {
fn default() -> Self {
Self::new()
}
}
impl PdfUaValidator {
pub fn new() -> Self {
Self {
check_heading_sequence: true,
check_color_contrast: false, allowed_custom_types: Vec::new(),
}
}
pub fn check_heading_sequence(mut self, enabled: bool) -> Self {
self.check_heading_sequence = enabled;
self
}
pub fn check_color_contrast(mut self, enabled: bool) -> Self {
self.check_color_contrast = enabled;
self
}
pub fn allow_custom_types(mut self, types: Vec<String>) -> Self {
self.allowed_custom_types = types;
self
}
pub fn validate(
&self,
document: &mut PdfDocument,
level: PdfUaLevel,
) -> Result<UaValidationResult> {
let mut result = UaValidationResult::new(level);
self.validate_tagged_pdf(document, &mut result)?;
self.validate_language(document, &mut result)?;
self.validate_title(document, &mut result)?;
self.validate_structure_tree(document, &mut result)?;
self.validate_figures(document, &mut result)?;
self.validate_tables(document, &mut result)?;
self.validate_form_fields(document, &mut result)?;
self.validate_annotations(document, &mut result)?;
result.is_compliant = result.errors.is_empty();
Ok(result)
}
fn validate_tagged_pdf(
&self,
document: &mut PdfDocument,
result: &mut UaValidationResult,
) -> Result<()> {
let catalog = document.catalog()?;
let catalog_dict = match catalog {
Object::Dictionary(d) => d,
_ => {
result.add_error(
UaComplianceError::new(UaErrorCode::NotTaggedPdf, "Invalid document catalog")
.with_clause("7.1"),
);
return Ok(());
},
};
let is_marked = if let Some(mark_info) = catalog_dict.get("MarkInfo") {
let resolved_mark_info = document.resolve_references(mark_info, 1)?;
if let Object::Dictionary(mi) = resolved_mark_info {
matches!(mi.get("Marked"), Some(Object::Boolean(true)))
} else {
false
}
} else {
false
};
if !is_marked {
result.add_error(
UaComplianceError::new(
UaErrorCode::NotTaggedPdf,
"Document must be a Tagged PDF (MarkInfo/Marked = true)",
)
.with_clause("7.1")
.with_wcag("1.3.1"),
);
}
if !catalog_dict.contains_key("StructTreeRoot") {
result.add_error(
UaComplianceError::new(
UaErrorCode::NotTaggedPdf,
"Document must have a structure tree (StructTreeRoot)",
)
.with_clause("7.1")
.with_wcag("1.3.1"),
);
}
Ok(())
}
fn validate_language(
&self,
document: &mut PdfDocument,
result: &mut UaValidationResult,
) -> Result<()> {
let catalog = document.catalog()?;
let catalog_dict = match catalog {
Object::Dictionary(d) => d,
_ => return Ok(()),
};
if !catalog_dict.contains_key("Lang") {
result.add_error(
UaComplianceError::new(
UaErrorCode::MissingLanguage,
"Document must specify a primary language (/Lang in catalog)",
)
.with_clause("7.2")
.with_wcag("3.1.1"),
);
} else {
if let Some(Object::String(lang)) = catalog_dict.get("Lang") {
let lang_str = String::from_utf8_lossy(lang);
if lang_str.is_empty() || !is_valid_language_tag(&lang_str) {
result.add_warning(ComplianceWarning::new(
WarningCode::MissingRecommendedMetadata,
format!("Language tag '{}' may not be a valid BCP 47 tag", lang_str),
));
}
}
}
Ok(())
}
fn validate_title(
&self,
document: &mut PdfDocument,
result: &mut UaValidationResult,
) -> Result<()> {
let catalog = document.catalog()?;
let catalog_dict = match catalog {
Object::Dictionary(d) => d,
_ => return Ok(()),
};
let display_title = if let Some(vp) = catalog_dict.get("ViewerPreferences") {
let resolved_vp = document.resolve_references(vp, 1)?;
if let Object::Dictionary(vp_dict) = resolved_vp {
matches!(vp_dict.get("DisplayDocTitle"), Some(Object::Boolean(true)))
} else {
false
}
} else {
false
};
if !display_title {
result.add_error(
UaComplianceError::new(
UaErrorCode::TitleNotDisplayed,
"ViewerPreferences/DisplayDocTitle must be true",
)
.with_clause("7.1")
.with_wcag("2.4.2"),
);
}
let trailer = document.trailer().clone();
let has_title = if let Object::Dictionary(trailer_dict) = trailer {
if let Some(info_ref) = trailer_dict.get("Info") {
let info_obj = document.resolve_references(info_ref, 1)?;
if let Object::Dictionary(info_dict) = info_obj {
if let Some(Object::String(title)) = info_dict.get("Title") {
!title.is_empty()
} else {
false
}
} else {
false
}
} else {
false
}
} else {
false
};
if !has_title {
result.add_error(
UaComplianceError::new(
UaErrorCode::MissingTitle,
"Document must have a title in document info or XMP metadata",
)
.with_clause("7.1")
.with_wcag("2.4.2"),
);
}
Ok(())
}
fn validate_structure_tree(
&self,
document: &mut PdfDocument,
result: &mut UaValidationResult,
) -> Result<()> {
let catalog = document.catalog()?;
let catalog_dict = match catalog {
Object::Dictionary(d) => d,
_ => return Ok(()),
};
let struct_tree_root = match catalog_dict.get("StructTreeRoot") {
Some(obj) => document.resolve_references(obj, 1)?,
None => return Ok(()), };
if let Object::Dictionary(str_dict) = struct_tree_root {
let has_role_map = str_dict.contains_key("RoleMap");
if !str_dict.contains_key("K") {
result.add_warning(ComplianceWarning::new(
WarningCode::PartialCheck,
"Structure tree root has no children",
));
}
if self.check_heading_sequence {
result.add_warning(ComplianceWarning::new(
WarningCode::PartialCheck,
"Heading sequence validation requires full structure tree traversal",
));
}
result.stats.structure_elements_checked += 1;
let _ = has_role_map;
}
Ok(())
}
fn validate_figures(
&self,
document: &mut PdfDocument,
result: &mut UaValidationResult,
) -> Result<()> {
result.add_warning(ComplianceWarning::new(
WarningCode::PartialCheck,
"Figure alt text validation requires structure tree traversal",
));
let _ = document;
Ok(())
}
fn validate_tables(
&self,
document: &mut PdfDocument,
result: &mut UaValidationResult,
) -> Result<()> {
result.add_warning(ComplianceWarning::new(
WarningCode::PartialCheck,
"Table header validation requires structure tree traversal",
));
let _ = document;
Ok(())
}
fn validate_form_fields(
&self,
document: &mut PdfDocument,
result: &mut UaValidationResult,
) -> Result<()> {
let catalog = document.catalog()?;
let catalog_dict = match catalog {
Object::Dictionary(d) => d,
_ => return Ok(()),
};
let acro_form = match catalog_dict.get("AcroForm") {
Some(obj) => document.resolve_references(obj, 1)?,
None => return Ok(()), };
if let Object::Dictionary(form_dict) = acro_form {
if let Some(fields) = form_dict.get("Fields") {
let resolved_fields = document.resolve_references(fields, 1)?;
if let Object::Array(fields_arr) = resolved_fields {
for field in &fields_arr {
let resolved_field = document.resolve_references(field, 1)?;
if let Object::Dictionary(field_dict) = resolved_field {
if !field_dict.contains_key("TU") && !field_dict.contains_key("T") {
result.add_warning(ComplianceWarning::new(
WarningCode::MissingRecommendedMetadata,
"Form field missing TU (tooltip) or T (name)",
));
}
result.stats.form_fields_checked += 1;
}
}
}
}
}
Ok(())
}
fn validate_annotations(
&self,
document: &mut PdfDocument,
result: &mut UaValidationResult,
) -> Result<()> {
result.add_warning(ComplianceWarning::new(
WarningCode::PartialCheck,
"Full annotation validation requires page-level access",
));
let _ = document;
Ok(())
}
}
/// Lightweight syntactic check of a BCP 47 language tag.
///
/// The tag is split on `-`. The primary subtag must be 2 or 3 ASCII letters,
/// and every following subtag must be 1 to 8 ASCII alphanumeric characters.
/// Empty subtags (as in `"en-"` or `"en--US"`) are therefore rejected.
///
/// This checks shape only; subtags are not looked up in any registry.
fn is_valid_language_tag(tag: &str) -> bool {
    let mut subtags = tag.split('-');

    // `split` always yields at least one item, so an empty tag surfaces as an
    // empty primary subtag and fails the length check below.
    let primary = subtags.next().unwrap_or("");
    let primary_ok =
        (2..=3).contains(&primary.len()) && primary.bytes().all(|b| b.is_ascii_alphabetic());

    primary_ok
        && subtags.all(|subtag| {
            (1..=8).contains(&subtag.len()) && subtag.bytes().all(|b| b.is_ascii_alphanumeric())
        })
}
/// Validates `document` against `level` using a validator with the default
/// configuration.
///
/// # Errors
///
/// Propagates any error returned by [`PdfUaValidator::validate`].
pub fn validate_pdf_ua(
    document: &mut PdfDocument,
    level: PdfUaLevel,
) -> Result<UaValidationResult> {
    PdfUaValidator::new().validate(document, level)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `Display` for the level yields the level's textual name.
    #[test]
    fn test_pdf_ua_level_display() {
        assert_eq!(PdfUaLevel::Ua1.to_string(), "PDF/UA-1");
        assert_eq!(PdfUaLevel::Ua2.to_string(), "PDF/UA-2");
    }

    /// `xmp_part` yields the part number as a string.
    #[test]
    fn test_pdf_ua_level_xmp() {
        for &(level, part) in &[(PdfUaLevel::Ua1, "1"), (PdfUaLevel::Ua2, "2")] {
            assert_eq!(level.xmp_part(), part);
        }
    }

    /// A fresh result is compliant; recording an error flips the verdict.
    #[test]
    fn test_validation_result() {
        let mut outcome = UaValidationResult::new(PdfUaLevel::Ua1);
        assert!(outcome.is_compliant);
        assert!(!outcome.has_errors());

        let not_tagged = UaComplianceError::new(UaErrorCode::NotTaggedPdf, "Not tagged");
        outcome.add_error(not_tagged);
        assert!(outcome.has_errors());
        assert!(!outcome.is_compliant);
    }

    /// The rendered error carries the code, the location and the WCAG reference.
    #[test]
    fn test_compliance_error_display() {
        let rendered =
            UaComplianceError::new(UaErrorCode::FigureMissingAlt, "Image without alt text")
                .with_location("Page 1")
                .with_wcag("1.1.1")
                .to_string();

        for &fragment in &["[UA-FIG-001]", "Page 1", "WCAG 1.1.1"] {
            assert!(rendered.contains(fragment));
        }
    }

    /// Error codes render as their textual identifiers.
    #[test]
    fn test_error_code_display() {
        let expectations = [
            (UaErrorCode::NotTaggedPdf, "UA-DOC-001"),
            (UaErrorCode::FigureMissingAlt, "UA-FIG-001"),
            (UaErrorCode::TableMissingHeaders, "UA-TBL-001"),
        ];
        for &(code, text) in &expectations {
            assert_eq!(code.to_string(), text);
        }
    }

    /// Accepts common well-formed tags and rejects empty, too-short and
    /// too-long primary subtags.
    #[test]
    fn test_language_tag_validation() {
        for &accepted in &["en", "en-US", "zh-Hans", "de-AT"] {
            assert!(is_valid_language_tag(accepted));
        }
        for &rejected in &["", "e", "english"] {
            assert!(!is_valid_language_tag(rejected));
        }
    }

    /// Builder methods store the values they are given.
    #[test]
    fn test_validator_builder() {
        let custom_types = vec!["MyHeading".to_string()];
        let validator = PdfUaValidator::new()
            .check_heading_sequence(false)
            .check_color_contrast(true)
            .allow_custom_types(custom_types);

        assert!(!validator.check_heading_sequence);
        assert!(validator.check_color_contrast);
        assert!(validator
            .allowed_custom_types
            .iter()
            .any(|name| name == "MyHeading"));
    }
}