/// Conformance profile requested for a generated PDF.
///
/// [`PdfCompliance::Standard`] is the default and asks for neither of the
/// identification schemas handled by the predicates below.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum PdfCompliance {
    /// No PDF/A or PDF/UA identification is required.
    #[default]
    Standard,
    /// PDF/A identification only.
    PdfA1b,
    /// PDF/UA identification only.
    PdfUA1,
    /// Both PDF/A and PDF/UA identification.
    PdfA1bUA1,
}

impl PdfCompliance {
    /// Returns `true` for the profiles that include PDF/A
    /// (`PdfA1b` and `PdfA1bUA1`).
    pub fn requires_pdfa(&self) -> bool {
        // Exhaustive on purpose: adding a variant must force a decision here.
        match self {
            Self::PdfA1b | Self::PdfA1bUA1 => true,
            Self::Standard | Self::PdfUA1 => false,
        }
    }

    /// Returns `true` for the profiles that include PDF/UA
    /// (`PdfUA1` and `PdfA1bUA1`).
    pub fn requires_pdfua(&self) -> bool {
        match self {
            Self::PdfUA1 | Self::PdfA1bUA1 => true,
            Self::Standard | Self::PdfA1b => false,
        }
    }
}
/// Embedded ICC colour profile, stored as raw bytes.
///
/// What the bytes themselves show:
/// - bytes `0..4` are `00 00 01 DC`, i.e. the header declares a total length
///   of 0x1DC (476) bytes;
/// - bytes `12..16`, `16..20` and `20..24` are the ASCII signatures `mntr`,
///   `RGB ` and `XYZ `;
/// - the description element carries the text `sRGB IEC61966-2-1` and the
///   copyright element carries `Copyright IEC http://www.iec.ch`;
/// - the `rTRC`, `gTRC` and `bTRC` directory entries all point at the same
///   offset (0x1CE), so the three channels share one `curv` element.
///
/// NOTE(review): presumably intended as the PDF/A output-intent profile —
/// confirm against the code that embeds it.
pub const SRGB_ICC_PROFILE: &[u8] = &[
// Header: declared length, version, signatures and illuminant values, then zero padding.
0x00, 0x00, 0x01, 0xDC, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x6D, 0x6E, 0x74, 0x72, 0x52, 0x47, 0x42, 0x20, 0x58, 0x59, 0x5A, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x61, 0x63, 0x73, 0x70, 0x4D, 0x53, 0x46, 0x54, 0x00, 0x00, 0x00, 0x00, 0x49, 0x45, 0x43, 0x20, 0x73, 0x52, 0x47, 0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0xF6, 0xD6, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0xD3, 0x2D, 0x48, 0x50, 0x20, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// Tag directory: entry count (9), then one (signature, offset, size) triple per line.
0x00, 0x00, 0x00, 0x09, 0x64, 0x65, 0x73, 0x63, 0x00, 0x00, 0x00, 0xF0, 0x00, 0x00, 0x00, 0x64,
0x63, 0x70, 0x72, 0x74, 0x00, 0x00, 0x01, 0x54, 0x00, 0x00, 0x00, 0x2A,
0x77, 0x74, 0x70, 0x74, 0x00, 0x00, 0x01, 0x7E, 0x00, 0x00, 0x00, 0x14,
0x72, 0x58, 0x59, 0x5A, 0x00, 0x00, 0x01, 0x92, 0x00, 0x00, 0x00, 0x14,
0x67, 0x58, 0x59, 0x5A, 0x00, 0x00, 0x01, 0xA6, 0x00, 0x00, 0x00, 0x14,
0x62, 0x58, 0x59, 0x5A, 0x00, 0x00, 0x01, 0xBA, 0x00, 0x00, 0x00, 0x14,
0x72, 0x54, 0x52, 0x43, 0x00, 0x00, 0x01, 0xCE, 0x00, 0x00, 0x00, 0x0E,
0x67, 0x54, 0x52, 0x43, 0x00, 0x00, 0x01, 0xCE, 0x00, 0x00, 0x00, 0x0E,
0x62, 0x54, 0x52, 0x43, 0x00, 0x00, 0x01, 0xCE, 0x00, 0x00, 0x00, 0x0E,
// 'desc' element: "sRGB IEC61966-2-1" followed by zero padding.
0x64, 0x65, 0x73, 0x63, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x73, 0x52, 0x47, 0x42, 0x20, 0x49, 0x45, 0x43, 0x36, 0x31, 0x39, 0x36, 0x36, 0x2D, 0x32, 0x2D, 0x31, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00,
// 'text' (copyright) element, four 'XYZ ' elements, then the shared 'curv' element.
0x74, 0x65, 0x78, 0x74, 0x00, 0x00, 0x00, 0x00, 0x43, 0x6F, 0x70, 0x79, 0x72, 0x69, 0x67, 0x68, 0x74, 0x20, 0x49, 0x45, 0x43, 0x20, 0x68, 0x74, 0x74, 0x70, 0x3A, 0x2F, 0x2F, 0x77, 0x77, 0x77, 0x2E, 0x69, 0x65, 0x63, 0x2E, 0x63, 0x68, 0x00, 0x00, 0x00, 0x58, 0x59, 0x5A, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF6, 0xD6, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0xD3, 0x2D, 0x58, 0x59, 0x5A, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6E, 0xA2, 0x00, 0x00, 0x38, 0xF2, 0x00, 0x00, 0x03, 0x90, 0x58, 0x59, 0x5A, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x62, 0x99, 0x00, 0x00, 0xB7, 0x85, 0x00, 0x00, 0x18, 0xDA, 0x58, 0x59, 0x5A, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x0E, 0x00, 0x00, 0x0B, 0xA3, 0x00, 0x00, 0xB6, 0xCF, 0x63, 0x75, 0x72, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02,
0x33, ];
/// Builds a complete XMP packet (including the `<?xpacket …?>` wrappers) for a
/// document.
///
/// The packet always contains `dc:title`, `dc:format` (`application/pdf`) and
/// `xmp:CreatorTool`. A `pdfaid` description (part `1`, conformance `B`) is
/// appended when `compliance.requires_pdfa()` holds, and a `pdfuaid`
/// description (part `1`) when `compliance.requires_pdfua()` holds.
///
/// # Parameters
/// - `title`: document title; `None` is rendered as `Untitled`.
/// - `creator_tool`: value written to `xmp:CreatorTool`.
/// - `compliance`: selects which identification blocks are emitted.
///
/// # Returns
/// The packet as a `String`. `title` and `creator_tool` are XML-escaped
/// (`&`, `<`, `>`), so arbitrary text cannot break the packet's
/// well-formedness; callers must pass plain, unescaped text.
pub fn generate_xmp_metadata(
    title: Option<&str>,
    creator_tool: &str,
    compliance: PdfCompliance,
) -> String {
    /// Escapes the characters that are markup-significant in XML element
    /// content. Quotes are left alone because the values are never placed
    /// inside an attribute.
    fn escape_text(raw: &str) -> String {
        let mut escaped = String::with_capacity(raw.len());
        for ch in raw.chars() {
            match ch {
                '&' => escaped.push_str("&amp;"),
                '<' => escaped.push_str("&lt;"),
                '>' => escaped.push_str("&gt;"),
                other => escaped.push(other),
            }
        }
        escaped
    }

    let title_str = escape_text(title.unwrap_or("Untitled"));
    let tool_str = escape_text(creator_tool);

    // A trailing `\` swallows the line break and the next line's indentation,
    // so the source indentation below never reaches the output.
    let pdfa_part = if compliance.requires_pdfa() {
        " <rdf:Description rdf:about=\"\"\n\
         xmlns:pdfaid=\"http://www.aiim.org/pdfa/ns/id/\">\n\
         <pdfaid:part>1</pdfaid:part>\n\
         <pdfaid:conformance>B</pdfaid:conformance>\n\
         </rdf:Description>\n"
    } else {
        ""
    };
    let pdfua_part = if compliance.requires_pdfua() {
        " <rdf:Description rdf:about=\"\"\n\
         xmlns:pdfuaid=\"http://www.aiim.org/pdfua/ns/id/\">\n\
         <pdfuaid:part>1</pdfuaid:part>\n\
         </rdf:Description>\n"
    } else {
        ""
    };

    format!(
        "<?xpacket begin=\"\u{FEFF}\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n\
         <x:xmpmeta xmlns:x=\"adobe:ns:meta/\">\n \
         <rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n \
         <rdf:Description rdf:about=\"\"\n \
         xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n \
         xmlns:xmp=\"http://ns.adobe.com/xap/1.0/\">\n \
         <dc:title>\n <rdf:Alt>\n \
         <rdf:li xml:lang=\"x-default\">{title}</rdf:li>\n \
         </rdf:Alt>\n </dc:title>\n \
         <dc:format>application/pdf</dc:format>\n \
         <xmp:CreatorTool>{tool}</xmp:CreatorTool>\n \
         </rdf:Description>\n\
         {pdfa}{pdfua}\
         </rdf:RDF>\n\
         </x:xmpmeta>\n\
         <?xpacket end=\"w\"?>",
        title = title_str,
        tool = tool_str,
        pdfa = pdfa_part,
        pdfua = pdfua_part,
    )
}
/// Dublin Core values pulled out of an XMP packet by `extract_dc_fields`.
///
/// Each field holds the text found inside the corresponding `<dc:…>` element,
/// or `None` when that element is absent or empty.
#[derive(Debug, Default, Clone)]
pub struct DcFields {
// Text of `<dc:title>`.
pub title: Option<String>,
// Text of `<dc:creator>`.
pub creator: Option<String>,
// Text of `<dc:description>`.
pub description: Option<String>,
// Text of `<dc:date>`, kept as the raw string (no date parsing is done).
pub date: Option<String>,
// Text of `<dc:rights>`.
pub rights: Option<String>,
// Text of `<dc:language>`.
pub language: Option<String>,
}
/// Reads the six supported Dublin Core elements (`title`, `creator`,
/// `description`, `date`, `rights`, `language`) out of `xmp`.
///
/// Every field is looked up independently through `extract_dc_value`; an
/// element that is missing or empty yields `None` for that field.
pub fn extract_dc_fields(xmp: &str) -> DcFields {
    let lookup = |tag: &str| extract_dc_value(xmp, tag);

    let title = lookup("title");
    let creator = lookup("creator");
    let description = lookup("description");
    let date = lookup("date");
    let rights = lookup("rights");
    let language = lookup("language");

    DcFields {
        title,
        creator,
        description,
        date,
        rights,
        language,
    }
}
/// Returns the trimmed text of the first `<dc:{tag}>…</dc:{tag}>` element in
/// `xmp`, or `None` when the element is missing or has no text.
///
/// Two content shapes are understood:
/// - plain text directly inside the element;
/// - an RDF container (`rdf:Alt`, `rdf:Seq`, …) whose items are `<rdf:li>`
///   elements, in which case the first non-empty item is returned.
///
/// When the element contains `<rdf:li>` items, the container markup itself is
/// never returned: empty or self-closing items are skipped, and `None` is
/// returned if no item carries text. A malformed item (no `>` or no closing
/// `</rdf:li`) also yields `None`.
///
/// This is a lightweight substring scan, not an XML parser: the opening tag
/// must be exactly `<dc:{tag}>` (no attributes) and entity references are
/// returned undecoded.
fn extract_dc_value(xmp: &str, tag: &str) -> Option<String> {
    const LI_OPEN: &str = "<rdf:li";
    const LI_CLOSE: &str = "</rdf:li";

    let open_tag = format!("<dc:{tag}>");
    let close_tag = format!("</dc:{tag}>");

    let (_, after_open) = xmp.split_once(open_tag.as_str())?;
    let (inner, _) = after_open.split_once(close_tag.as_str())?;

    if inner.contains(LI_OPEN) {
        let mut rest = inner;
        while let Some(li_start) = rest.find(LI_OPEN) {
            let after_name = &rest[li_start + LI_OPEN.len()..];
            let tag_end = after_name.find('>')?;
            let attrs = &after_name[..tag_end];
            let body = &after_name[tag_end + 1..];

            // `<rdf:li …/>` has no content and no closing tag of its own;
            // pairing it with a later `</rdf:li` would capture markup.
            if attrs.ends_with('/') {
                rest = body;
                continue;
            }

            let body_end = body.find(LI_CLOSE)?;
            let value = body[..body_end].trim();
            if !value.is_empty() {
                return Some(value.to_string());
            }
            rest = &body[body_end + LI_CLOSE.len()..];
        }
        // Only empty items were found: report "no value" rather than the
        // surrounding container markup.
        return None;
    }

    let value = inner.trim();
    (!value.is_empty()).then(|| value.to_string())
}
/// Normalises caller-supplied XMP so that it satisfies `compliance`.
///
/// Steps, in order:
/// 1. If `source` has no `<?xpacket` processing instruction, it is wrapped in
///    the standard begin/end packet wrappers.
/// 2. For each identification schema that `compliance` requires and whose
///    `…:part` property is not already present, an `rdf:Description` block is
///    spliced in immediately before the first `</rdf:RDF>`.
///
/// When nothing needs to be added, or when no `</rdf:RDF>` can be found, the
/// (possibly wrapped) input is returned unchanged.
pub fn reconcile_xmp(source: &str, compliance: PdfCompliance) -> String {
    const RDF_CLOSE: &str = "</rdf:RDF>";
    // A trailing `\` swallows the line break and the next line's indentation.
    const PDFA_BLOCK: &str = " <rdf:Description rdf:about=\"\"\n\
        xmlns:pdfaid=\"http://www.aiim.org/pdfa/ns/id/\">\n\
        <pdfaid:part>1</pdfaid:part>\n\
        <pdfaid:conformance>B</pdfaid:conformance>\n\
        </rdf:Description>";
    const PDFUA_BLOCK: &str = " <rdf:Description rdf:about=\"\"\n\
        xmlns:pdfuaid=\"http://www.aiim.org/pdfua/ns/id/\">\n\
        <pdfuaid:part>1</pdfuaid:part>\n\
        </rdf:Description>";

    let packet = if source.contains("<?xpacket") {
        source.to_string()
    } else {
        format!(
            "<?xpacket begin=\"\u{FEFF}\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n{}\n<?xpacket end=\"w\"?>",
            source
        )
    };

    // Only add a block when it is both required and not already declared.
    let add_pdfa = compliance.requires_pdfa() && !packet.contains("pdfaid:part");
    let add_pdfua = compliance.requires_pdfua() && !packet.contains("pdfuaid:part");
    if !(add_pdfa || add_pdfua) {
        return packet;
    }

    let split_at = match packet.find(RDF_CLOSE) {
        Some(pos) => pos,
        // No RDF root to splice into: leave the packet as it is.
        None => return packet,
    };
    let (head, tail) = packet.split_at(split_at);

    let mut out = String::with_capacity(packet.len() + 256);
    out.push_str(head);
    for (wanted, block) in [(add_pdfa, PDFA_BLOCK), (add_pdfua, PDFUA_BLOCK)] {
        if wanted {
            out.push('\n');
            out.push_str(block);
            out.push('\n');
        }
    }
    out.push_str(tail);
    out
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The derived `Default` must pick the non-compliance variant.
    #[test]
    fn test_compliance_default() {
        assert_eq!(PdfCompliance::default(), PdfCompliance::Standard);
    }

    /// Predicate results for the PDF/A-bearing variants and for `Standard`.
    #[test]
    fn test_compliance_pdfa_flags() {
        let pdfa_only = PdfCompliance::PdfA1b;
        assert!(pdfa_only.requires_pdfa());
        assert!(!pdfa_only.requires_pdfua());

        let combined = PdfCompliance::PdfA1bUA1;
        assert!(combined.requires_pdfa());
        assert!(combined.requires_pdfua());

        assert!(!PdfCompliance::Standard.requires_pdfa());
    }

    /// A PDF/A packet carries the pdfaid block and nothing from pdfuaid.
    #[test]
    fn test_xmp_metadata_pdfa() {
        let packet = generate_xmp_metadata(Some("Test Doc"), "fop-rs", PdfCompliance::PdfA1b);
        for needle in ["pdfaid:part", "<pdfaid:conformance>B</pdfaid:conformance>"] {
            assert!(packet.contains(needle));
        }
        assert!(!packet.contains("pdfuaid"));
    }

    /// A PDF/UA packet carries the pdfuaid block only.
    #[test]
    fn test_xmp_metadata_pdfua() {
        let packet = generate_xmp_metadata(None, "fop-rs", PdfCompliance::PdfUA1);
        assert!(!packet.contains("pdfaid:part"));
        assert!(packet.contains("pdfuaid:part"));
    }

    /// The combined profile carries both identification blocks.
    #[test]
    fn test_xmp_metadata_combined() {
        let packet = generate_xmp_metadata(Some("Test"), "fop-rs", PdfCompliance::PdfA1bUA1);
        for needle in ["pdfaid:part", "pdfuaid:part"] {
            assert!(packet.contains(needle));
        }
    }

    /// The length declared in the first four header bytes (big-endian) must
    /// equal the actual array length.
    #[test]
    fn test_srgb_icc_profile_size() {
        let actual = SRGB_ICC_PROFILE.len();
        assert!(
            actual >= 128,
            "ICC profile must be at least 128 bytes (header only)"
        );

        let mut size_field = [0u8; 4];
        size_field.copy_from_slice(&SRGB_ICC_PROFILE[..4]);
        let declared = u32::from_be_bytes(size_field) as usize;

        assert_eq!(
            declared, actual,
            "ICC header declares {declared} bytes but array has {} bytes",
            actual
        );
    }
}
#[cfg(test)]
mod tests_extended {
    use super::*;

    /// Minimal `x:xmpmeta` document with an empty `rdf:RDF` root and no
    /// packet wrappers; shared by the `reconcile_xmp` tests.
    const BARE_XMPMETA: &str = r#"<x:xmpmeta xmlns:x="adobe:ns:meta/"><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"></rdf:RDF></x:xmpmeta>"#;

    // ---- PdfCompliance ----------------------------------------------------

    #[test]
    fn test_compliance_standard_requires_nothing() {
        let standard = PdfCompliance::Standard;
        assert!(!standard.requires_pdfa());
        assert!(!standard.requires_pdfua());
    }

    #[test]
    fn test_compliance_pdfua_only() {
        let ua_only = PdfCompliance::PdfUA1;
        assert!(!ua_only.requires_pdfa());
        assert!(ua_only.requires_pdfua());
    }

    #[test]
    fn test_compliance_pdfa_variant_name() {
        let distinct_pairs = [
            (PdfCompliance::Standard, PdfCompliance::PdfA1b),
            (PdfCompliance::PdfA1b, PdfCompliance::PdfUA1),
            (PdfCompliance::PdfUA1, PdfCompliance::PdfA1bUA1),
        ];
        for (left, right) in distinct_pairs {
            assert_ne!(left, right);
        }
    }

    #[test]
    fn test_compliance_copy_clone() {
        let original = PdfCompliance::PdfA1b;
        // `original` stays usable after the assignment because the type is `Copy`.
        let duplicate = original;
        assert_eq!(original, duplicate);
    }

    // ---- generate_xmp_metadata --------------------------------------------

    #[test]
    fn test_xmp_standard_contains_no_compliance_ids() {
        let packet = generate_xmp_metadata(Some("Doc"), "fop-rs", PdfCompliance::Standard);
        for forbidden in ["pdfaid", "pdfuaid"] {
            assert!(!packet.contains(forbidden));
        }
    }

    #[test]
    fn test_xmp_metadata_contains_title() {
        let packet = generate_xmp_metadata(Some("My Title"), "fop-rs", PdfCompliance::Standard);
        assert!(packet.contains("My Title"));
    }

    #[test]
    fn test_xmp_metadata_contains_creator_tool() {
        let packet = generate_xmp_metadata(None, "fop-render v1.0", PdfCompliance::Standard);
        assert!(packet.contains("fop-render v1.0"));
    }

    #[test]
    fn test_xmp_metadata_no_title_uses_untitled() {
        let packet = generate_xmp_metadata(None, "fop", PdfCompliance::Standard);
        assert!(packet.contains("Untitled"));
    }

    #[test]
    fn test_xmp_metadata_starts_with_xpacket() {
        let packet = generate_xmp_metadata(None, "fop", PdfCompliance::Standard);
        assert!(packet.starts_with("<?xpacket"));
    }

    #[test]
    fn test_xmp_metadata_ends_with_xpacket() {
        let packet = generate_xmp_metadata(None, "fop", PdfCompliance::Standard);
        assert!(packet.ends_with("?>"));
    }

    // ---- SRGB_ICC_PROFILE header fields -----------------------------------

    /// Despite the name, this checks the four bytes at offset 12 (`mntr`).
    #[test]
    fn test_srgb_icc_profile_starts_with_signature() {
        assert_eq!(
            &SRGB_ICC_PROFILE[12..16],
            b"mntr",
            "ICC profile class should be 'mntr'"
        );
    }

    #[test]
    fn test_srgb_icc_profile_colour_space_rgb() {
        assert_eq!(
            &SRGB_ICC_PROFILE[16..20],
            b"RGB ",
            "ICC colour space should be 'RGB '"
        );
    }

    #[test]
    fn test_srgb_icc_profile_pcs_xyz() {
        assert_eq!(&SRGB_ICC_PROFILE[20..24], b"XYZ ", "PCS should be 'XYZ '");
    }

    // ---- reconcile_xmp ----------------------------------------------------

    #[test]
    fn test_reconcile_xmp_adds_xpacket_wrappers() {
        let reconciled = reconcile_xmp(BARE_XMPMETA, PdfCompliance::Standard);
        assert!(
            reconciled.starts_with("<?xpacket"),
            "Should start with <?xpacket"
        );
        assert!(reconciled.ends_with("?>"), "Should end with ?>");
    }

    #[test]
    fn test_reconcile_xmp_keeps_existing_wrappers() {
        let wrapped = "<?xpacket begin=\"\u{FEFF}\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n<x:xmpmeta xmlns:x=\"adobe:ns:meta/\"><rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"></rdf:RDF></x:xmpmeta>\n<?xpacket end=\"w\"?>";
        let reconciled = reconcile_xmp(wrapped, PdfCompliance::Standard);
        assert_eq!(
            reconciled.matches("<?xpacket").count(),
            2,
            "Should have exactly two xpacket PIs"
        );
    }

    #[test]
    fn test_reconcile_xmp_splices_pdfa() {
        let reconciled = reconcile_xmp(BARE_XMPMETA, PdfCompliance::PdfA1b);
        assert!(
            reconciled.contains("pdfaid:part"),
            "Should contain pdfaid:part"
        );
        assert!(
            reconciled.contains("pdfaid:conformance"),
            "Should contain pdfaid:conformance"
        );
    }

    #[test]
    fn test_reconcile_xmp_splices_pdfua() {
        let reconciled = reconcile_xmp(BARE_XMPMETA, PdfCompliance::PdfUA1);
        assert!(
            reconciled.contains("pdfuaid:part"),
            "Should contain pdfuaid:part"
        );
        assert!(
            !reconciled.contains("pdfaid:part"),
            "Should NOT contain pdfaid:part"
        );
    }

    #[test]
    fn test_reconcile_xmp_splices_both_for_combined() {
        let reconciled = reconcile_xmp(BARE_XMPMETA, PdfCompliance::PdfA1bUA1);
        assert!(
            reconciled.contains("pdfaid:part"),
            "Should contain pdfaid:part"
        );
        assert!(
            reconciled.contains("pdfuaid:part"),
            "Should contain pdfuaid:part"
        );
    }

    #[test]
    fn test_reconcile_xmp_does_not_duplicate_existing_pdfa() {
        // Built line by line so the fixture text is independent of source indentation.
        let already_tagged = concat!(
            r#"<x:xmpmeta xmlns:x="adobe:ns:meta/">"#,
            "\n",
            r#"<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">"#,
            "\n",
            r#"<rdf:Description rdf:about="" xmlns:pdfaid="http://www.aiim.org/pdfa/ns/id/">"#,
            "\n",
            r#"<pdfaid:part>1</pdfaid:part>"#,
            "\n",
            r#"<pdfaid:conformance>B</pdfaid:conformance>"#,
            "\n",
            r#"</rdf:Description>"#,
            "\n",
            r#"</rdf:RDF></x:xmpmeta>"#,
        );
        let reconciled = reconcile_xmp(already_tagged, PdfCompliance::PdfA1b);
        let occurrences = reconciled.matches("<pdfaid:part>").count();
        assert_eq!(
            occurrences, 1,
            "<pdfaid:part> open-tag should appear exactly once"
        );
    }

    // ---- extract_dc_fields ------------------------------------------------

    #[test]
    fn test_extract_dc_fields_title_from_alt() {
        let packet = concat!(
            r#"<?xpacket begin="" id="W"?>"#,
            "\n",
            r#"<x:xmpmeta xmlns:x="adobe:ns:meta/">"#,
            "\n",
            r#"<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">"#,
            "\n",
            r#"<rdf:Description rdf:about="" xmlns:dc="http://purl.org/dc/elements/1.1/">"#,
            "\n",
            r#"<dc:title><rdf:Alt><rdf:li xml:lang="x-default">My Test Document</rdf:li></rdf:Alt></dc:title>"#,
            "\n",
            r#"</rdf:Description>"#,
            "\n",
            r#"</rdf:RDF></x:xmpmeta>"#,
            "\n",
            r#"<?xpacket end="w"?>"#,
        );
        let dc = extract_dc_fields(packet);
        assert_eq!(dc.title.as_deref(), Some("My Test Document"));
    }

    #[test]
    fn test_extract_dc_fields_title_simple() {
        let dc = extract_dc_fields(r#"<dc:title>Simple Title</dc:title>"#);
        assert_eq!(dc.title.as_deref(), Some("Simple Title"));
    }

    #[test]
    fn test_extract_dc_fields_creator() {
        let dc = extract_dc_fields(r#"<dc:creator>Jane Doe</dc:creator>"#);
        assert_eq!(dc.creator.as_deref(), Some("Jane Doe"));
    }

    #[test]
    fn test_extract_dc_fields_description() {
        let dc = extract_dc_fields(r#"<dc:description>A document about things.</dc:description>"#);
        assert_eq!(dc.description.as_deref(), Some("A document about things."));
    }

    #[test]
    fn test_extract_dc_fields_absent_returns_none() {
        let dc = extract_dc_fields(r#"<x:xmpmeta xmlns:x="adobe:ns:meta/"></x:xmpmeta>"#);
        let all_fields = [
            &dc.title,
            &dc.creator,
            &dc.description,
            &dc.date,
            &dc.rights,
            &dc.language,
        ];
        for field in all_fields {
            assert!(field.is_none());
        }
    }

    #[test]
    fn test_extract_dc_date_from_xmp() {
        let dc = extract_dc_fields(r#"<dc:date>2026-05-15</dc:date>"#);
        assert_eq!(dc.date.as_deref(), Some("2026-05-15"));
    }

    #[test]
    fn test_extract_dc_rights_from_xmp() {
        let dc = extract_dc_fields(r#"<dc:rights>CC-BY 4.0</dc:rights>"#);
        assert_eq!(dc.rights.as_deref(), Some("CC-BY 4.0"));
    }

    #[test]
    fn test_extract_dc_language_from_xmp() {
        let dc = extract_dc_fields(r#"<dc:language>en</dc:language>"#);
        assert_eq!(dc.language.as_deref(), Some("en"));
    }
}