//! Compliance checking for PDF documents.
//!
//! The crate root exposes validators for PDF/A ([`validate_pdfa`]), PDF/UA
//! ([`validate_pdfua`]) and PDF/X ([`validate_pdfx`]), helpers that detect the
//! PDF/A level a document declares in its XMP metadata
//! ([`detect_pdfa_level`], [`preferred_pdfa_level`]), and structure-tree
//! parsing ([`parse_structure_tree`]).
#![deny(missing_docs)]
// Validator implementations, one crate-private module per standard.
pub(crate) mod pdfa;
pub(crate) mod pdfua;
pub(crate) mod pdfx;
/// Structure-tree types and parsing (see [`parse_structure_tree`]).
pub mod tagged;

// Helpers used by this file: XMP lookup/parsing and document feature probes.
pub(crate) mod check;
// NOTE(review): not referenced from this file; presumably XMP handling shared
// by the validators — confirm.
mod xmp;
53
54use pdf_syntax::Pdf;
55
/// PDF/A conformance level: a part number combined with a conformance letter.
///
/// Use [`PdfALevel::part`] and [`PdfALevel::conformance`] to split a level
/// into those two components, and [`PdfALevel::from_parts`] to go back.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PdfALevel {
    /// PDF/A-1a: part 1, conformance `A`.
    A1a,
    /// PDF/A-1b: part 1, conformance `B`.
    A1b,
    /// PDF/A-2a: part 2, conformance `A`.
    A2a,
    /// PDF/A-2b: part 2, conformance `B`.
    A2b,
    /// PDF/A-2u: part 2, conformance `U`.
    A2u,
    /// PDF/A-3a: part 3, conformance `A`.
    A3a,
    /// PDF/A-3b: part 3, conformance `B`.
    A3b,
    /// PDF/A-3u: part 3, conformance `U`.
    A3u,
    /// PDF/A-4: part 4 without a conformance letter.
    A4,
    /// PDF/A-4f: part 4, conformance `F`.
    A4f,
    /// PDF/A-4e: part 4, conformance `E`.
    A4e,
}

impl PdfALevel {
    /// Returns the part number of this level (`1` through `4`).
    pub fn part(self) -> u8 {
        match self {
            Self::A1a | Self::A1b => 1,
            Self::A2a | Self::A2b | Self::A2u => 2,
            Self::A3a | Self::A3b | Self::A3u => 3,
            Self::A4 | Self::A4f | Self::A4e => 4,
        }
    }

    /// Returns the upper-case conformance letter of this level.
    ///
    /// [`PdfALevel::A4`] has no letter and yields the empty string.
    pub fn conformance(self) -> &'static str {
        match self {
            Self::A1a | Self::A2a | Self::A3a => "A",
            Self::A1b | Self::A2b | Self::A3b => "B",
            Self::A2u | Self::A3u => "U",
            Self::A4 => "",
            Self::A4f => "F",
            Self::A4e => "E",
        }
    }

    /// Returns `true` for the `A` conformance levels (`A1a`, `A2a`, `A3a`),
    /// i.e. the levels that require a tagged document.
    pub fn requires_tagged(self) -> bool {
        matches!(self, Self::A1a | Self::A2a | Self::A3a)
    }

    /// Builds a level from a part number and a conformance letter.
    ///
    /// The letter is compared ASCII case-insensitively. Matching is lenient:
    /// for parts 1–3 any letter that is not valid for the part falls back to
    /// the `B` level, and for part 4 anything other than `F`/`E` yields
    /// [`PdfALevel::A4`]. Returns `None` only when `part` is outside `1..=4`.
    pub fn from_parts(part: u8, conformance: &str) -> Option<Self> {
        // Case-insensitive comparison without allocating an upper-cased copy.
        let conformance_is = |letter: &str| conformance.eq_ignore_ascii_case(letter);

        let level = match part {
            1 if conformance_is("A") => Self::A1a,
            1 => Self::A1b,
            2 if conformance_is("A") => Self::A2a,
            2 if conformance_is("U") => Self::A2u,
            2 => Self::A2b,
            3 if conformance_is("A") => Self::A3a,
            3 if conformance_is("U") => Self::A3u,
            3 => Self::A3b,
            4 if conformance_is("F") => Self::A4f,
            4 if conformance_is("E") => Self::A4e,
            4 => Self::A4,
            _ => return None,
        };
        Some(level)
    }
}
131
/// PDF/X conformance level.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PdfXLevel {
    /// PDF/X-1a:2003.
    X1a2003,
    /// PDF/X-3:2003.
    X32003,
    /// PDF/X-4.
    X4,
}

impl PdfXLevel {
    /// Returns `true` for the levels that forbid transparency
    /// (`X1a2003` and `X32003`); `X4` returns `false`.
    pub fn forbids_transparency(self) -> bool {
        match self {
            Self::X1a2003 | Self::X32003 => true,
            Self::X4 => false,
        }
    }

    /// Returns the display name of this level, e.g. `"PDF/X-4"`.
    pub fn version_string(self) -> &'static str {
        match self {
            Self::X4 => "PDF/X-4",
            Self::X32003 => "PDF/X-3:2003",
            Self::X1a2003 => "PDF/X-1a:2003",
        }
    }

    /// Returns the GTS version identifier for this level.
    ///
    /// For every current level this is the same text as
    /// [`PdfXLevel::version_string`], so it simply delegates.
    // NOTE(review): presumably the value expected for the `GTS_PDFXVersion`
    // key — confirm against the PDF/X validator.
    pub fn gts_version(self) -> &'static str {
        self.version_string()
    }
}
167
/// A single finding recorded in a [`ComplianceReport`].
#[derive(Debug, Clone)]
pub struct ComplianceIssue {
    /// Identifier of the rule this issue relates to (the format is chosen by
    /// the validator that emits the issue).
    pub rule: String,
    /// How serious the issue is; only [`Severity::Error`] issues make
    /// [`ComplianceReport::is_compliant`] return `false`.
    pub severity: Severity,
    /// Human-readable description of the issue.
    pub message: String,
    /// Optional description of where the issue was found; `None` when no
    /// location was recorded.
    pub location: Option<String>,
}
180
/// Severity attached to a [`ComplianceIssue`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Severity {
    /// Counted by [`ComplianceReport::error_count`]; a single issue at this
    /// level makes [`ComplianceReport::is_compliant`] return `false`.
    Error,
    /// Counted by [`ComplianceReport::warning_count`]; does not affect
    /// [`ComplianceReport::is_compliant`].
    Warning,
    /// Informational only; neither counted by the report helpers nor
    /// considered by [`ComplianceReport::is_compliant`].
    Info,
}
191
192#[derive(Debug, Clone, Default)]
194pub struct ComplianceReport {
195 pub issues: Vec<ComplianceIssue>,
197 pub pdfa_level: Option<PdfALevel>,
199 pub compliant: bool,
201}
202
203impl ComplianceReport {
204 pub fn is_compliant(&self) -> bool {
206 !self.issues.iter().any(|i| i.severity == Severity::Error)
207 }
208
209 pub fn error_count(&self) -> usize {
211 self.issues
212 .iter()
213 .filter(|i| i.severity == Severity::Error)
214 .count()
215 }
216
217 pub fn warning_count(&self) -> usize {
219 self.issues
220 .iter()
221 .filter(|i| i.severity == Severity::Warning)
222 .count()
223 }
224}
225
/// Validates `pdf` against the given PDF/A `level`.
///
/// Thin wrapper around the crate-private `pdfa::validate`; the returned
/// [`ComplianceReport`] is whatever that validator produced.
#[must_use]
pub fn validate_pdfa(pdf: &Pdf, level: PdfALevel) -> ComplianceReport {
    pdfa::validate(pdf, level)
}
231
/// Validates `pdf` against the given PDF/A `level` via the crate-private
/// `pdfa::validate_timed`.
///
// NOTE(review): presumably the same checks as `validate_pdfa` with timing
// instrumentation; the difference lives in `pdfa::validate_timed` — confirm.
#[must_use]
pub fn validate_pdfa_timed(pdf: &Pdf, level: PdfALevel) -> ComplianceReport {
    pdfa::validate_timed(pdf, level)
}
237
/// Validates `pdf` against the given PDF/A `level`, handing `progress` to the
/// crate-private `pdfa::validate_with_progress`.
///
// NOTE(review): presumably the validator writes a description of the step it
// is currently running into `progress` so another thread can read it; what is
// written, and when, is decided by `pdfa::validate_with_progress` — confirm.
#[must_use]
pub fn validate_pdfa_with_progress(
    pdf: &Pdf,
    level: PdfALevel,
    progress: &std::sync::Mutex<String>,
) -> ComplianceReport {
    pdfa::validate_with_progress(pdf, level, progress)
}
249
250#[must_use]
256pub fn detect_pdfa_level(pdf: &Pdf) -> Option<PdfALevel> {
257 let xmp = check::get_xmp_metadata(pdf)?;
258 let (part, conformance) = check::parse_xmp_pdfa_lenient(&xmp)?;
259 PdfALevel::from_parts(part, &conformance)
260}
261
262#[must_use]
270pub fn preferred_pdfa_level(pdf: &Pdf) -> PdfALevel {
271 match detect_pdfa_level(pdf) {
272 Some(level) if level.part() >= 2 => level,
273 Some(_level)
274 if check::has_xref_streams(pdf)
275 || check::uses_transparency(pdf)
276 || check::uses_jpeg2000(pdf) =>
277 {
278 PdfALevel::A2b
279 }
280 Some(level) => level,
281 None => PdfALevel::A2b,
282 }
283}
284
285pub fn validate_pdfua(pdf: &Pdf) -> ComplianceReport {
287 pdfua::validate(pdf)
288}
289
290pub fn validate_pdfx(pdf: &Pdf, level: PdfXLevel) -> ComplianceReport {
292 pdfx::validate(pdf, level)
293}
294
295pub fn parse_structure_tree(pdf: &Pdf) -> Option<tagged::StructureTree> {
297 tagged::parse(pdf)
298}
299
#[cfg(test)]
mod tests {
    use super::{detect_pdfa_level, preferred_pdfa_level, PdfALevel};
    use pdf_syntax::Pdf;

    /// Builds a minimal PDF 1.4 file with a classic cross-reference table.
    ///
    /// `objs` lists `(object number, object body)` pairs; each body is written
    /// between `N 0 obj` and `endobj`. Object numbers below the highest one
    /// given that do not appear in `objs` get free xref entries. The trailer
    /// always names object 1 as `/Root`.
    fn build_pdf(objs: &[(u32, &[u8])]) -> Vec<u8> {
        let max = objs.iter().map(|(n, _)| *n).max().unwrap_or(0);
        let mut pdf: Vec<u8> = b"%PDF-1.4\n".to_vec();
        // Byte offset of each object, needed for the xref table below.
        let mut offsets = std::collections::HashMap::new();
        for (n, body) in objs {
            offsets.insert(*n, pdf.len());
            pdf.extend_from_slice(format!("{n} 0 obj\n").as_bytes());
            pdf.extend_from_slice(body);
            pdf.extend_from_slice(b"\nendobj\n");
        }
        let xref_off = pdf.len();
        pdf.extend_from_slice(format!("xref\n0 {}\n", max + 1).as_bytes());
        // Every xref entry is exactly 20 bytes, including the trailing " \n".
        pdf.extend_from_slice(b"0000000000 65535 f \n");
        for n in 1..=max {
            match offsets.get(&n) {
                Some(off) => pdf.extend_from_slice(format!("{off:010} 00000 n \n").as_bytes()),
                None => pdf.extend_from_slice(b"0000000000 65535 f \n"),
            }
        }
        pdf.extend_from_slice(
            format!(
                "trailer\n<< /Size {} /Root 1 0 R >>\nstartxref\n{xref_off}\n%%EOF",
                max + 1
            )
            .as_bytes(),
        );
        pdf
    }

    /// XMP packet declaring `pdfaid:part` 1 and `pdfaid:conformance` B.
    fn pdfa1b_xmp() -> Vec<u8> {
        br#"<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>
<x:xmpmeta xmlns:x="adobe:ns:meta/">
  <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <rdf:Description rdf:about="" xmlns:pdfaid="http://www.aiim.org/pdfa/ns/id/">
      <pdfaid:part>1</pdfaid:part>
      <pdfaid:conformance>B</pdfaid:conformance>
    </rdf:Description>
  </rdf:RDF>
</x:xmpmeta>
<?xpacket end="w"?>"#
            .to_vec()
    }

    /// Body of a `/Metadata` stream object wrapping [`pdfa1b_xmp`], with a
    /// `/Length` that matches the packet.
    fn pdfa1b_metadata_stream() -> Vec<u8> {
        let xmp = pdfa1b_xmp();
        let len = xmp.len();
        let mut body = format!(
            "<< /Type /Metadata /Subtype /XML /Length {len} >>\nstream\n"
        )
        .into_bytes();
        body.extend_from_slice(&xmp);
        body.extend_from_slice(b"\nendstream");
        body
    }

    /// Parses a synthetic fixture, panicking with a clear message on failure.
    fn parse_pdf(bytes: Vec<u8>) -> Pdf {
        Pdf::new(bytes).expect("synthetic test PDF should parse")
    }

    #[test]
    fn preferred_level_defaults_to_a2b_without_xmp() {
        // The catalog has no /Metadata entry, so there is nothing to detect.
        let bytes = build_pdf(&[
            (1, b"<< /Type /Catalog /Pages 2 0 R >>"),
            (2, b"<< /Type /Pages /Kids [3 0 R] /Count 1 >>"),
            (3, b"<< /Type /Page /Parent 2 0 R /MediaBox [0 0 100 100] >>"),
        ]);
        let pdf = parse_pdf(bytes);
        assert_eq!(detect_pdfa_level(&pdf), None);
        assert_eq!(preferred_pdfa_level(&pdf), PdfALevel::A2b);
    }

    #[test]
    fn preferred_level_keeps_declared_a1b_when_source_is_part1_compatible() {
        let metadata = pdfa1b_metadata_stream();
        let bytes = build_pdf(&[
            (1, b"<< /Type /Catalog /Pages 2 0 R /Metadata 4 0 R >>"),
            (2, b"<< /Type /Pages /Kids [3 0 R] /Count 1 >>"),
            (3, b"<< /Type /Page /Parent 2 0 R /MediaBox [0 0 100 100] >>"),
            (4, &metadata),
        ]);
        let pdf = parse_pdf(bytes);
        assert_eq!(detect_pdfa_level(&pdf), Some(PdfALevel::A1b));
        assert!(!crate::check::has_xref_streams(&pdf));
        assert!(!crate::check::uses_transparency(&pdf));
        assert!(!crate::check::uses_jpeg2000(&pdf));
        assert_eq!(preferred_pdfa_level(&pdf), PdfALevel::A1b);
    }

    #[test]
    fn preferred_level_promotes_declared_a1b_for_xref_streams() {
        let metadata = pdfa1b_metadata_stream();
        let mut bytes = build_pdf(&[
            (1, b"<< /Type /Catalog /Pages 2 0 R /Metadata 4 0 R >>"),
            (2, b"<< /Type /Pages /Kids [3 0 R] /Count 1 >>"),
            (3, b"<< /Type /Page /Parent 2 0 R /MediaBox [0 0 100 100] >>"),
            (4, &metadata),
        ]);
        // Trailing bytes after %%EOF carrying the /XRefStm key; the assertion
        // below shows this is enough for `has_xref_streams` to report true.
        bytes.extend_from_slice(b"\n%% /XRefStm 0\n");
        let pdf = parse_pdf(bytes);
        assert_eq!(detect_pdfa_level(&pdf), Some(PdfALevel::A1b));
        assert!(crate::check::has_xref_streams(&pdf));
        assert_eq!(preferred_pdfa_level(&pdf), PdfALevel::A2b);
    }

    #[test]
    fn preferred_level_promotes_declared_a1b_for_transparency() {
        let metadata = pdfa1b_metadata_stream();
        let bytes = build_pdf(&[
            (1, b"<< /Type /Catalog /Pages 2 0 R /Metadata 5 0 R >>"),
            (2, b"<< /Type /Pages /Kids [3 0 R] /Count 1 >>"),
            (
                // The page references an ExtGState with a non-opaque /ca.
                3,
                b"<< /Type /Page /Parent 2 0 R /MediaBox [0 0 100 100] \
                  /Resources << /ExtGState << /GS1 4 0 R >> >> >>",
            ),
            (4, b"<< /Type /ExtGState /ca 0.5 >>"),
            (5, &metadata),
        ]);
        let pdf = parse_pdf(bytes);
        assert_eq!(detect_pdfa_level(&pdf), Some(PdfALevel::A1b));
        assert!(crate::check::uses_transparency(&pdf));
        assert_eq!(preferred_pdfa_level(&pdf), PdfALevel::A2b);
    }

    #[test]
    fn preferred_level_promotes_declared_a1b_for_jpeg2000() {
        let metadata = pdfa1b_metadata_stream();
        // Image XObject whose only filter is /JPXDecode; the 8 payload bytes
        // are placeholders, not a real JPEG 2000 codestream.
        let img_body =
            b"<< /Type /XObject /Subtype /Image /Width 1 /Height 1 \
              /BitsPerComponent 8 /ColorSpace /DeviceGray /Filter /JPXDecode \
              /Length 8 >>\nstream\n00000000\nendstream";
        let bytes = build_pdf(&[
            (1, b"<< /Type /Catalog /Pages 2 0 R /Metadata 5 0 R >>"),
            (2, b"<< /Type /Pages /Kids [3 0 R] /Count 1 >>"),
            (3, b"<< /Type /Page /Parent 2 0 R /MediaBox [0 0 100 100] >>"),
            (4, img_body),
            (5, &metadata),
        ]);
        let pdf = parse_pdf(bytes);
        assert_eq!(detect_pdfa_level(&pdf), Some(PdfALevel::A1b));
        assert!(crate::check::uses_jpeg2000(&pdf));
        assert_eq!(preferred_pdfa_level(&pdf), PdfALevel::A2b);
    }
}