1use crate::ast::{NodeType, PdfDocument};
2use crate::types::{PdfDictionary, PdfValue};
3use crate::validation::{ValidationIssue, ValidationReport, ValidationSeverity};
4
/// Validator that checks a parsed PDF document against the PDF/A-1b rules
/// implemented by its `validate` method.
#[derive(Debug, Clone)]
pub struct PdfA1bValidator {
    /// When `true`, advisory warnings are reported in addition to hard errors.
    strict_mode: bool,
}
9
10impl PdfA1bValidator {
11 pub fn new() -> Self {
12 Self { strict_mode: true }
13 }
14
15 pub fn with_strict_mode(mut self, strict: bool) -> Self {
16 self.strict_mode = strict;
17 self
18 }
19
20 pub fn validate(&self, document: &PdfDocument) -> ValidationReport {
21 let mut report = ValidationReport::new("PDF/A-1b".to_string(), "1.0".to_string());
22
23 self.validate_version(&mut report, document);
24 report.statistics.total_checks += 1;
25
26 self.validate_color_spaces(&mut report, document);
27 report.statistics.total_checks += 1;
28
29 self.validate_fonts(&mut report, document);
30 report.statistics.total_checks += 1;
31
32 self.validate_images(&mut report, document);
33 report.statistics.total_checks += 1;
34
35 self.validate_multimedia_content(&mut report, document);
36 report.statistics.total_checks += 1;
37
38 self.validate_javascript(&mut report, document);
39 report.statistics.total_checks += 1;
40
41 self.validate_annotations(&mut report, document);
42 report.statistics.total_checks += 1;
43
44 self.validate_forms(&mut report, document);
45 report.statistics.total_checks += 1;
46
47 self.validate_encryption(&mut report, document);
48 report.statistics.total_checks += 1;
49
50 self.validate_metadata(&mut report, document);
51 report.statistics.total_checks += 1;
52
53 self.validate_transparency(&mut report, document);
54 report.statistics.total_checks += 1;
55
56 self.validate_file_specification(&mut report, document);
57 report.statistics.total_checks += 1;
58
59 self.validate_cross_reference(&mut report, document);
60 report.statistics.total_checks += 1;
61
62 report.statistics.passed_checks = report
64 .statistics
65 .total_checks
66 .saturating_sub(report.statistics.failed_checks);
67
68 report
69 }
70
71 fn validate_version(&self, report: &mut ValidationReport, document: &PdfDocument) {
72 if document.version.major != 1 || document.version.minor > 4 {
73 report.add_issue(ValidationIssue {
74 severity: ValidationSeverity::Error,
75 code: "PDF_A_VERSION".to_string(),
76 message: "PDF/A-1 must be based on PDF version 1.4 or earlier".to_string(),
77 node_id: None,
78 location: Some("Document version".to_string()),
79 suggestion: Some(format!(
80 "Found version {}.{}",
81 document.version.major, document.version.minor
82 )),
83 });
84 }
85 }
86
87 fn validate_color_spaces(&self, report: &mut ValidationReport, document: &PdfDocument) {
88 let mut has_device_colors = false;
89 let mut missing_output_intent = true;
90
91 if let Some(catalog_dict) = document.get_catalog() {
92 if catalog_dict.contains_key("OutputIntents") {
93 missing_output_intent = false;
94 }
95 }
96
97 for node in document.ast.get_all_nodes() {
98 match &node.node_type {
99 NodeType::Image => {
100 if let Some(dict) = node.as_dict() {
101 if let Some(colorspace_value) = dict.get("ColorSpace") {
102 if let Some(colorspace_name) = colorspace_value.as_name() {
103 match colorspace_name.without_slash() {
104 "DeviceRGB" | "DeviceGray" | "DeviceCMYK" => {
105 has_device_colors = true;
106 }
107 _ => {}
108 }
109 }
110 }
111 }
112 }
113 NodeType::Page => {
114 if let Some(dict) = node.as_dict() {
115 self.check_resources_for_device_colors(dict, &mut has_device_colors);
116 }
117 }
118 _ => {}
119 }
120 }
121
122 if has_device_colors && missing_output_intent {
123 report.add_issue(ValidationIssue {
124 severity: ValidationSeverity::Error,
125 code: "PDF_A_COLOR_SPACE".to_string(),
126 message: "Device color spaces require OutputIntent specification".to_string(),
127 node_id: None,
128 location: Some("Color management".to_string()),
129 suggestion: Some(
130 "Found device color spaces but no OutputIntents in catalog".to_string(),
131 ),
132 });
133 }
134
135 if missing_output_intent && self.strict_mode {
136 report.add_issue(ValidationIssue {
137 severity: ValidationSeverity::Warning,
138 code: "PDF_A_OUTPUT_INTENT".to_string(),
139 message: "PDF/A-1b should include OutputIntents for color management".to_string(),
140 node_id: None,
141 location: Some("Color management".to_string()),
142 suggestion: None,
143 });
144 }
145 }
146
147 fn check_resources_for_device_colors(
148 &self,
149 page_dict: &PdfDictionary,
150 has_device_colors: &mut bool,
151 ) {
152 if let Some(resources_value) = page_dict.get("Resources") {
153 if let Some(resources_dict) = resources_value.as_dict() {
154 if let Some(colorspaces_value) = resources_dict.get("ColorSpace") {
155 if let Some(colorspaces_dict) = colorspaces_value.as_dict() {
156 for (_name, colorspace_value) in colorspaces_dict.iter() {
157 if let Some(colorspace_name) = colorspace_value.as_name() {
158 match colorspace_name.without_slash() {
159 "DeviceRGB" | "DeviceGray" | "DeviceCMYK" => {
160 *has_device_colors = true;
161 }
162 _ => {}
163 }
164 }
165 }
166 }
167 }
168 }
169 }
170 }
171
172 fn validate_fonts(&self, report: &mut ValidationReport, document: &PdfDocument) {
173 let mut unembedded_fonts = Vec::new();
174 let mut invalid_encodings = Vec::new();
175
176 for node in document.ast.get_all_nodes() {
177 if matches!(
178 node.node_type,
179 NodeType::Font
180 | NodeType::Type1Font
181 | NodeType::TrueTypeFont
182 | NodeType::Type3Font
183 | NodeType::CIDFont
184 ) {
185 if let Some(font_dict) = node.as_dict() {
186 let font_name = font_dict
187 .get("BaseFont")
188 .and_then(|v| v.as_name())
189 .map(|n| n.without_slash())
190 .unwrap_or("Unknown");
191
192 let is_embedded = self.is_font_embedded(font_dict);
194 if !is_embedded {
195 unembedded_fonts.push(font_name.to_string());
196
197 report.add_issue(ValidationIssue {
198 severity: ValidationSeverity::Error,
199 code: "PDF_A_FONT_EMBEDDING".to_string(),
200 message: "All fonts must be embedded in PDF/A-1b".to_string(),
201 node_id: Some(node.id),
202 location: Some("Font embedding".to_string()),
203 suggestion: Some(format!("Font '{}' is not embedded", font_name)),
204 });
205 }
206
207 if let Some(subtype) = font_dict.get("Subtype").and_then(|v| v.as_name()) {
208 if subtype.without_slash() != "Type3" {
209 self.validate_font_encoding(
210 font_dict,
211 font_name,
212 &mut invalid_encodings,
213 );
214 }
215 }
216 }
217 }
218 }
219
220 for encoding_issue in invalid_encodings {
221 report.add_issue(ValidationIssue {
222 severity: ValidationSeverity::Error,
223 code: "PDF_A_FONT_ENCODING".to_string(),
224 message: "Font encoding must be specified or use standard encoding".to_string(),
225 node_id: None,
226 location: Some("Font encoding".to_string()),
227 suggestion: Some(encoding_issue),
228 });
229 }
230 }
231
232 fn is_font_embedded(&self, font_dict: &PdfDictionary) -> bool {
233 font_dict.contains_key("FontFile") ||
234 font_dict.contains_key("FontFile2") ||
235 font_dict.contains_key("FontFile3") ||
236 font_dict.get("DescendantFonts")
238 .and_then(|v| v.as_array())
239 .map(|arr| !arr.is_empty())
241 .unwrap_or(false)
242 }
243
244 fn is_standard_font(&self, font_name: &str) -> bool {
245 matches!(
246 font_name,
247 "Times-Roman"
248 | "Times-Bold"
249 | "Times-Italic"
250 | "Times-BoldItalic"
251 | "Helvetica"
252 | "Helvetica-Bold"
253 | "Helvetica-Oblique"
254 | "Helvetica-BoldOblique"
255 | "Courier"
256 | "Courier-Bold"
257 | "Courier-Oblique"
258 | "Courier-BoldOblique"
259 | "Symbol"
260 | "ZapfDingbats"
261 )
262 }
263
264 fn validate_font_encoding(
265 &self,
266 font_dict: &PdfDictionary,
267 font_name: &str,
268 invalid_encodings: &mut Vec<String>,
269 ) {
270 if !font_dict.contains_key("Encoding") && !self.is_standard_font(font_name) {
271 if let Some(subtype) = font_dict.get("Subtype").and_then(|v| v.as_name()) {
272 if matches!(subtype.without_slash(), "Type1" | "MMType1" | "TrueType") {
273 invalid_encodings
274 .push(format!("Font '{}' lacks encoding specification", font_name));
275 }
276 }
277 }
278 }
279
280 fn validate_images(&self, report: &mut ValidationReport, document: &PdfDocument) {
281 for node in document.ast.get_all_nodes() {
282 if matches!(node.node_type, NodeType::Image | NodeType::ImageXObject) {
283 if let Some(image_dict) = node.as_dict() {
284 if let Some(filter_value) = image_dict.get("Filter") {
285 let has_lzw = match filter_value {
286 PdfValue::Name(name) => name.without_slash() == "LZWDecode",
287 PdfValue::Array(filters) => filters.iter().any(|f| {
288 f.as_name()
289 .map(|n| n.without_slash() == "LZWDecode")
290 .unwrap_or(false)
291 }),
292 _ => false,
293 };
294
295 if has_lzw && self.strict_mode {
296 report.add_issue(ValidationIssue {
297 severity: ValidationSeverity::Warning,
298 code: "PDF_A_LZW_DECODE".to_string(),
299 message: "LZWDecode filter should be avoided in PDF/A-1"
300 .to_string(),
301 node_id: None,
302 location: Some("Image compression".to_string()),
303 suggestion: Some("Consider using FlateDecode instead".to_string()),
304 });
305 }
306 }
307 }
308 }
309 }
310 }
311
312 fn validate_multimedia_content(&self, report: &mut ValidationReport, document: &PdfDocument) {
313 let mut has_multimedia = false;
314
315 for node in document.ast.get_all_nodes() {
316 if node.node_type == NodeType::Annotation {
317 if let Some(annot_dict) = node.as_dict() {
318 if let Some(subtype) = annot_dict.get("Subtype").and_then(|v| v.as_name()) {
319 match subtype.without_slash() {
320 "Movie" | "Sound" | "Screen" | "RichMedia" => {
321 has_multimedia = true;
322 break;
323 }
324 _ => {}
325 }
326 }
327 }
328 }
329 }
330
331 if has_multimedia {
332 report.add_issue(ValidationIssue {
333 severity: ValidationSeverity::Error,
334 code: "PDF_A_MULTIMEDIA".to_string(),
335 message: "PDF/A-1b does not permit multimedia content".to_string(),
336 node_id: None,
337 location: Some("Multimedia restrictions".to_string()),
338 suggestion: Some(
339 "Remove multimedia annotations like Movie, Sound, or Screen".to_string(),
340 ),
341 });
342 }
343 }
344
345 fn validate_javascript(&self, report: &mut ValidationReport, document: &PdfDocument) {
346 for node in document.ast.get_all_nodes() {
347 if matches!(node.node_type, NodeType::JavaScriptAction) {
348 report.add_issue(ValidationIssue {
349 severity: ValidationSeverity::Error,
350 code: "PDF_A_JAVASCRIPT".to_string(),
351 message: "JavaScript is not permitted in PDF/A-1b".to_string(),
352 node_id: Some(node.id),
353 location: Some("JavaScript action node".to_string()),
354 suggestion: Some("Remove all JavaScript actions".to_string()),
355 });
356 return;
357 }
358 }
359
360 let mut has_javascript = false;
361
362 if let Some(catalog_dict) = document.get_catalog() {
363 if let Some(names_value) = catalog_dict.get("Names") {
364 if let Some(names_dict) = names_value.as_dict() {
365 if names_dict.contains_key("JavaScript") {
366 has_javascript = true;
367 }
368 }
369 }
370
371 if let Some(open_action) = catalog_dict.get("OpenAction") {
372 if let Some(action_dict) = open_action.as_dict() {
373 if let Some(s_value) = action_dict.get("S") {
374 if let Some(s_name) = s_value.as_name() {
375 if s_name.without_slash() == "JavaScript" {
376 has_javascript = true;
377 }
378 }
379 }
380 }
381 }
382 }
383
384 for node in document.ast.get_all_nodes() {
385 if let Some(dict) = node.as_dict() {
386 if let Some(type_value) = dict.get("Type") {
387 if let Some(type_name) = type_value.as_name() {
388 if type_name.without_slash() == "Action" {
389 if let Some(s_value) = dict.get("S") {
390 if let Some(s_name) = s_value.as_name() {
391 if s_name.without_slash() == "JavaScript" {
392 has_javascript = true;
393 break;
394 }
395 }
396 }
397 }
398 }
399 }
400 if matches!(node.node_type, NodeType::Annotation | NodeType::Action) {
401 if let Some(s_value) = dict.get("S") {
402 if let Some(s_name) = s_value.as_name() {
403 if s_name.without_slash() == "JavaScript" {
404 has_javascript = true;
405 break;
406 }
407 }
408 }
409 }
410 }
411 }
412
413 if has_javascript {
414 report.add_issue(ValidationIssue {
415 severity: ValidationSeverity::Error,
416 code: "PDF_A_JAVASCRIPT".to_string(),
417 message: "PDF/A-1b does not permit JavaScript".to_string(),
418 node_id: None,
419 location: Some("JavaScript restrictions".to_string()),
420 suggestion: Some("Remove all JavaScript actions and scripts".to_string()),
421 });
422 }
423 }
424
425 fn validate_annotations(&self, report: &mut ValidationReport, document: &PdfDocument) {
426 let prohibited_subtypes = ["Movie", "Sound", "FileAttachment"];
427
428 for node in document.ast.get_all_nodes() {
429 if matches!(node.node_type, NodeType::Annotation) {
430 if let Some(annot_dict) = node.as_dict() {
431 if let Some(subtype) = annot_dict.get("Subtype").and_then(|v| v.as_name()) {
432 let subtype_str = subtype.without_slash();
433 if prohibited_subtypes.contains(&subtype_str) {
434 report.add_issue(ValidationIssue {
435 severity: ValidationSeverity::Error,
436 code: "PDF_A_ANNOTATION_TYPE".to_string(),
437 message: format!(
438 "Annotation subtype '{}' not permitted in PDF/A-1b",
439 subtype_str
440 ),
441 node_id: None,
442 location: Some("Annotation restrictions".to_string()),
443 suggestion: None,
444 });
445 }
446
447 if !annot_dict.contains_key("AP") && subtype_str != "Popup" {
448 report.add_issue(ValidationIssue {
449 severity: ValidationSeverity::Warning,
450 code: "PDF_A_ANNOTATION_APPEARANCE".to_string(),
451 message: "Annotations should have appearance streams in PDF/A-1b"
452 .to_string(),
453 node_id: None,
454 location: Some("Annotation appearance".to_string()),
455 suggestion: Some(format!(
456 "Annotation of type '{}' lacks appearance",
457 subtype_str
458 )),
459 });
460 }
461 }
462 }
463 }
464 }
465 }
466
467 fn validate_forms(&self, report: &mut ValidationReport, document: &PdfDocument) {
468 if let Some(catalog_dict) = document.get_catalog() {
469 if let Some(acroform_value) = catalog_dict.get("AcroForm") {
470 if let Some(acroform_dict) = acroform_value.as_dict() {
471 if acroform_dict.contains_key("XFA") {
472 report.add_issue(ValidationIssue {
473 severity: ValidationSeverity::Error,
474 code: "PDF_A_XFA".to_string(),
475 message: "XFA forms are not permitted in PDF/A-1b".to_string(),
476 node_id: None,
477 location: Some("Form restrictions".to_string()),
478 suggestion: Some("Use AcroForm instead of XFA".to_string()),
479 });
480 }
481 }
482 }
483 }
484 }
485
486 fn validate_encryption(&self, report: &mut ValidationReport, document: &PdfDocument) {
487 if document.metadata.encrypted {
488 report.add_issue(ValidationIssue {
489 severity: ValidationSeverity::Error,
490 code: "PDF_A_ENCRYPTION".to_string(),
491 message: "PDF/A-1b documents must not be encrypted".to_string(),
492 node_id: None,
493 location: Some("Encryption restrictions".to_string()),
494 suggestion: Some("Remove all encryption from the document".to_string()),
495 });
496 }
497 }
498
499 fn validate_metadata(&self, report: &mut ValidationReport, document: &PdfDocument) {
500 let mut has_xmp_metadata = false;
501
502 if let Some(catalog_dict) = document.get_catalog() {
503 if catalog_dict.contains_key("Metadata") {
504 has_xmp_metadata = true;
505 }
506 }
507
508 if !has_xmp_metadata {
509 report.add_issue(ValidationIssue {
510 severity: ValidationSeverity::Error,
511 code: "PDF_A_XMP_METADATA".to_string(),
512 message: "PDF/A-1b requires XMP metadata in catalog".to_string(),
513 node_id: None,
514 location: Some("Metadata requirements".to_string()),
515 suggestion: Some("Add XMP metadata stream to document catalog".to_string()),
516 });
517 }
518
519 if self.strict_mode {
521 report.add_issue(ValidationIssue {
522 severity: ValidationSeverity::Warning,
523 code: "PDF_A_METADATA_SYNC".to_string(),
524 message: "Verify XMP metadata synchronization with Info dictionary".to_string(),
525 node_id: None,
526 location: Some("Metadata synchronization".to_string()),
527 suggestion: None,
528 });
529 }
530 }
531
532 fn validate_transparency(&self, report: &mut ValidationReport, document: &PdfDocument) {
533 for node in document.ast.get_all_nodes() {
534 if let Some(dict) = node.as_dict() {
535 if dict.contains_key("BM")
537 || dict.contains_key("CA")
538 || dict.contains_key("ca")
539 || dict.contains_key("SMask")
540 {
541 report.add_issue(ValidationIssue {
542 severity: ValidationSeverity::Error,
543 code: "PDF_A_TRANSPARENCY".to_string(),
544 message: "PDF/A-1b does not permit transparency in graphics states"
545 .to_string(),
546 node_id: Some(node.id),
547 location: Some("Graphics state".to_string()),
548 suggestion: Some("Remove transparency effects from ExtGState".to_string()),
549 });
550 return; }
552
553 if let Some(type_value) = dict.get("Type") {
554 if let Some(type_name) = type_value.as_name() {
555 if type_name.without_slash() == "Group" {
556 if let Some(s_value) = dict.get("S") {
557 if let Some(s_name) = s_value.as_name() {
558 if s_name.without_slash() == "Transparency" {
559 report.add_issue(ValidationIssue {
560 severity: ValidationSeverity::Error,
561 code: "PDF_A_TRANSPARENCY".to_string(),
562 message: "PDF/A-1b does not permit transparency groups"
563 .to_string(),
564 node_id: Some(node.id),
565 location: Some("Transparency group".to_string()),
566 suggestion: Some(
567 "Remove transparency group specification"
568 .to_string(),
569 ),
570 });
571 return;
572 }
573 }
574 }
575 }
576 }
577 }
578
579 if let Some(s_value) = dict.get("S") {
580 if let Some(s_name) = s_value.as_name() {
581 if s_name.without_slash() == "Transparency" {
582 if let Some(type_value) = dict.get("Type") {
583 if let Some(type_name) = type_value.as_name() {
584 if type_name.without_slash() == "Group" {
585 report.add_issue(ValidationIssue {
586 severity: ValidationSeverity::Error,
587 code: "PDF_A_TRANSPARENCY".to_string(),
588 message: "PDF/A-1b does not permit transparency groups"
589 .to_string(),
590 node_id: Some(node.id),
591 location: Some("Transparency group".to_string()),
592 suggestion: Some(
593 "Remove transparency group specification"
594 .to_string(),
595 ),
596 });
597 return;
598 }
599 }
600 }
601 }
602 }
603 }
604
605 if let Some(group_value) = dict.get("Group") {
606 if let Some(group_dict) = group_value.as_dict() {
607 if let Some(s_value) = group_dict.get("S") {
608 if let Some(s_name) = s_value.as_name() {
609 if s_name.without_slash() == "Transparency" {
610 report.add_issue(ValidationIssue {
611 severity: ValidationSeverity::Error,
612 code: "PDF_A_TRANSPARENCY".to_string(),
613 message: "PDF/A-1b does not permit transparency groups"
614 .to_string(),
615 node_id: Some(node.id),
616 location: Some("Transparency group".to_string()),
617 suggestion: Some(
618 "Remove transparency group specification".to_string(),
619 ),
620 });
621 return;
622 }
623 }
624 }
625 }
626 }
627 }
628 }
629 }
630
631 fn validate_file_specification(&self, report: &mut ValidationReport, document: &PdfDocument) {
632 if document.metadata.has_embedded_files {
633 report.add_issue(ValidationIssue {
634 severity: ValidationSeverity::Error,
635 code: "PDF_A_EMBEDDED_FILES".to_string(),
636 message: "PDF/A-1b does not permit embedded files".to_string(),
637 node_id: None,
638 location: Some("File specification restrictions".to_string()),
639 suggestion: Some("Remove all embedded file attachments".to_string()),
640 });
641 }
642 }
643
644 fn validate_cross_reference(&self, report: &mut ValidationReport, document: &PdfDocument) {
645 let has_xref_tables = !document.xref.entries.is_empty();
647 let has_xref_streams = !document.xref.streams.is_empty();
648
649 if has_xref_tables && has_xref_streams {
650 report.add_issue(ValidationIssue {
651 severity: ValidationSeverity::Warning,
652 code: "PDF_A_XREF_FORMAT".to_string(),
653 message: "Mixed cross-reference formats detected".to_string(),
654 node_id: None,
655 location: Some("Cross-reference validation".to_string()),
656 suggestion: Some(
657 "Consider using consistent cross-reference format throughout".to_string(),
658 ),
659 });
660 }
661 }
662}
663
664impl Default for PdfA1bValidator {
665 fn default() -> Self {
666 Self::new()
667 }
668}