Skip to main content

oxidize_pdf/pdfa/
validator.rs

1//! PDF/A Validation Engine
2//!
3//! This module provides the core validation logic for PDF/A compliance.
4//! It checks PDF documents against the requirements of PDF/A standards.
5
6use super::error::{PdfAError, ValidationError};
7use super::types::{PdfAConformance, PdfALevel, ValidationResult, ValidationWarning};
8use super::xmp::XmpMetadata;
9use crate::parser::PdfReader;
10use std::io::{Read, Seek};
11
12/// Extracted catalog data for validation
13struct CatalogData {
14    metadata_ref: Option<(u32, u16)>,
15    names_ref: Option<(u32, u16)>,
16    names_inline: Option<crate::parser::objects::PdfDictionary>,
17    open_action_ref: Option<(u32, u16)>,
18    open_action_inline: Option<crate::parser::objects::PdfDictionary>,
19    aa_ref: Option<(u32, u16)>,
20    aa_inline: Option<crate::parser::objects::PdfDictionary>,
21}
22
23/// PDF/A Validator
24///
25/// Validates PDF documents against PDF/A standards (ISO 19005).
26///
27/// # Example
28///
29/// ```rust,ignore
30/// use oxidize_pdf::parser::PdfReader;
31/// use oxidize_pdf::pdfa::{PdfAValidator, PdfALevel};
32///
33/// let mut reader = PdfReader::open("document.pdf")?;
34/// let validator = PdfAValidator::new(PdfALevel::A1b);
35/// let result = validator.validate(&mut reader)?;
36///
37/// if result.is_valid() {
38///     println!("Document is PDF/A-1b compliant!");
39/// } else {
40///     for error in result.errors() {
41///         println!("Violation: {}", error);
42///     }
43/// }
44/// ```
45#[derive(Debug, Clone)]
46pub struct PdfAValidator {
47    /// Target PDF/A level for validation
48    level: PdfALevel,
49    /// Whether to collect all errors or stop at first
50    collect_all_errors: bool,
51}
52
53impl PdfAValidator {
54    /// Create a new validator for the specified PDF/A level
55    pub fn new(level: PdfALevel) -> Self {
56        Self {
57            level,
58            collect_all_errors: true,
59        }
60    }
61
62    /// Set whether to collect all errors or stop at first error
63    pub fn collect_all_errors(mut self, collect: bool) -> Self {
64        self.collect_all_errors = collect;
65        self
66    }
67
68    /// Get the target PDF/A level
69    pub fn level(&self) -> PdfALevel {
70        self.level
71    }
72
73    /// Validate a PDF document against the configured PDF/A level
74    ///
75    /// Returns a `ValidationResult` containing all errors and warnings found.
76    pub fn validate<R: Read + Seek>(
77        &self,
78        reader: &mut PdfReader<R>,
79    ) -> Result<ValidationResult, PdfAError> {
80        let mut errors = Vec::new();
81        let mut warnings = Vec::new();
82
83        // Check encryption (forbidden in all PDF/A levels)
84        self.check_encryption(reader, &mut errors);
85
86        // Check PDF version compatibility
87        self.check_pdf_version(reader, &mut errors)?;
88
89        // Extract catalog data we need for validation before doing further operations
90        let catalog_data = self.extract_catalog_data(reader)?;
91
92        // Check XMP metadata (required in all PDF/A levels)
93        self.check_metadata_from_data(reader, &catalog_data, &mut errors, &mut warnings)?;
94
95        // Check for JavaScript (forbidden in all PDF/A levels)
96        self.check_javascript_from_data(reader, &catalog_data, &mut errors)?;
97
98        // Check external references (forbidden in all PDF/A levels)
99        self.check_external_references_from_data(reader, &catalog_data, &mut errors)?;
100
101        // Check transparency (forbidden in PDF/A-1, limited in PDF/A-2+)
102        if !self.level.allows_transparency() {
103            self.check_transparency(reader, &mut errors)?;
104        }
105
106        // Check compression (LZW forbidden in PDF/A-1)
107        if !self.level.allows_lzw() {
108            self.check_lzw_compression(reader, &mut errors)?;
109        }
110
111        // Check embedded files
112        self.check_embedded_files(reader, &catalog_data, &mut errors)?;
113
114        // Check fonts (must be embedded, Level A requires ToUnicode)
115        self.check_fonts(reader, &mut errors)?;
116
117        // Check color spaces and output intent
118        self.check_color_spaces(reader, &mut errors)?;
119
120        Ok(ValidationResult::with_errors_and_warnings(
121            self.level, errors, warnings,
122        ))
123    }
124
125    /// Check if PDF is encrypted (encryption is forbidden in PDF/A)
126    fn check_encryption<R: Read + Seek>(
127        &self,
128        reader: &PdfReader<R>,
129        errors: &mut Vec<ValidationError>,
130    ) {
131        if reader.is_encrypted() {
132            errors.push(ValidationError::EncryptionForbidden);
133        }
134    }
135
136    /// Check PDF version compatibility with the target PDF/A level
137    fn check_pdf_version<R: Read + Seek>(
138        &self,
139        reader: &PdfReader<R>,
140        errors: &mut Vec<ValidationError>,
141    ) -> Result<(), PdfAError> {
142        let version = reader.version();
143        let version_str = version.to_string();
144
145        let required = self.level.required_pdf_version();
146
147        // Parse versions for comparison
148        let actual_parts: Vec<u8> = version_str
149            .split('.')
150            .filter_map(|s| s.parse().ok())
151            .collect();
152
153        // Get major and minor versions
154        let (actual_major, actual_minor) = (
155            actual_parts.first().copied().unwrap_or(1),
156            actual_parts.get(1).copied().unwrap_or(0),
157        );
158
159        // For PDF/A-1, PDF version must be exactly 1.4
160        // For PDF/A-2 and PDF/A-3, PDF version must be 1.7 or compatible
161        let is_compatible = match self.level.part() {
162            1 => actual_major == 1 && actual_minor == 4,
163            2 | 3 => actual_major == 1 && actual_minor >= 4 && actual_minor <= 7,
164            _ => false,
165        };
166
167        if !is_compatible {
168            errors.push(ValidationError::IncompatiblePdfVersion {
169                actual: version_str,
170                required: required.to_string(),
171            });
172        }
173
174        Ok(())
175    }
176
177    /// Extract data from catalog that we need for validation
178    fn extract_catalog_data<R: Read + Seek>(
179        &self,
180        reader: &mut PdfReader<R>,
181    ) -> Result<CatalogData, PdfAError> {
182        let catalog = reader
183            .catalog()
184            .map_err(|e| PdfAError::ParseError(e.to_string()))?;
185
186        let metadata_ref = catalog
187            .get("Metadata")
188            .and_then(|obj| obj.as_reference())
189            .map(|(n, g)| (n, g));
190
191        let names_ref = catalog
192            .get("Names")
193            .and_then(|obj| obj.as_reference())
194            .map(|(n, g)| (n, g));
195
196        let names_inline = catalog.get("Names").and_then(|obj| obj.as_dict()).cloned();
197
198        let open_action_ref = catalog
199            .get("OpenAction")
200            .and_then(|obj| obj.as_reference())
201            .map(|(n, g)| (n, g));
202
203        let open_action_inline = catalog
204            .get("OpenAction")
205            .and_then(|obj| obj.as_dict())
206            .cloned();
207
208        let aa_ref = catalog
209            .get("AA")
210            .and_then(|obj| obj.as_reference())
211            .map(|(n, g)| (n, g));
212
213        let aa_inline = catalog.get("AA").and_then(|obj| obj.as_dict()).cloned();
214
215        Ok(CatalogData {
216            metadata_ref,
217            names_ref,
218            names_inline,
219            open_action_ref,
220            open_action_inline,
221            aa_ref,
222            aa_inline,
223        })
224    }
225
226    /// Check for XMP metadata using extracted catalog data
227    fn check_metadata_from_data<R: Read + Seek>(
228        &self,
229        reader: &mut PdfReader<R>,
230        catalog_data: &CatalogData,
231        errors: &mut Vec<ValidationError>,
232        _warnings: &mut Vec<ValidationWarning>,
233    ) -> Result<(), PdfAError> {
234        // Check for Metadata stream
235        let metadata_ref = match catalog_data.metadata_ref {
236            Some(r) => r,
237            None => {
238                errors.push(ValidationError::XmpMetadataMissing);
239                return Ok(());
240            }
241        };
242
243        // Resolve the reference
244        let obj = reader
245            .get_object(metadata_ref.0, metadata_ref.1)
246            .map_err(|e| PdfAError::ParseError(e.to_string()))?;
247
248        // Check if it's a stream
249        let stream = match obj.as_stream() {
250            Some(s) => s,
251            None => {
252                errors.push(ValidationError::XmpMetadataMissing);
253                return Ok(());
254            }
255        };
256
257        // Parse the XMP metadata
258        let xmp_data = String::from_utf8_lossy(&stream.data);
259        let xmp = match XmpMetadata::parse(&xmp_data) {
260            Ok(x) => x,
261            Err(_) => {
262                errors.push(ValidationError::XmpMetadataMissing);
263                return Ok(());
264            }
265        };
266
267        // Check for PDF/A identifier
268        match &xmp.pdfa_id {
269            None => {
270                errors.push(ValidationError::XmpMissingPdfAIdentifier);
271            }
272            Some(pdfa_id) => {
273                // Validate the PDF/A identifier matches our target level
274                let expected_part = self.level.part();
275                let expected_conformance = self.level.conformance();
276
277                if pdfa_id.part != expected_part {
278                    errors.push(ValidationError::XmpInvalidPdfAIdentifier {
279                        details: format!(
280                            "Part mismatch: expected {}, found {}",
281                            expected_part, pdfa_id.part
282                        ),
283                    });
284                } else if pdfa_id.conformance != expected_conformance {
285                    errors.push(ValidationError::XmpInvalidPdfAIdentifier {
286                        details: format!(
287                            "Conformance mismatch: expected {:?}, found {:?}",
288                            expected_conformance, pdfa_id.conformance
289                        ),
290                    });
291                }
292            }
293        }
294
295        Ok(())
296    }
297
298    /// Check for JavaScript using extracted catalog data
299    fn check_javascript_from_data<R: Read + Seek>(
300        &self,
301        reader: &mut PdfReader<R>,
302        catalog_data: &CatalogData,
303        errors: &mut Vec<ValidationError>,
304    ) -> Result<(), PdfAError> {
305        // Check Names dictionary for JavaScript
306        if let Some((obj_num, gen_num)) = catalog_data.names_ref {
307            let names_obj = reader
308                .get_object(obj_num, gen_num)
309                .map_err(|e| PdfAError::ParseError(e.to_string()))?;
310
311            if let Some(names_dict) = names_obj.as_dict() {
312                if names_dict.get("JavaScript").is_some() {
313                    errors.push(ValidationError::JavaScriptForbidden {
314                        location: "Names/JavaScript".to_string(),
315                    });
316                }
317            }
318        } else if let Some(ref names_dict) = catalog_data.names_inline {
319            if names_dict.get("JavaScript").is_some() {
320                errors.push(ValidationError::JavaScriptForbidden {
321                    location: "Names/JavaScript".to_string(),
322                });
323            }
324        }
325
326        // Check OpenAction for JavaScript
327        if let Some((obj_num, gen_num)) = catalog_data.open_action_ref {
328            let action_obj = reader
329                .get_object(obj_num, gen_num)
330                .map_err(|e| PdfAError::ParseError(e.to_string()))?;
331
332            if let Some(action_dict) = action_obj.as_dict() {
333                if self.is_javascript_action(action_dict) {
334                    errors.push(ValidationError::JavaScriptForbidden {
335                        location: "OpenAction".to_string(),
336                    });
337                }
338            }
339        } else if let Some(ref action_dict) = catalog_data.open_action_inline {
340            if self.is_javascript_action(action_dict) {
341                errors.push(ValidationError::JavaScriptForbidden {
342                    location: "OpenAction".to_string(),
343                });
344            }
345        }
346
347        // Check AA (Additional Actions) dictionary
348        if let Some((obj_num, gen_num)) = catalog_data.aa_ref {
349            let aa_obj = reader
350                .get_object(obj_num, gen_num)
351                .map_err(|e| PdfAError::ParseError(e.to_string()))?;
352
353            if let Some(aa_dict) = aa_obj.as_dict().cloned() {
354                if self.check_aa_dict_for_javascript(reader, &aa_dict)? {
355                    errors.push(ValidationError::JavaScriptForbidden {
356                        location: "Catalog/AA".to_string(),
357                    });
358                }
359            }
360        } else if let Some(ref aa_dict) = catalog_data.aa_inline {
361            if self.check_aa_dict_for_javascript(reader, aa_dict)? {
362                errors.push(ValidationError::JavaScriptForbidden {
363                    location: "Catalog/AA".to_string(),
364                });
365            }
366        }
367
368        Ok(())
369    }
370
371    /// Check if a dictionary is a JavaScript action
372    fn is_javascript_action(&self, dict: &crate::parser::objects::PdfDictionary) -> bool {
373        if let Some(action_type) = dict.get("S") {
374            if let Some(name) = action_type.as_name() {
375                return name.0 == "JavaScript";
376            }
377        }
378        false
379    }
380
381    /// Check AA dictionary for JavaScript actions
382    fn check_aa_dict_for_javascript<R: Read + Seek>(
383        &self,
384        reader: &mut PdfReader<R>,
385        aa_dict: &crate::parser::objects::PdfDictionary,
386    ) -> Result<bool, PdfAError> {
387        // Check each action in the AA dictionary
388        for (_key, value) in aa_dict.0.iter() {
389            let action_dict = if let Some((obj_num, gen_num)) = value.as_reference() {
390                let obj = reader
391                    .get_object(obj_num, gen_num)
392                    .map_err(|e| PdfAError::ParseError(e.to_string()))?;
393                obj.as_dict().cloned()
394            } else {
395                value.as_dict().cloned()
396            };
397
398            if let Some(dict) = action_dict {
399                if self.is_javascript_action(&dict) {
400                    return Ok(true);
401                }
402            }
403        }
404
405        Ok(false)
406    }
407
408    /// Check external references using extracted catalog data
409    fn check_external_references_from_data<R: Read + Seek>(
410        &self,
411        reader: &mut PdfReader<R>,
412        catalog_data: &CatalogData,
413        errors: &mut Vec<ValidationError>,
414    ) -> Result<(), PdfAError> {
415        // Check OpenAction for remote GoTo
416        if let Some((obj_num, gen_num)) = catalog_data.open_action_ref {
417            let action_obj = reader
418                .get_object(obj_num, gen_num)
419                .map_err(|e| PdfAError::ParseError(e.to_string()))?;
420
421            if let Some(action_dict) = action_obj.as_dict() {
422                self.check_for_external_action(action_dict, errors);
423            }
424        } else if let Some(ref action_dict) = catalog_data.open_action_inline {
425            self.check_for_external_action(action_dict, errors);
426        }
427
428        Ok(())
429    }
430
431    /// Check if action is an external reference
432    fn check_for_external_action(
433        &self,
434        dict: &crate::parser::objects::PdfDictionary,
435        errors: &mut Vec<ValidationError>,
436    ) {
437        if let Some(action_type) = dict.get("S") {
438            if let Some(name) = action_type.as_name() {
439                if name.0 == "GoToR" || name.0 == "GoToE" || name.0 == "Launch" {
440                    errors.push(ValidationError::ExternalReferenceForbidden {
441                        reference_type: name.0.clone(),
442                    });
443                }
444            }
445        }
446    }
447
448    /// Check for transparency usage (forbidden in PDF/A-1)
449    fn check_transparency<R: Read + Seek>(
450        &self,
451        reader: &mut PdfReader<R>,
452        errors: &mut Vec<ValidationError>,
453    ) -> Result<(), PdfAError> {
454        let page_count = reader
455            .page_count()
456            .map_err(|e| PdfAError::ParseError(e.to_string()))?;
457
458        for page_idx in 0..page_count {
459            // Get page dictionary
460            let page_dict = self.get_page_dict(reader, page_idx)?;
461
462            // Check Resources for ExtGState with transparency
463            if let Some(resources) = self.get_resources_dict(reader, &page_dict)? {
464                // Check ExtGState entries
465                if let Some(ext_gstate) = resources.get("ExtGState") {
466                    self.check_ext_gstate_transparency(reader, ext_gstate, page_idx, errors)?;
467                }
468
469                // Check XObject entries for transparency groups
470                if let Some(xobjects) = resources.get("XObject") {
471                    self.check_xobject_transparency(reader, xobjects, page_idx, errors)?;
472                }
473            }
474        }
475
476        Ok(())
477    }
478
479    /// Get a page dictionary by index
480    fn get_page_dict<R: Read + Seek>(
481        &self,
482        reader: &mut PdfReader<R>,
483        page_idx: u32,
484    ) -> Result<crate::parser::objects::PdfDictionary, PdfAError> {
485        // Get pages dict
486        let pages_dict = reader
487            .pages()
488            .map_err(|e| PdfAError::ParseError(e.to_string()))?
489            .clone();
490
491        // Get Kids array
492        let kids = pages_dict
493            .get("Kids")
494            .and_then(|k| k.as_array())
495            .ok_or_else(|| PdfAError::ParseError("Pages missing Kids array".to_string()))?;
496
497        // Get page reference
498        let page_ref = kids
499            .0
500            .get(page_idx as usize)
501            .ok_or_else(|| PdfAError::ParseError(format!("Page {} not found", page_idx)))?;
502
503        // Resolve page reference
504        if let Some((obj_num, gen_num)) = page_ref.as_reference() {
505            let page_obj = reader
506                .get_object(obj_num, gen_num)
507                .map_err(|e| PdfAError::ParseError(e.to_string()))?;
508            page_obj
509                .as_dict()
510                .cloned()
511                .ok_or_else(|| PdfAError::ParseError("Page is not a dictionary".to_string()))
512        } else if let Some(dict) = page_ref.as_dict() {
513            Ok(dict.clone())
514        } else {
515            Err(PdfAError::ParseError("Invalid page reference".to_string()))
516        }
517    }
518
519    /// Get Resources dictionary from page, resolving if needed
520    fn get_resources_dict<R: Read + Seek>(
521        &self,
522        reader: &mut PdfReader<R>,
523        page_dict: &crate::parser::objects::PdfDictionary,
524    ) -> Result<Option<crate::parser::objects::PdfDictionary>, PdfAError> {
525        let resources_obj = match page_dict.get("Resources") {
526            Some(obj) => obj,
527            None => return Ok(None),
528        };
529
530        if let Some((obj_num, gen_num)) = resources_obj.as_reference() {
531            let resolved = reader
532                .get_object(obj_num, gen_num)
533                .map_err(|e| PdfAError::ParseError(e.to_string()))?;
534            Ok(resolved.as_dict().cloned())
535        } else {
536            Ok(resources_obj.as_dict().cloned())
537        }
538    }
539
540    /// Check ExtGState dictionary for transparency settings
541    fn check_ext_gstate_transparency<R: Read + Seek>(
542        &self,
543        reader: &mut PdfReader<R>,
544        ext_gstate_obj: &crate::parser::objects::PdfObject,
545        page_idx: u32,
546        errors: &mut Vec<ValidationError>,
547    ) -> Result<(), PdfAError> {
548        let ext_gstate_dict = if let Some((obj_num, gen_num)) = ext_gstate_obj.as_reference() {
549            let obj = reader
550                .get_object(obj_num, gen_num)
551                .map_err(|e| PdfAError::ParseError(e.to_string()))?;
552            obj.as_dict().cloned()
553        } else {
554            ext_gstate_obj.as_dict().cloned()
555        };
556
557        let ext_gstate_dict = match ext_gstate_dict {
558            Some(d) => d,
559            None => return Ok(()),
560        };
561
562        // Check each graphics state entry
563        for (gs_name, gs_value) in ext_gstate_dict.0.iter() {
564            let gs_dict = if let Some((obj_num, gen_num)) = gs_value.as_reference() {
565                let obj = reader
566                    .get_object(obj_num, gen_num)
567                    .map_err(|e| PdfAError::ParseError(e.to_string()))?;
568                obj.as_dict().cloned()
569            } else {
570                gs_value.as_dict().cloned()
571            };
572
573            if let Some(gs_dict) = gs_dict {
574                // Check for CA (stroke alpha) != 1.0
575                if let Some(ca) = gs_dict.get("CA") {
576                    let val = ca.as_real().or_else(|| ca.as_integer().map(|i| i as f64));
577                    if let Some(val) = val {
578                        if (val - 1.0).abs() > f64::EPSILON {
579                            errors.push(ValidationError::TransparencyForbidden {
580                                location: format!(
581                                    "Page {}, ExtGState/{}/CA",
582                                    page_idx + 1,
583                                    &gs_name.0
584                                ),
585                            });
586                        }
587                    }
588                }
589
590                // Check for ca (fill alpha) != 1.0
591                if let Some(ca) = gs_dict.get("ca") {
592                    let val = ca.as_real().or_else(|| ca.as_integer().map(|i| i as f64));
593                    if let Some(val) = val {
594                        if (val - 1.0).abs() > f64::EPSILON {
595                            errors.push(ValidationError::TransparencyForbidden {
596                                location: format!(
597                                    "Page {}, ExtGState/{}/ca",
598                                    page_idx + 1,
599                                    &gs_name.0
600                                ),
601                            });
602                        }
603                    }
604                }
605
606                // Check for SMask
607                if let Some(smask) = gs_dict.get("SMask") {
608                    // SMask /None is allowed
609                    let is_none = smask.as_name().map(|n| n.0 == "None").unwrap_or(false);
610                    if !is_none {
611                        errors.push(ValidationError::TransparencyForbidden {
612                            location: format!(
613                                "Page {}, ExtGState/{}/SMask",
614                                page_idx + 1,
615                                &gs_name.0
616                            ),
617                        });
618                    }
619                }
620
621                // Check for BM (blend mode) != Normal
622                if let Some(bm) = gs_dict.get("BM") {
623                    if let Some(name) = bm.as_name() {
624                        if name.0 != "Normal" && name.0 != "Compatible" {
625                            errors.push(ValidationError::TransparencyForbidden {
626                                location: format!(
627                                    "Page {}, ExtGState/{}/BM={}",
628                                    page_idx + 1,
629                                    &gs_name.0,
630                                    &name.0
631                                ),
632                            });
633                        }
634                    }
635                }
636            }
637        }
638
639        Ok(())
640    }
641
642    /// Check XObject dictionary for transparency groups
643    fn check_xobject_transparency<R: Read + Seek>(
644        &self,
645        reader: &mut PdfReader<R>,
646        xobject_obj: &crate::parser::objects::PdfObject,
647        page_idx: u32,
648        errors: &mut Vec<ValidationError>,
649    ) -> Result<(), PdfAError> {
650        let xobject_dict = if let Some((obj_num, gen_num)) = xobject_obj.as_reference() {
651            let obj = reader
652                .get_object(obj_num, gen_num)
653                .map_err(|e| PdfAError::ParseError(e.to_string()))?;
654            obj.as_dict().cloned()
655        } else {
656            xobject_obj.as_dict().cloned()
657        };
658
659        let xobject_dict = match xobject_dict {
660            Some(d) => d,
661            None => return Ok(()),
662        };
663
664        // Check each XObject entry
665        for (xo_name, xo_value) in xobject_dict.0.iter() {
666            let xo_stream_dict = if let Some((obj_num, gen_num)) = xo_value.as_reference() {
667                let obj = reader
668                    .get_object(obj_num, gen_num)
669                    .map_err(|e| PdfAError::ParseError(e.to_string()))?;
670                if let Some(stream) = obj.as_stream() {
671                    Some(stream.dict.clone())
672                } else {
673                    obj.as_dict().cloned()
674                }
675            } else if let Some(stream) = xo_value.as_stream() {
676                Some(stream.dict.clone())
677            } else {
678                xo_value.as_dict().cloned()
679            };
680
681            if let Some(xo_dict) = xo_stream_dict {
682                // Check for transparency group (/Group with /S /Transparency)
683                if let Some(group) = xo_dict.get("Group") {
684                    let group_dict = if let Some((obj_num, gen_num)) = group.as_reference() {
685                        let obj = reader
686                            .get_object(obj_num, gen_num)
687                            .map_err(|e| PdfAError::ParseError(e.to_string()))?;
688                        obj.as_dict().cloned()
689                    } else {
690                        group.as_dict().cloned()
691                    };
692
693                    if let Some(group_dict) = group_dict {
694                        if let Some(s) = group_dict.get("S") {
695                            if let Some(name) = s.as_name() {
696                                if name.0 == "Transparency" {
697                                    errors.push(ValidationError::TransparencyForbidden {
698                                        location: format!(
699                                            "Page {}, XObject/{} has transparency group",
700                                            page_idx + 1,
701                                            &xo_name.0
702                                        ),
703                                    });
704                                }
705                            }
706                        }
707                    }
708                }
709
710                // Check for SMask in Image XObjects
711                if let Some(subtype) = xo_dict.get("Subtype") {
712                    if let Some(name) = subtype.as_name() {
713                        if name.0 == "Image" {
714                            if xo_dict.get("SMask").is_some() {
715                                errors.push(ValidationError::TransparencyForbidden {
716                                    location: format!(
717                                        "Page {}, Image XObject/{} has SMask",
718                                        page_idx + 1,
719                                        &xo_name.0
720                                    ),
721                                });
722                            }
723                        }
724                    }
725                }
726            }
727        }
728
729        Ok(())
730    }
731
732    /// Check for LZW compression (forbidden in PDF/A-1)
733    ///
734    /// Note: This performs a sample check on page resources. A full implementation
735    /// would scan all streams in the document.
736    fn check_lzw_compression<R: Read + Seek>(
737        &self,
738        reader: &mut PdfReader<R>,
739        errors: &mut Vec<ValidationError>,
740    ) -> Result<(), PdfAError> {
741        let page_count = reader
742            .page_count()
743            .map_err(|e| PdfAError::ParseError(e.to_string()))?;
744
745        for page_idx in 0..page_count {
746            let page_dict = self.get_page_dict(reader, page_idx)?;
747
748            // Check Resources for XObjects that might use LZW
749            if let Some(resources) = self.get_resources_dict(reader, &page_dict)? {
750                if let Some(xobjects) = resources.get("XObject") {
751                    self.check_xobjects_for_lzw(reader, xobjects, page_idx, errors)?;
752                }
753            }
754
755            // Check content stream(s)
756            if let Some(contents) = page_dict.get("Contents") {
757                self.check_contents_for_lzw(reader, contents, page_idx, errors)?;
758            }
759        }
760
761        Ok(())
762    }
763
764    /// Check XObjects for LZW compression
765    fn check_xobjects_for_lzw<R: Read + Seek>(
766        &self,
767        reader: &mut PdfReader<R>,
768        xobject_obj: &crate::parser::objects::PdfObject,
769        page_idx: u32,
770        errors: &mut Vec<ValidationError>,
771    ) -> Result<(), PdfAError> {
772        let xobject_dict = if let Some((obj_num, gen_num)) = xobject_obj.as_reference() {
773            let obj = reader
774                .get_object(obj_num, gen_num)
775                .map_err(|e| PdfAError::ParseError(e.to_string()))?;
776            obj.as_dict().cloned()
777        } else {
778            xobject_obj.as_dict().cloned()
779        };
780
781        let xobject_dict = match xobject_dict {
782            Some(d) => d,
783            None => return Ok(()),
784        };
785
786        for (_xo_name, xo_value) in xobject_dict.0.iter() {
787            if let Some((obj_num, gen_num)) = xo_value.as_reference() {
788                if let Ok(obj) = reader.get_object(obj_num, gen_num) {
789                    if let Some(stream) = obj.as_stream() {
790                        self.check_stream_for_lzw(&stream.dict, obj_num, page_idx, errors);
791                    }
792                }
793            }
794        }
795
796        Ok(())
797    }
798
799    /// Check content streams for LZW compression
800    fn check_contents_for_lzw<R: Read + Seek>(
801        &self,
802        reader: &mut PdfReader<R>,
803        contents: &crate::parser::objects::PdfObject,
804        page_idx: u32,
805        errors: &mut Vec<ValidationError>,
806    ) -> Result<(), PdfAError> {
807        // Contents can be a reference or an array of references
808        if let Some((obj_num, gen_num)) = contents.as_reference() {
809            if let Ok(obj) = reader.get_object(obj_num, gen_num) {
810                if let Some(stream) = obj.as_stream() {
811                    self.check_stream_for_lzw(&stream.dict, obj_num, page_idx, errors);
812                }
813            }
814        } else if let Some(arr) = contents.as_array() {
815            for item in &arr.0 {
816                if let Some((obj_num, gen_num)) = item.as_reference() {
817                    if let Ok(obj) = reader.get_object(obj_num, gen_num) {
818                        if let Some(stream) = obj.as_stream() {
819                            self.check_stream_for_lzw(&stream.dict, obj_num, page_idx, errors);
820                        }
821                    }
822                }
823            }
824        }
825
826        Ok(())
827    }
828
829    /// Check a stream dictionary for LZW filter
830    fn check_stream_for_lzw(
831        &self,
832        dict: &crate::parser::objects::PdfDictionary,
833        obj_num: u32,
834        page_idx: u32,
835        errors: &mut Vec<ValidationError>,
836    ) {
837        if let Some(filter) = dict.get("Filter") {
838            // Filter can be a name or array
839            if let Some(name) = filter.as_name() {
840                if name.0 == "LZWDecode" {
841                    errors.push(ValidationError::LzwCompressionForbidden {
842                        object_id: format!("page {}, object {} 0", page_idx + 1, obj_num),
843                    });
844                }
845            } else if let Some(arr) = filter.as_array() {
846                for (idx, f) in arr.0.iter().enumerate() {
847                    if let Some(name) = f.as_name() {
848                        if name.0 == "LZWDecode" {
849                            errors.push(ValidationError::LzwCompressionForbidden {
850                                object_id: format!(
851                                    "page {}, object {} 0 (filter {})",
852                                    page_idx + 1,
853                                    obj_num,
854                                    idx
855                                ),
856                            });
857                        }
858                    }
859                }
860            }
861        }
862    }
863
864    /// Check for embedded files (forbidden in PDF/A-1 and PDF/A-2)
865    fn check_embedded_files<R: Read + Seek>(
866        &self,
867        reader: &mut PdfReader<R>,
868        catalog_data: &CatalogData,
869        errors: &mut Vec<ValidationError>,
870    ) -> Result<(), PdfAError> {
871        if self.level.allows_embedded_files() {
872            // PDF/A-3 allows embedded files, but they must have proper metadata
873            // Full validation would check AF entries
874            return Ok(());
875        }
876
877        // Check Names/EmbeddedFiles
878        if let Some((obj_num, gen_num)) = catalog_data.names_ref {
879            let names_obj = reader
880                .get_object(obj_num, gen_num)
881                .map_err(|e| PdfAError::ParseError(e.to_string()))?;
882
883            if let Some(names_dict) = names_obj.as_dict() {
884                if names_dict.get("EmbeddedFiles").is_some() {
885                    errors.push(ValidationError::EmbeddedFileForbidden);
886                }
887            }
888        } else if let Some(ref names_dict) = catalog_data.names_inline {
889            if names_dict.get("EmbeddedFiles").is_some() {
890                errors.push(ValidationError::EmbeddedFileForbidden);
891            }
892        }
893
894        Ok(())
895    }
896
897    /// Check that all fonts are properly embedded
898    fn check_fonts<R: Read + Seek>(
899        &self,
900        reader: &mut PdfReader<R>,
901        errors: &mut Vec<ValidationError>,
902    ) -> Result<(), PdfAError> {
903        let page_count = reader
904            .page_count()
905            .map_err(|e| PdfAError::ParseError(e.to_string()))?;
906        let requires_tounicode = self.level.conformance() == PdfAConformance::A;
907
908        for page_idx in 0..page_count {
909            let page_dict = self.get_page_dict(reader, page_idx)?;
910
911            if let Some(resources) = self.get_resources_dict(reader, &page_dict)? {
912                if let Some(fonts_obj) = resources.get("Font") {
913                    self.check_font_resources(reader, fonts_obj, requires_tounicode, errors)?;
914                }
915            }
916        }
917
918        Ok(())
919    }
920
921    /// Check font resources dictionary
922    fn check_font_resources<R: Read + Seek>(
923        &self,
924        reader: &mut PdfReader<R>,
925        fonts_obj: &crate::parser::objects::PdfObject,
926        requires_tounicode: bool,
927        errors: &mut Vec<ValidationError>,
928    ) -> Result<(), PdfAError> {
929        let fonts_dict = if let Some((obj_num, gen_num)) = fonts_obj.as_reference() {
930            let obj = reader
931                .get_object(obj_num, gen_num)
932                .map_err(|e| PdfAError::ParseError(e.to_string()))?;
933            obj.as_dict().cloned()
934        } else {
935            fonts_obj.as_dict().cloned()
936        };
937
938        let fonts_dict = match fonts_dict {
939            Some(d) => d,
940            None => return Ok(()),
941        };
942
943        // Check each font
944        for (font_name, font_ref) in fonts_dict.0.iter() {
945            let font_dict = if let Some((obj_num, gen_num)) = font_ref.as_reference() {
946                let obj = reader
947                    .get_object(obj_num, gen_num)
948                    .map_err(|e| PdfAError::ParseError(e.to_string()))?;
949                obj.as_dict().cloned()
950            } else {
951                font_ref.as_dict().cloned()
952            };
953
954            if let Some(font_dict) = font_dict {
955                self.check_single_font(
956                    reader,
957                    &font_name.0,
958                    &font_dict,
959                    requires_tounicode,
960                    errors,
961                )?;
962            }
963        }
964
965        Ok(())
966    }
967
968    /// Check a single font for PDF/A compliance
969    fn check_single_font<R: Read + Seek>(
970        &self,
971        reader: &mut PdfReader<R>,
972        font_name: &str,
973        font_dict: &crate::parser::objects::PdfDictionary,
974        requires_tounicode: bool,
975        errors: &mut Vec<ValidationError>,
976    ) -> Result<(), PdfAError> {
977        // Get font type
978        let font_type = font_dict
979            .get("Subtype")
980            .and_then(|s| s.as_name())
981            .map(|n| n.0.clone())
982            .unwrap_or_default();
983
984        // Type3 fonts have different requirements
985        if font_type == "Type3" {
986            // Type3 fonts are always considered "embedded" as they define glyphs inline
987            // But for Level A, they still need character mapping
988            if requires_tounicode && font_dict.get("ToUnicode").is_none() {
989                errors.push(ValidationError::FontMissingToUnicode {
990                    font_name: font_name.to_string(),
991                });
992            }
993            return Ok(());
994        }
995
996        // For Type0 (composite) fonts, check the descendant font
997        if font_type == "Type0" {
998            return self.check_type0_font(reader, font_name, font_dict, requires_tounicode, errors);
999        }
1000
1001        // Check FontDescriptor for embedding (Type1, TrueType, etc.)
1002        let font_descriptor = self.get_font_descriptor(reader, font_dict)?;
1003
1004        if let Some(desc) = font_descriptor {
1005            // Check for font embedding: FontFile, FontFile2, or FontFile3
1006            let has_fontfile = desc.get("FontFile").is_some()
1007                || desc.get("FontFile2").is_some()
1008                || desc.get("FontFile3").is_some();
1009
1010            if !has_fontfile {
1011                errors.push(ValidationError::FontNotEmbedded {
1012                    font_name: font_name.to_string(),
1013                });
1014            }
1015        } else {
1016            // No FontDescriptor means the font is not embedded
1017            // Exception: standard 14 fonts technically don't need FontDescriptor
1018            // but PDF/A still requires them to be embedded
1019            errors.push(ValidationError::FontNotEmbedded {
1020                font_name: font_name.to_string(),
1021            });
1022        }
1023
1024        // For Level A conformance, check ToUnicode
1025        if requires_tounicode && font_dict.get("ToUnicode").is_none() {
1026            // Check if font has proper encoding that allows Unicode mapping
1027            let has_encoding = font_dict.get("Encoding").is_some();
1028            if !has_encoding {
1029                errors.push(ValidationError::FontMissingToUnicode {
1030                    font_name: font_name.to_string(),
1031                });
1032            }
1033        }
1034
1035        Ok(())
1036    }
1037
1038    /// Check Type0 (composite) font for embedding
1039    fn check_type0_font<R: Read + Seek>(
1040        &self,
1041        reader: &mut PdfReader<R>,
1042        font_name: &str,
1043        font_dict: &crate::parser::objects::PdfDictionary,
1044        requires_tounicode: bool,
1045        errors: &mut Vec<ValidationError>,
1046    ) -> Result<(), PdfAError> {
1047        // Get DescendantFonts array
1048        let descendants = match font_dict.get("DescendantFonts") {
1049            Some(d) => d,
1050            None => {
1051                errors.push(ValidationError::FontNotEmbedded {
1052                    font_name: font_name.to_string(),
1053                });
1054                return Ok(());
1055            }
1056        };
1057
1058        let desc_array = if let Some((obj_num, gen_num)) = descendants.as_reference() {
1059            let obj = reader
1060                .get_object(obj_num, gen_num)
1061                .map_err(|e| PdfAError::ParseError(e.to_string()))?;
1062            obj.as_array().cloned()
1063        } else {
1064            descendants.as_array().cloned()
1065        };
1066
1067        let desc_array = match desc_array {
1068            Some(a) => a,
1069            None => return Ok(()),
1070        };
1071
1072        // Check first descendant font (CIDFont)
1073        if let Some(cid_font_ref) = desc_array.0.first() {
1074            let cid_font_dict = if let Some((obj_num, gen_num)) = cid_font_ref.as_reference() {
1075                let obj = reader
1076                    .get_object(obj_num, gen_num)
1077                    .map_err(|e| PdfAError::ParseError(e.to_string()))?;
1078                obj.as_dict().cloned()
1079            } else {
1080                cid_font_ref.as_dict().cloned()
1081            };
1082
1083            if let Some(cid_dict) = cid_font_dict {
1084                // Check CIDFont's FontDescriptor
1085                let font_descriptor = self.get_font_descriptor(reader, &cid_dict)?;
1086
1087                if let Some(desc) = font_descriptor {
1088                    let has_fontfile = desc.get("FontFile").is_some()
1089                        || desc.get("FontFile2").is_some()
1090                        || desc.get("FontFile3").is_some();
1091
1092                    if !has_fontfile {
1093                        errors.push(ValidationError::FontNotEmbedded {
1094                            font_name: font_name.to_string(),
1095                        });
1096                    }
1097                } else {
1098                    errors.push(ValidationError::FontNotEmbedded {
1099                        font_name: font_name.to_string(),
1100                    });
1101                }
1102            }
1103        }
1104
1105        // For Level A, check ToUnicode or CMap
1106        if requires_tounicode && font_dict.get("ToUnicode").is_none() {
1107            // Type0 fonts might use Identity-H/V encoding which is acceptable
1108            // if combined with proper CIDToGIDMap
1109            let encoding = font_dict.get("Encoding").and_then(|e| e.as_name());
1110            let is_identity = encoding
1111                .map(|n| n.0 == "Identity-H" || n.0 == "Identity-V")
1112                .unwrap_or(false);
1113
1114            if !is_identity {
1115                errors.push(ValidationError::FontMissingToUnicode {
1116                    font_name: font_name.to_string(),
1117                });
1118            }
1119        }
1120
1121        Ok(())
1122    }
1123
1124    /// Get FontDescriptor from a font dictionary
1125    fn get_font_descriptor<R: Read + Seek>(
1126        &self,
1127        reader: &mut PdfReader<R>,
1128        font_dict: &crate::parser::objects::PdfDictionary,
1129    ) -> Result<Option<crate::parser::objects::PdfDictionary>, PdfAError> {
1130        let desc_ref = match font_dict.get("FontDescriptor") {
1131            Some(d) => d,
1132            None => return Ok(None),
1133        };
1134
1135        if let Some((obj_num, gen_num)) = desc_ref.as_reference() {
1136            let obj = reader
1137                .get_object(obj_num, gen_num)
1138                .map_err(|e| PdfAError::ParseError(e.to_string()))?;
1139            Ok(obj.as_dict().cloned())
1140        } else {
1141            Ok(desc_ref.as_dict().cloned())
1142        }
1143    }
1144
1145    /// Check color spaces for PDF/A compliance
1146    ///
1147    /// PDF/A requires device-independent color spaces or a properly defined
1148    /// OutputIntent. Device-dependent color spaces (DeviceRGB, DeviceCMYK,
1149    /// DeviceGray) are only allowed if an OutputIntent is present.
1150    fn check_color_spaces<R: Read + Seek>(
1151        &self,
1152        reader: &mut PdfReader<R>,
1153        errors: &mut Vec<ValidationError>,
1154    ) -> Result<(), PdfAError> {
1155        // First, check if there's an OutputIntent in the catalog
1156        let has_output_intent = self.has_output_intent(reader)?;
1157
1158        let page_count = reader
1159            .page_count()
1160            .map_err(|e| PdfAError::ParseError(e.to_string()))?;
1161
1162        for page_idx in 0..page_count {
1163            let page_dict = self.get_page_dict(reader, page_idx)?;
1164
1165            if let Some(resources) = self.get_resources_dict(reader, &page_dict)? {
1166                // Check ColorSpace dictionary
1167                if let Some(cs_obj) = resources.get("ColorSpace") {
1168                    self.check_colorspace_dict(
1169                        reader,
1170                        cs_obj,
1171                        page_idx,
1172                        has_output_intent,
1173                        errors,
1174                    )?;
1175                }
1176
1177                // Check XObjects for uncalibrated color spaces in images
1178                if let Some(xobjects) = resources.get("XObject") {
1179                    self.check_xobject_colorspaces(
1180                        reader,
1181                        xobjects,
1182                        page_idx,
1183                        has_output_intent,
1184                        errors,
1185                    )?;
1186                }
1187            }
1188        }
1189
1190        Ok(())
1191    }
1192
1193    /// Check if the document has a valid OutputIntent
1194    fn has_output_intent<R: Read + Seek>(
1195        &self,
1196        reader: &mut PdfReader<R>,
1197    ) -> Result<bool, PdfAError> {
1198        let catalog = reader
1199            .catalog()
1200            .map_err(|e| PdfAError::ParseError(e.to_string()))?;
1201
1202        if let Some(output_intents) = catalog.get("OutputIntents") {
1203            let arr = if let Some((obj_num, gen_num)) = output_intents.as_reference() {
1204                let obj = reader
1205                    .get_object(obj_num, gen_num)
1206                    .map_err(|e| PdfAError::ParseError(e.to_string()))?;
1207                obj.as_array().cloned()
1208            } else {
1209                output_intents.as_array().cloned()
1210            };
1211
1212            if let Some(arr) = arr {
1213                // Check if any OutputIntent has the required PDF/A subtype
1214                for item in &arr.0 {
1215                    let intent_dict = if let Some((obj_num, gen_num)) = item.as_reference() {
1216                        let obj = reader
1217                            .get_object(obj_num, gen_num)
1218                            .map_err(|e| PdfAError::ParseError(e.to_string()))?;
1219                        obj.as_dict().cloned()
1220                    } else {
1221                        item.as_dict().cloned()
1222                    };
1223
1224                    if let Some(dict) = intent_dict {
1225                        // Check for GTS_PDFA1 or similar subtype
1226                        if let Some(subtype) = dict.get("S") {
1227                            if let Some(name) = subtype.as_name() {
1228                                if name.0.contains("PDFA") || name.0.contains("PDF/A") {
1229                                    return Ok(true);
1230                                }
1231                            }
1232                        }
1233                        // Also check for DestOutputProfile
1234                        if dict.get("DestOutputProfile").is_some() {
1235                            return Ok(true);
1236                        }
1237                    }
1238                }
1239            }
1240        }
1241
1242        Ok(false)
1243    }
1244
1245    /// Check ColorSpace dictionary entries
1246    fn check_colorspace_dict<R: Read + Seek>(
1247        &self,
1248        reader: &mut PdfReader<R>,
1249        cs_obj: &crate::parser::objects::PdfObject,
1250        page_idx: u32,
1251        has_output_intent: bool,
1252        errors: &mut Vec<ValidationError>,
1253    ) -> Result<(), PdfAError> {
1254        let cs_dict = if let Some((obj_num, gen_num)) = cs_obj.as_reference() {
1255            let obj = reader
1256                .get_object(obj_num, gen_num)
1257                .map_err(|e| PdfAError::ParseError(e.to_string()))?;
1258            obj.as_dict().cloned()
1259        } else {
1260            cs_obj.as_dict().cloned()
1261        };
1262
1263        let cs_dict = match cs_dict {
1264            Some(d) => d,
1265            None => return Ok(()),
1266        };
1267
1268        for (cs_name, cs_value) in cs_dict.0.iter() {
1269            self.validate_colorspace(
1270                reader,
1271                &cs_name.0,
1272                cs_value,
1273                page_idx,
1274                has_output_intent,
1275                errors,
1276            )?;
1277        }
1278
1279        Ok(())
1280    }
1281
1282    /// Validate a single color space entry
1283    fn validate_colorspace<R: Read + Seek>(
1284        &self,
1285        reader: &mut PdfReader<R>,
1286        cs_name: &str,
1287        cs_value: &crate::parser::objects::PdfObject,
1288        page_idx: u32,
1289        has_output_intent: bool,
1290        errors: &mut Vec<ValidationError>,
1291    ) -> Result<(), PdfAError> {
1292        // Color space can be a name or an array
1293        let cs_type = if let Some(name) = cs_value.as_name() {
1294            name.0.clone()
1295        } else if let Some(arr) = cs_value.as_array() {
1296            // First element is the color space name
1297            arr.0
1298                .first()
1299                .and_then(|o| o.as_name())
1300                .map(|n| n.0.clone())
1301                .unwrap_or_default()
1302        } else if let Some((obj_num, gen_num)) = cs_value.as_reference() {
1303            // Resolve reference
1304            let obj = reader
1305                .get_object(obj_num, gen_num)
1306                .map_err(|e| PdfAError::ParseError(e.to_string()))?;
1307            if let Some(name) = obj.as_name() {
1308                name.0.clone()
1309            } else if let Some(arr) = obj.as_array() {
1310                arr.0
1311                    .first()
1312                    .and_then(|o| o.as_name())
1313                    .map(|n| n.0.clone())
1314                    .unwrap_or_default()
1315            } else {
1316                return Ok(());
1317            }
1318        } else {
1319            return Ok(());
1320        };
1321
1322        // Check if it's a device-dependent color space
1323        if self.is_device_dependent_colorspace(&cs_type) && !has_output_intent {
1324            errors.push(ValidationError::InvalidColorSpace {
1325                color_space: cs_type,
1326                location: format!("Page {}, ColorSpace/{}", page_idx + 1, cs_name),
1327            });
1328        }
1329
1330        Ok(())
1331    }
1332
1333    /// Check XObjects for device-dependent color spaces
1334    fn check_xobject_colorspaces<R: Read + Seek>(
1335        &self,
1336        reader: &mut PdfReader<R>,
1337        xobjects_obj: &crate::parser::objects::PdfObject,
1338        page_idx: u32,
1339        has_output_intent: bool,
1340        errors: &mut Vec<ValidationError>,
1341    ) -> Result<(), PdfAError> {
1342        let xobjects_dict = if let Some((obj_num, gen_num)) = xobjects_obj.as_reference() {
1343            let obj = reader
1344                .get_object(obj_num, gen_num)
1345                .map_err(|e| PdfAError::ParseError(e.to_string()))?;
1346            obj.as_dict().cloned()
1347        } else {
1348            xobjects_obj.as_dict().cloned()
1349        };
1350
1351        let xobjects_dict = match xobjects_dict {
1352            Some(d) => d,
1353            None => return Ok(()),
1354        };
1355
1356        for (xo_name, xo_ref) in xobjects_dict.0.iter() {
1357            let xo_dict = if let Some((obj_num, gen_num)) = xo_ref.as_reference() {
1358                let obj = reader
1359                    .get_object(obj_num, gen_num)
1360                    .map_err(|e| PdfAError::ParseError(e.to_string()))?;
1361                if let Some(stream) = obj.as_stream() {
1362                    Some(stream.dict.clone())
1363                } else {
1364                    obj.as_dict().cloned()
1365                }
1366            } else if let Some(stream) = xo_ref.as_stream() {
1367                Some(stream.dict.clone())
1368            } else {
1369                xo_ref.as_dict().cloned()
1370            };
1371
1372            if let Some(dict) = xo_dict {
1373                // Check if it's an Image XObject
1374                let is_image = dict
1375                    .get("Subtype")
1376                    .and_then(|s| s.as_name())
1377                    .map(|n| n.0 == "Image")
1378                    .unwrap_or(false);
1379
1380                if is_image {
1381                    // Check ColorSpace of the image
1382                    if let Some(cs) = dict.get("ColorSpace") {
1383                        let cs_type = if let Some(name) = cs.as_name() {
1384                            name.0.clone()
1385                        } else if let Some(arr) = cs.as_array() {
1386                            arr.0
1387                                .first()
1388                                .and_then(|o| o.as_name())
1389                                .map(|n| n.0.clone())
1390                                .unwrap_or_default()
1391                        } else {
1392                            String::new()
1393                        };
1394
1395                        if self.is_device_dependent_colorspace(&cs_type) && !has_output_intent {
1396                            errors.push(ValidationError::InvalidColorSpace {
1397                                color_space: cs_type,
1398                                location: format!("Page {}, XObject/{}", page_idx + 1, &xo_name.0),
1399                            });
1400                        }
1401                    }
1402                }
1403            }
1404        }
1405
1406        Ok(())
1407    }
1408
1409    /// Check if a color space is device-dependent
1410    fn is_device_dependent_colorspace(&self, cs_type: &str) -> bool {
1411        matches!(cs_type, "DeviceRGB" | "DeviceCMYK" | "DeviceGray")
1412    }
1413}
1414
1415#[cfg(test)]
1416mod tests {
1417    use super::*;
1418
1419    #[test]
1420    fn test_validator_new() {
1421        let validator = PdfAValidator::new(PdfALevel::A1b);
1422        assert_eq!(validator.level(), PdfALevel::A1b);
1423    }
1424
1425    #[test]
1426    fn test_validator_level_a2b() {
1427        let validator = PdfAValidator::new(PdfALevel::A2b);
1428        assert_eq!(validator.level(), PdfALevel::A2b);
1429    }
1430
1431    #[test]
1432    fn test_validator_level_a3b() {
1433        let validator = PdfAValidator::new(PdfALevel::A3b);
1434        assert_eq!(validator.level(), PdfALevel::A3b);
1435    }
1436
1437    #[test]
1438    fn test_validator_collect_all_errors() {
1439        let validator = PdfAValidator::new(PdfALevel::A1b).collect_all_errors(false);
1440        assert!(!validator.collect_all_errors);
1441    }
1442
1443    #[test]
1444    fn test_validator_clone() {
1445        let validator = PdfAValidator::new(PdfALevel::A2u);
1446        let cloned = validator.clone();
1447        assert_eq!(cloned.level(), PdfALevel::A2u);
1448    }
1449
1450    #[test]
1451    fn test_validator_debug() {
1452        let validator = PdfAValidator::new(PdfALevel::A1a);
1453        let debug_str = format!("{:?}", validator);
1454        assert!(debug_str.contains("PdfAValidator"));
1455        assert!(debug_str.contains("A1a"));
1456    }
1457
1458    #[test]
1459    fn test_is_javascript_action_true() {
1460        use crate::parser::objects::{PdfDictionary, PdfName, PdfObject};
1461
1462        let validator = PdfAValidator::new(PdfALevel::A1b);
1463        let mut dict = PdfDictionary::new();
1464        dict.insert(
1465            "S".to_string(),
1466            PdfObject::Name(PdfName("JavaScript".to_string())),
1467        );
1468
1469        assert!(validator.is_javascript_action(&dict));
1470    }
1471
1472    #[test]
1473    fn test_is_javascript_action_false() {
1474        use crate::parser::objects::{PdfDictionary, PdfName, PdfObject};
1475
1476        let validator = PdfAValidator::new(PdfALevel::A1b);
1477        let mut dict = PdfDictionary::new();
1478        dict.insert(
1479            "S".to_string(),
1480            PdfObject::Name(PdfName("GoTo".to_string())),
1481        );
1482
1483        assert!(!validator.is_javascript_action(&dict));
1484    }
1485
1486    #[test]
1487    fn test_is_javascript_action_no_s_key() {
1488        use crate::parser::objects::PdfDictionary;
1489
1490        let validator = PdfAValidator::new(PdfALevel::A1b);
1491        let dict = PdfDictionary::new();
1492
1493        assert!(!validator.is_javascript_action(&dict));
1494    }
1495
1496    #[test]
1497    fn test_check_for_external_action_gotor() {
1498        use crate::parser::objects::{PdfDictionary, PdfName, PdfObject};
1499
1500        let validator = PdfAValidator::new(PdfALevel::A1b);
1501        let mut dict = PdfDictionary::new();
1502        dict.insert(
1503            "S".to_string(),
1504            PdfObject::Name(PdfName("GoToR".to_string())),
1505        );
1506
1507        let mut errors = Vec::new();
1508        validator.check_for_external_action(&dict, &mut errors);
1509
1510        assert_eq!(errors.len(), 1);
1511        assert!(matches!(
1512            errors[0],
1513            ValidationError::ExternalReferenceForbidden { .. }
1514        ));
1515    }
1516
1517    #[test]
1518    fn test_check_for_external_action_launch() {
1519        use crate::parser::objects::{PdfDictionary, PdfName, PdfObject};
1520
1521        let validator = PdfAValidator::new(PdfALevel::A1b);
1522        let mut dict = PdfDictionary::new();
1523        dict.insert(
1524            "S".to_string(),
1525            PdfObject::Name(PdfName("Launch".to_string())),
1526        );
1527
1528        let mut errors = Vec::new();
1529        validator.check_for_external_action(&dict, &mut errors);
1530
1531        assert_eq!(errors.len(), 1);
1532    }
1533
1534    #[test]
1535    fn test_check_for_external_action_goto_internal() {
1536        use crate::parser::objects::{PdfDictionary, PdfName, PdfObject};
1537
1538        let validator = PdfAValidator::new(PdfALevel::A1b);
1539        let mut dict = PdfDictionary::new();
1540        dict.insert(
1541            "S".to_string(),
1542            PdfObject::Name(PdfName("GoTo".to_string())),
1543        );
1544
1545        let mut errors = Vec::new();
1546        validator.check_for_external_action(&dict, &mut errors);
1547
1548        assert_eq!(errors.len(), 0); // Internal GoTo is allowed
1549    }
1550
1551    #[test]
1552    fn test_check_stream_for_lzw_single_filter() {
1553        use crate::parser::objects::{PdfDictionary, PdfName, PdfObject};
1554
1555        let validator = PdfAValidator::new(PdfALevel::A1b);
1556        let mut dict = PdfDictionary::new();
1557        dict.insert(
1558            "Filter".to_string(),
1559            PdfObject::Name(PdfName("LZWDecode".to_string())),
1560        );
1561
1562        let mut errors = Vec::new();
1563        validator.check_stream_for_lzw(&dict, 10, 0, &mut errors);
1564
1565        assert_eq!(errors.len(), 1);
1566        assert!(matches!(
1567            &errors[0],
1568            ValidationError::LzwCompressionForbidden { object_id } if object_id == "page 1, object 10 0"
1569        ));
1570    }
1571
1572    #[test]
1573    fn test_check_stream_for_lzw_array_filter() {
1574        use crate::parser::objects::{PdfArray, PdfDictionary, PdfName, PdfObject};
1575
1576        let validator = PdfAValidator::new(PdfALevel::A1b);
1577        let mut dict = PdfDictionary::new();
1578        let filters = PdfArray(vec![
1579            PdfObject::Name(PdfName("FlateDecode".to_string())),
1580            PdfObject::Name(PdfName("LZWDecode".to_string())),
1581        ]);
1582        dict.insert("Filter".to_string(), PdfObject::Array(filters));
1583
1584        let mut errors = Vec::new();
1585        validator.check_stream_for_lzw(&dict, 20, 2, &mut errors);
1586
1587        assert_eq!(errors.len(), 1);
1588        assert!(matches!(
1589            &errors[0],
1590            ValidationError::LzwCompressionForbidden { object_id } if object_id.contains("page 3") && object_id.contains("object 20 0")
1591        ));
1592    }
1593
1594    #[test]
1595    fn test_check_stream_for_lzw_no_lzw() {
1596        use crate::parser::objects::{PdfDictionary, PdfName, PdfObject};
1597
1598        let validator = PdfAValidator::new(PdfALevel::A1b);
1599        let mut dict = PdfDictionary::new();
1600        dict.insert(
1601            "Filter".to_string(),
1602            PdfObject::Name(PdfName("FlateDecode".to_string())),
1603        );
1604
1605        let mut errors = Vec::new();
1606        validator.check_stream_for_lzw(&dict, 30, 0, &mut errors);
1607
1608        assert_eq!(errors.len(), 0);
1609    }
1610
1611    #[test]
1612    fn test_check_stream_for_lzw_no_filter() {
1613        use crate::parser::objects::PdfDictionary;
1614
1615        let validator = PdfAValidator::new(PdfALevel::A1b);
1616        let dict = PdfDictionary::new();
1617
1618        let mut errors = Vec::new();
1619        validator.check_stream_for_lzw(&dict, 40, 0, &mut errors);
1620
1621        assert_eq!(errors.len(), 0);
1622    }
1623
1624    #[test]
1625    fn test_pdfa_level_allows_lzw() {
1626        // PDF/A-1 does not allow LZW
1627        assert!(!PdfALevel::A1b.allows_lzw());
1628        assert!(!PdfALevel::A1a.allows_lzw());
1629
1630        // PDF/A-2 and PDF/A-3 allow LZW
1631        assert!(PdfALevel::A2b.allows_lzw());
1632        assert!(PdfALevel::A3b.allows_lzw());
1633    }
1634
1635    #[test]
1636    fn test_pdfa_level_allows_embedded_files() {
1637        // PDF/A-1 and PDF/A-2 do not allow embedded files
1638        assert!(!PdfALevel::A1b.allows_embedded_files());
1639        assert!(!PdfALevel::A2b.allows_embedded_files());
1640
1641        // PDF/A-3 allows embedded files
1642        assert!(PdfALevel::A3b.allows_embedded_files());
1643    }
1644
1645    #[test]
1646    fn test_is_device_dependent_colorspace() {
1647        let validator = PdfAValidator::new(PdfALevel::A1b);
1648
1649        // Device-dependent color spaces
1650        assert!(validator.is_device_dependent_colorspace("DeviceRGB"));
1651        assert!(validator.is_device_dependent_colorspace("DeviceCMYK"));
1652        assert!(validator.is_device_dependent_colorspace("DeviceGray"));
1653
1654        // Device-independent color spaces
1655        assert!(!validator.is_device_dependent_colorspace("CalRGB"));
1656        assert!(!validator.is_device_dependent_colorspace("CalGray"));
1657        assert!(!validator.is_device_dependent_colorspace("Lab"));
1658        assert!(!validator.is_device_dependent_colorspace("ICCBased"));
1659        assert!(!validator.is_device_dependent_colorspace("Indexed"));
1660        assert!(!validator.is_device_dependent_colorspace("Pattern"));
1661    }
1662
1663    #[test]
1664    fn test_pdfa_conformance_level_a() {
1665        // Level A requires ToUnicode for accessible conformance
1666        assert_eq!(PdfALevel::A1a.conformance(), PdfAConformance::A);
1667        assert_eq!(PdfALevel::A2a.conformance(), PdfAConformance::A);
1668        assert_eq!(PdfALevel::A3a.conformance(), PdfAConformance::A);
1669
1670        // Level B is basic conformance
1671        assert_eq!(PdfALevel::A1b.conformance(), PdfAConformance::B);
1672        assert_eq!(PdfALevel::A2b.conformance(), PdfAConformance::B);
1673        assert_eq!(PdfALevel::A3b.conformance(), PdfAConformance::B);
1674
1675        // Level U is Unicode conformance
1676        assert_eq!(PdfALevel::A2u.conformance(), PdfAConformance::U);
1677        assert_eq!(PdfALevel::A3u.conformance(), PdfAConformance::U);
1678    }
1679}