oxidize_pdf/recovery/
validator.rs

1//! PDF validation and integrity checking
2
3use crate::error::Result;
4use crate::parser::PdfReader;
5use std::collections::HashSet;
6use std::io::{Read, Seek};
7use std::path::Path;
8
/// Problems a validation run can record against a PDF document.
///
/// Values compare by variant and payload (`PartialEq`/`Eq`), so callers and
/// tests can use `==` / `assert_eq!` instead of pattern matching.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ValidationError {
    /// The header/version could not be read, or the document failed to open;
    /// the payload is the underlying error rendered as text.
    InvalidHeader(String),
    /// Required objects are missing; the payload lists them.
    MissingObjects(Vec<String>),
    /// The cross-reference data is invalid; the payload describes the problem.
    InvalidXRef(String),
    /// A circular reference was detected.
    /// NOTE(review): the two numbers are presumably the object numbers at each
    /// end of the cycle — confirm once detection is implemented.
    CircularReference(u32, u32),
    /// The page tree is malformed or a page could not be read.
    InvalidPageTree(String),
    /// A stream is corrupted.
    /// NOTE(review): the payload is presumably the stream's object number — confirm.
    CorruptedStream(u32),
    /// An encoding is invalid or unsupported.
    InvalidEncoding(String),
    /// A security constraint was violated.
    SecurityViolation(String),
}
29
30/// Validation result
31#[derive(Debug)]
32pub struct ValidationResult {
33    /// Whether PDF is valid
34    pub is_valid: bool,
35    /// Validation errors found
36    pub errors: Vec<ValidationError>,
37    /// Validation warnings
38    pub warnings: Vec<String>,
39    /// Validation statistics
40    pub stats: ValidationStats,
41}
42
/// Counters collected while validating a document.
///
/// All counters start at zero (`Default`). The type is `Clone` and comparable
/// so a snapshot can be taken and checked against a later state.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct ValidationStats {
    /// Number of objects examined.
    pub objects_checked: usize,
    /// Number of examined objects that were found valid.
    pub valid_objects: usize,
    /// Number of pages that could be read and were checked.
    pub pages_validated: usize,
    /// Number of streams validated.
    pub streams_validated: usize,
    /// Number of cross-reference validations performed.
    pub xrefs_validated: usize,
}
57
/// Validator that checks a PDF document and collects its findings in a
/// `ValidationResult`.
///
/// Derives `Debug` and `Clone` so a configured validator can be logged or
/// duplicated.
#[derive(Debug, Clone)]
pub struct PdfValidator {
    /// When `true`, extra strict-mode checks and warnings are enabled.
    strict_mode: bool,
    /// Maximum validation depth.
    // Not read by any validation code yet (only by tests), hence the allow.
    #[allow(dead_code)]
    max_depth: usize,
    /// Objects already visited, kept for circular-reference detection.
    /// NOTE(review): keys are presumably (object number, generation) — confirm.
    visited: HashSet<(u32, u16)>,
}
68
69impl Default for PdfValidator {
70    fn default() -> Self {
71        Self::new()
72    }
73}
74
75impl PdfValidator {
76    /// Create a new validator
77    pub fn new() -> Self {
78        Self {
79            strict_mode: false,
80            max_depth: 100,
81            visited: HashSet::new(),
82        }
83    }
84
85    /// Enable strict validation mode
86    pub fn strict(mut self) -> Self {
87        self.strict_mode = true;
88        self
89    }
90
91    /// Validate a PDF file
92    pub fn validate_file<P: AsRef<Path>>(&mut self, path: P) -> Result<ValidationResult> {
93        let mut result = ValidationResult {
94            is_valid: true,
95            errors: Vec::new(),
96            warnings: Vec::new(),
97            stats: ValidationStats::default(),
98        };
99
100        // Try to open the PDF
101        match PdfReader::open_document(path) {
102            Ok(doc) => {
103                self.validate_document(&doc, &mut result)?;
104            }
105            Err(e) => {
106                result.is_valid = false;
107                result
108                    .errors
109                    .push(ValidationError::InvalidHeader(e.to_string()));
110
111                // In strict mode, add a warning about the validation attempt
112                if self.strict_mode {
113                    result.warnings.push(
114                        "Could not perform full validation due to document opening error"
115                            .to_string(),
116                    );
117                }
118            }
119        }
120
121        Ok(result)
122    }
123
124    /// Validate an open PDF document
125    pub fn validate_document<R: Read + Seek>(
126        &mut self,
127        doc: &crate::parser::PdfDocument<R>,
128        result: &mut ValidationResult,
129    ) -> Result<()> {
130        // Validate structure
131        self.validate_structure(doc, result)?;
132
133        // Validate pages
134        self.validate_pages(doc, result)?;
135
136        // Validate cross-references
137        self.validate_xrefs(doc, result)?;
138
139        // Validate objects
140        self.validate_objects(doc, result)?;
141
142        result.is_valid = result.errors.is_empty();
143
144        Ok(())
145    }
146
147    fn validate_structure<R: Read + Seek>(
148        &self,
149        doc: &crate::parser::PdfDocument<R>,
150        result: &mut ValidationResult,
151    ) -> Result<()> {
152        // Check for required root objects
153        if doc
154            .page_count()
155            .map_err(|e| crate::error::PdfError::InvalidStructure(e.to_string()))?
156            == 0
157        {
158            result.warnings.push("Document has no pages".to_string());
159        }
160
161        // Check PDF version
162        match doc.version() {
163            Ok(version) => {
164                if !version.starts_with("1.") && !version.starts_with("2.") {
165                    result
166                        .warnings
167                        .push(format!("Unusual PDF version: {version}"));
168                }
169            }
170            Err(e) => {
171                result
172                    .errors
173                    .push(ValidationError::InvalidHeader(e.to_string()));
174            }
175        }
176
177        Ok(())
178    }
179
180    fn validate_pages<R: Read + Seek>(
181        &mut self,
182        doc: &crate::parser::PdfDocument<R>,
183        result: &mut ValidationResult,
184    ) -> Result<()> {
185        let page_count = doc
186            .page_count()
187            .map_err(|e| crate::error::PdfError::InvalidStructure(e.to_string()))?;
188
189        for i in 0..page_count {
190            match doc.get_page(i) {
191                Ok(page) => {
192                    // Validate page dimensions
193                    if page.width() <= 0.0 || page.height() <= 0.0 {
194                        result.errors.push(ValidationError::InvalidPageTree(format!(
195                            "Page {i} has invalid dimensions"
196                        )));
197                    }
198
199                    result.stats.pages_validated += 1;
200                }
201                Err(e) => {
202                    result.errors.push(ValidationError::InvalidPageTree(format!(
203                        "Cannot read page {i}: {e}"
204                    )));
205                }
206            }
207        }
208
209        Ok(())
210    }
211
212    fn validate_xrefs<R: Read + Seek>(
213        &self,
214        _doc: &crate::parser::PdfDocument<R>,
215        result: &mut ValidationResult,
216    ) -> Result<()> {
217        // Simplified xref validation
218        result.stats.xrefs_validated += 1;
219
220        if self.strict_mode {
221            // In strict mode, check xref integrity
222            result
223                .warnings
224                .push("Cross-reference validation not fully implemented".to_string());
225        }
226
227        Ok(())
228    }
229
230    fn validate_objects<R: Read + Seek>(
231        &mut self,
232        _doc: &crate::parser::PdfDocument<R>,
233        result: &mut ValidationResult,
234    ) -> Result<()> {
235        // Simplified object validation
236        result.stats.objects_checked += 10; // Mock count
237        result.stats.valid_objects += 9;
238
239        if self.strict_mode {
240            // Check for circular references
241            self.check_circular_references(result)?;
242        }
243
244        Ok(())
245    }
246
247    fn check_circular_references(&mut self, _result: &mut ValidationResult) -> Result<()> {
248        // This would check for circular references in the object graph
249        // For now, just clear visited set
250        self.visited.clear();
251
252        Ok(())
253    }
254}
255
256/// Validate a PDF file
257pub fn validate_pdf<P: AsRef<Path>>(path: P) -> Result<ValidationResult> {
258    let mut validator = PdfValidator::new();
259    validator.validate_file(path)
260}
261
262/// Quick validation check
263pub fn is_valid_pdf<P: AsRef<Path>>(path: P) -> bool {
264    validate_pdf(path)
265        .map(|result| result.is_valid)
266        .unwrap_or(false)
267}
268
269/// Validate with strict mode
270pub fn validate_strict<P: AsRef<Path>>(path: P) -> Result<ValidationResult> {
271    let mut validator = PdfValidator::new().strict();
272    validator.validate_file(path)
273}
274
275#[cfg(test)]
276mod tests {
277    use super::*;
278
279    #[test]
280    fn test_validator_creation() {
281        let validator = PdfValidator::new();
282        assert!(!validator.strict_mode);
283        assert_eq!(validator.max_depth, 100);
284
285        let strict_validator = PdfValidator::new().strict();
286        assert!(strict_validator.strict_mode);
287    }
288
289    #[test]
290    fn test_validation_result_default() {
291        let result = ValidationResult {
292            is_valid: true,
293            errors: Vec::new(),
294            warnings: Vec::new(),
295            stats: ValidationStats::default(),
296        };
297
298        assert!(result.is_valid);
299        assert!(result.errors.is_empty());
300        assert!(result.warnings.is_empty());
301    }
302
303    #[test]
304    fn test_validation_error_types() {
305        let error = ValidationError::InvalidHeader("Bad header".to_string());
306        match error {
307            ValidationError::InvalidHeader(msg) => assert_eq!(msg, "Bad header"),
308            _ => panic!("Wrong error type"),
309        }
310
311        let error = ValidationError::CircularReference(1, 2);
312        match error {
313            ValidationError::CircularReference(a, b) => {
314                assert_eq!(a, 1);
315                assert_eq!(b, 2);
316            }
317            _ => panic!("Wrong error type"),
318        }
319    }
320
321    #[test]
322    fn test_validation_stats() {
323        let mut stats = ValidationStats::default();
324        assert_eq!(stats.objects_checked, 0);
325
326        stats.objects_checked = 10;
327        stats.valid_objects = 8;
328        assert_eq!(stats.objects_checked, 10);
329        assert_eq!(stats.valid_objects, 8);
330    }
331
332    #[test]
333    fn test_validation_error_debug_clone() {
334        let errors = vec![
335            ValidationError::InvalidHeader("test".to_string()),
336            ValidationError::MissingObjects(vec!["obj1".to_string(), "obj2".to_string()]),
337            ValidationError::InvalidXRef("xref error".to_string()),
338            ValidationError::CircularReference(1, 2),
339            ValidationError::InvalidPageTree("page error".to_string()),
340            ValidationError::CorruptedStream(42),
341            ValidationError::InvalidEncoding("encoding error".to_string()),
342            ValidationError::SecurityViolation("security error".to_string()),
343        ];
344
345        for error in errors {
346            let debug_str = format!("{error:?}");
347            assert!(!debug_str.is_empty());
348
349            let cloned = error.clone();
350            match (error, cloned) {
351                (ValidationError::InvalidHeader(s1), ValidationError::InvalidHeader(s2)) => {
352                    assert_eq!(s1, s2);
353                }
354                (ValidationError::MissingObjects(v1), ValidationError::MissingObjects(v2)) => {
355                    assert_eq!(v1, v2);
356                }
357                (
358                    ValidationError::CircularReference(a1, b1),
359                    ValidationError::CircularReference(a2, b2),
360                ) => {
361                    assert_eq!(a1, a2);
362                    assert_eq!(b1, b2);
363                }
364                _ => {}
365            }
366        }
367    }
368
369    #[test]
370    fn test_validation_result_debug() {
371        let result = ValidationResult {
372            is_valid: false,
373            errors: vec![ValidationError::InvalidHeader("test".to_string())],
374            warnings: vec!["warning1".to_string()],
375            stats: ValidationStats {
376                objects_checked: 10,
377                valid_objects: 8,
378                pages_validated: 3,
379                streams_validated: 5,
380                xrefs_validated: 1,
381            },
382        };
383
384        let debug_str = format!("{result:?}");
385        assert!(debug_str.contains("ValidationResult"));
386        assert!(debug_str.contains("false"));
387        assert!(debug_str.contains("InvalidHeader"));
388    }
389
390    #[test]
391    fn test_validation_stats_debug_default() {
392        let stats = ValidationStats::default();
393        assert_eq!(stats.objects_checked, 0);
394        assert_eq!(stats.valid_objects, 0);
395        assert_eq!(stats.pages_validated, 0);
396        assert_eq!(stats.streams_validated, 0);
397        assert_eq!(stats.xrefs_validated, 0);
398
399        let debug_str = format!("{stats:?}");
400        assert!(debug_str.contains("ValidationStats"));
401    }
402
403    #[test]
404    fn test_pdf_validator_default() {
405        let validator = PdfValidator::default();
406        assert!(!validator.strict_mode);
407        assert_eq!(validator.max_depth, 100);
408        assert!(validator.visited.is_empty());
409    }
410
411    #[test]
412    fn test_pdf_validator_strict_mode() {
413        let validator = PdfValidator::new();
414        assert!(!validator.strict_mode);
415
416        let strict = validator.strict();
417        assert!(strict.strict_mode);
418    }
419
420    #[test]
421    fn test_validation_error_missing_objects() {
422        let missing = vec![
423            "Font".to_string(),
424            "Page".to_string(),
425            "XObject".to_string(),
426        ];
427        let error = ValidationError::MissingObjects(missing.clone());
428
429        match error {
430            ValidationError::MissingObjects(objects) => {
431                assert_eq!(objects.len(), 3);
432                assert_eq!(objects[0], "Font");
433                assert_eq!(objects[1], "Page");
434                assert_eq!(objects[2], "XObject");
435            }
436            _ => panic!("Wrong error type"),
437        }
438    }
439
440    #[test]
441    fn test_validation_error_corrupted_stream() {
442        let error = ValidationError::CorruptedStream(123);
443        match error {
444            ValidationError::CorruptedStream(id) => assert_eq!(id, 123),
445            _ => panic!("Wrong error type"),
446        }
447    }
448
449    #[test]
450    fn test_validation_error_invalid_encoding() {
451        let error = ValidationError::InvalidEncoding("UTF-16 not supported".to_string());
452        match error {
453            ValidationError::InvalidEncoding(msg) => {
454                assert_eq!(msg, "UTF-16 not supported");
455            }
456            _ => panic!("Wrong error type"),
457        }
458    }
459
460    #[test]
461    fn test_validation_error_security_violation() {
462        let error = ValidationError::SecurityViolation("Encrypted content".to_string());
463        match error {
464            ValidationError::SecurityViolation(msg) => {
465                assert_eq!(msg, "Encrypted content");
466            }
467            _ => panic!("Wrong error type"),
468        }
469    }
470
471    #[test]
472    fn test_validation_result_with_errors() {
473        let result = ValidationResult {
474            is_valid: false,
475            errors: vec![
476                ValidationError::InvalidHeader("Bad header".to_string()),
477                ValidationError::InvalidPageTree("No pages".to_string()),
478            ],
479            warnings: vec!["Old PDF version".to_string()],
480            stats: ValidationStats {
481                objects_checked: 10,
482                valid_objects: 7,
483                pages_validated: 0,
484                streams_validated: 2,
485                xrefs_validated: 1,
486            },
487        };
488
489        assert!(!result.is_valid);
490        assert_eq!(result.errors.len(), 2);
491        assert_eq!(result.warnings.len(), 1);
492        assert_eq!(result.stats.objects_checked, 10);
493        assert_eq!(result.stats.valid_objects, 7);
494    }
495
496    #[test]
497    fn test_is_valid_pdf_nonexistent_file() {
498        let temp_dir = std::env::temp_dir();
499        let temp_path = temp_dir.join("nonexistent_validator_test.pdf");
500
501        let valid = is_valid_pdf(&temp_path);
502        assert!(!valid);
503    }
504
505    #[test]
506    fn test_validate_pdf_nonexistent_file() {
507        let temp_dir = std::env::temp_dir();
508        let temp_path = temp_dir.join("nonexistent_validator_test2.pdf");
509
510        let result = validate_pdf(&temp_path).unwrap();
511        assert!(!result.is_valid);
512        assert!(!result.errors.is_empty());
513    }
514
515    #[test]
516    fn test_validate_strict_nonexistent_file() {
517        let temp_dir = std::env::temp_dir();
518        let temp_path = temp_dir.join("nonexistent_validator_test3.pdf");
519
520        let result = validate_strict(&temp_path).unwrap();
521        assert!(!result.is_valid);
522        assert!(!result.errors.is_empty());
523    }
524
525    #[test]
526    fn test_pdf_validator_visited_tracking() {
527        let mut validator = PdfValidator::new();
528        assert!(validator.visited.is_empty());
529
530        // Simulate visiting objects
531        validator.visited.insert((1, 0));
532        validator.visited.insert((2, 0));
533        validator.visited.insert((3, 1));
534
535        assert_eq!(validator.visited.len(), 3);
536        assert!(validator.visited.contains(&(1, 0)));
537        assert!(validator.visited.contains(&(2, 0)));
538        assert!(validator.visited.contains(&(3, 1)));
539        assert!(!validator.visited.contains(&(4, 0)));
540    }
541
542    #[test]
543    fn test_check_circular_references() {
544        let mut validator = PdfValidator::new();
545        validator.visited.insert((1, 0));
546        validator.visited.insert((2, 0));
547
548        let mut result = ValidationResult {
549            is_valid: true,
550            errors: Vec::new(),
551            warnings: Vec::new(),
552            stats: ValidationStats::default(),
553        };
554
555        validator.check_circular_references(&mut result).unwrap();
556        assert!(validator.visited.is_empty()); // Should be cleared
557    }
558
559    #[test]
560    fn test_validation_stats_increments() {
561        let mut stats = ValidationStats::default();
562
563        stats.objects_checked += 1;
564        assert_eq!(stats.objects_checked, 1);
565
566        stats.valid_objects += 1;
567        assert_eq!(stats.valid_objects, 1);
568
569        stats.pages_validated += 1;
570        assert_eq!(stats.pages_validated, 1);
571
572        stats.streams_validated += 1;
573        assert_eq!(stats.streams_validated, 1);
574
575        stats.xrefs_validated += 1;
576        assert_eq!(stats.xrefs_validated, 1);
577    }
578
579    #[test]
580    fn test_validation_error_invalid_xref() {
581        let error = ValidationError::InvalidXRef("Offset out of bounds".to_string());
582        match error {
583            ValidationError::InvalidXRef(msg) => {
584                assert_eq!(msg, "Offset out of bounds");
585            }
586            _ => panic!("Wrong error type"),
587        }
588    }
589
590    #[test]
591    fn test_validation_error_invalid_page_tree() {
592        let error = ValidationError::InvalidPageTree("Missing Kids array".to_string());
593        match error {
594            ValidationError::InvalidPageTree(msg) => {
595                assert_eq!(msg, "Missing Kids array");
596            }
597            _ => panic!("Wrong error type"),
598        }
599    }
600
601    #[test]
602    fn test_validation_multiple_warnings() {
603        let result = ValidationResult {
604            is_valid: true,
605            errors: Vec::new(),
606            warnings: vec![
607                "Old PDF version".to_string(),
608                "Non-standard font encoding".to_string(),
609                "Large file size".to_string(),
610            ],
611            stats: ValidationStats::default(),
612        };
613
614        assert!(result.is_valid);
615        assert_eq!(result.warnings.len(), 3);
616        assert!(result.warnings.contains(&"Old PDF version".to_string()));
617        assert!(result
618            .warnings
619            .contains(&"Non-standard font encoding".to_string()));
620        assert!(result.warnings.contains(&"Large file size".to_string()));
621    }
622
623    #[test]
624    fn test_pdf_validator_max_depth() {
625        let validator = PdfValidator::new();
626        assert_eq!(validator.max_depth, 100);
627
628        // Test that field exists and has expected value
629        let validator2 = PdfValidator {
630            strict_mode: false,
631            max_depth: 50,
632            visited: HashSet::new(),
633        };
634        assert_eq!(validator2.max_depth, 50);
635    }
636
637    #[test]
638    fn test_validate_file_with_invalid_pdf() {
639        use std::fs::File;
640        use std::io::Write;
641
642        let temp_dir = std::env::temp_dir();
643        let temp_path = temp_dir.join("invalid_pdf_test.pdf");
644        let mut file = File::create(&temp_path).unwrap();
645        file.write_all(b"This is not a PDF file").unwrap();
646
647        let mut validator = PdfValidator::new();
648        let result = validator.validate_file(&temp_path).unwrap();
649
650        assert!(!result.is_valid);
651        assert!(!result.errors.is_empty());
652        assert!(matches!(
653            result.errors.first(),
654            Some(ValidationError::InvalidHeader(_))
655        ));
656
657        // Cleanup
658        let _ = std::fs::remove_file(temp_path);
659    }
660
661    #[test]
662    fn test_validate_file_nonexistent() {
663        let temp_dir = std::env::temp_dir();
664        let temp_path = temp_dir.join("nonexistent_validator_file.pdf");
665
666        let mut validator = PdfValidator::new();
667        let result = validator.validate_file(&temp_path).unwrap();
668
669        assert!(!result.is_valid);
670        assert!(!result.errors.is_empty());
671    }
672
673    #[test]
674    fn test_validate_strict_with_valid_pdf() {
675        use std::fs::File;
676        use std::io::Write;
677
678        let temp_dir = std::env::temp_dir();
679        let temp_path = temp_dir.join("valid_strict_test.pdf");
680        let mut file = File::create(&temp_path).unwrap();
681        // Create a more complete PDF structure that PdfReader can parse
682        file.write_all(b"%PDF-1.7\n1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\nxref\n0 3\n0000000000 65535 f\n0000000009 00000 n\n0000000068 00000 n\ntrailer\n<< /Size 3 /Root 1 0 R >>\nstartxref\n116\n%%EOF")
683            .unwrap();
684
685        let mut validator = PdfValidator::new().strict();
686        let result = validator.validate_file(&temp_path).unwrap();
687
688        // Should have warnings in strict mode
689        assert!(!result.warnings.is_empty());
690
691        // Cleanup
692        let _ = std::fs::remove_file(temp_path);
693    }
694
695    #[test]
696    fn test_multiple_validation_errors() {
697        let result = ValidationResult {
698            is_valid: false,
699            errors: vec![
700                ValidationError::InvalidHeader("Bad header".to_string()),
701                ValidationError::MissingObjects(vec!["Font1".to_string(), "XObject2".to_string()]),
702                ValidationError::InvalidXRef("Corrupt xref".to_string()),
703                ValidationError::CircularReference(1, 2),
704                ValidationError::InvalidPageTree("No pages".to_string()),
705                ValidationError::CorruptedStream(99),
706                ValidationError::InvalidEncoding("Unknown encoding".to_string()),
707                ValidationError::SecurityViolation("Access denied".to_string()),
708            ],
709            warnings: vec!["Warning 1".to_string(), "Warning 2".to_string()],
710            stats: ValidationStats {
711                objects_checked: 100,
712                valid_objects: 50,
713                pages_validated: 5,
714                streams_validated: 10,
715                xrefs_validated: 1,
716            },
717        };
718
719        assert!(!result.is_valid);
720        assert_eq!(result.errors.len(), 8);
721        assert_eq!(result.warnings.len(), 2);
722        assert_eq!(result.stats.objects_checked, 100);
723        assert_eq!(result.stats.valid_objects, 50);
724    }
725
726    #[test]
727    fn test_validation_error_patterns() {
728        let errors = vec![
729            (
730                ValidationError::InvalidHeader("PDF version 3.0 not supported".to_string()),
731                "InvalidHeader",
732            ),
733            (
734                ValidationError::MissingObjects(vec!["Page1".to_string(), "Page2".to_string()]),
735                "MissingObjects",
736            ),
737            (
738                ValidationError::InvalidXRef("Offset exceeds file size".to_string()),
739                "InvalidXRef",
740            ),
741            (
742                ValidationError::CircularReference(10, 20),
743                "CircularReference",
744            ),
745            (
746                ValidationError::InvalidPageTree("Pages loop detected".to_string()),
747                "InvalidPageTree",
748            ),
749            (ValidationError::CorruptedStream(55), "CorruptedStream"),
750            (
751                ValidationError::InvalidEncoding("Unknown CMap".to_string()),
752                "InvalidEncoding",
753            ),
754            (
755                ValidationError::SecurityViolation("Password required".to_string()),
756                "SecurityViolation",
757            ),
758        ];
759
760        for (error, expected_pattern) in errors {
761            let debug_str = format!("{error:?}");
762            assert!(debug_str.contains(expected_pattern));
763        }
764    }
765
766    #[test]
767    fn test_validator_with_different_max_depths() {
768        let validator1 = PdfValidator {
769            strict_mode: false,
770            max_depth: 10,
771            visited: HashSet::new(),
772        };
773        assert_eq!(validator1.max_depth, 10);
774
775        let validator2 = PdfValidator {
776            strict_mode: true,
777            max_depth: 200,
778            visited: HashSet::new(),
779        };
780        assert_eq!(validator2.max_depth, 200);
781        assert!(validator2.strict_mode);
782    }
783
784    #[test]
785    fn test_validation_stats_accumulation() {
786        let mut stats = ValidationStats::default();
787
788        // Simulate accumulating stats during validation
789        for i in 0..10 {
790            stats.objects_checked += 1;
791            if i % 2 == 0 {
792                stats.valid_objects += 1;
793            }
794        }
795
796        stats.pages_validated = 5;
797        stats.streams_validated = 8;
798        stats.xrefs_validated = 2;
799
800        assert_eq!(stats.objects_checked, 10);
801        assert_eq!(stats.valid_objects, 5);
802        assert_eq!(stats.pages_validated, 5);
803        assert_eq!(stats.streams_validated, 8);
804        assert_eq!(stats.xrefs_validated, 2);
805    }
806
807    #[test]
808    fn test_validation_result_with_only_warnings() {
809        let result = ValidationResult {
810            is_valid: true,
811            errors: Vec::new(),
812            warnings: vec![
813                "Deprecated PDF version".to_string(),
814                "Non-standard font encoding".to_string(),
815                "Missing optional metadata".to_string(),
816            ],
817            stats: ValidationStats::default(),
818        };
819
820        assert!(result.is_valid);
821        assert!(result.errors.is_empty());
822        assert_eq!(result.warnings.len(), 3);
823    }
824
825    #[test]
826    fn test_circular_reference_different_values() {
827        let refs = vec![
828            ValidationError::CircularReference(0, 0), // Self reference
829            ValidationError::CircularReference(1, 2), // Forward reference
830            ValidationError::CircularReference(100, 50), // Backward reference
831            ValidationError::CircularReference(u32::MAX, u32::MAX - 1), // Large values
832        ];
833
834        for error in refs {
835            match error {
836                ValidationError::CircularReference(a, b) => {
837                    let debug = format!("{error:?}");
838                    assert!(debug.contains(&a.to_string()));
839                    assert!(debug.contains(&b.to_string()));
840                }
841                _ => panic!("Expected CircularReference"),
842            }
843        }
844    }
845
846    #[test]
847    fn test_corrupted_stream_various_ids() {
848        let stream_ids = vec![0, 1, 42, 999, u32::MAX];
849
850        for id in stream_ids {
851            let error = ValidationError::CorruptedStream(id);
852            match error {
853                ValidationError::CorruptedStream(stream_id) => {
854                    assert_eq!(stream_id, id);
855                }
856                _ => panic!("Expected CorruptedStream"),
857            }
858        }
859    }
860
861    #[test]
862    fn test_missing_objects_various_lists() {
863        let test_cases = vec![
864            vec![],
865            vec!["Object1".to_string()],
866            vec![
867                "Font1".to_string(),
868                "Font2".to_string(),
869                "Font3".to_string(),
870            ],
871            vec![
872                "Page".to_string(),
873                "Resources".to_string(),
874                "Contents".to_string(),
875                "MediaBox".to_string(),
876            ],
877        ];
878
879        for objects in test_cases {
880            let count = objects.len();
881            let error = ValidationError::MissingObjects(objects.clone());
882            match error {
883                ValidationError::MissingObjects(list) => {
884                    assert_eq!(list.len(), count);
885                    assert_eq!(list, objects);
886                }
887                _ => panic!("Expected MissingObjects"),
888            }
889        }
890    }
891
892    #[test]
893    fn test_validation_result_edge_cases() {
894        // Empty result
895        let empty = ValidationResult {
896            is_valid: true,
897            errors: vec![],
898            warnings: vec![],
899            stats: ValidationStats::default(),
900        };
901        assert!(empty.is_valid);
902        assert!(empty.errors.is_empty());
903        assert!(empty.warnings.is_empty());
904
905        // Many errors
906        let mut many_errors = ValidationResult {
907            is_valid: false,
908            errors: vec![],
909            warnings: vec![],
910            stats: ValidationStats::default(),
911        };
912        for i in 0..100 {
913            many_errors.errors.push(ValidationError::CorruptedStream(i));
914        }
915        assert_eq!(many_errors.errors.len(), 100);
916
917        // Many warnings
918        let mut many_warnings = ValidationResult {
919            is_valid: true,
920            errors: vec![],
921            warnings: vec![],
922            stats: ValidationStats::default(),
923        };
924        for i in 0..50 {
925            many_warnings.warnings.push(format!("Warning {i}"));
926        }
927        assert_eq!(many_warnings.warnings.len(), 50);
928    }
929
/// Checks that the validator's `visited` set supports insertion, lookup,
/// removal and clearing on a freshly constructed `PdfValidator`.
#[test]
fn test_validator_visited_operations() {
    let mut validator = PdfValidator::new();
    let seen = &mut validator.visited;

    // The first insertion of each key reports it as new...
    for obj_num in 1..=3 {
        assert!(seen.insert((obj_num, 0)));
    }
    // ...while inserting an existing key is rejected and does not grow the set.
    assert!(!seen.insert((1, 0)));
    assert_eq!(seen.len(), 3);

    // Lookups succeed for inserted keys only.
    for obj_num in 1..=3 {
        assert!(seen.contains(&(obj_num, 0)));
    }
    assert!(!seen.contains(&(4, 0)));

    // Removal succeeds once; the second attempt finds nothing to remove.
    assert!(seen.remove(&(2, 0)));
    assert!(!seen.remove(&(2, 0)));
    assert_eq!(seen.len(), 2);

    // Clearing leaves the set empty.
    seen.clear();
    assert!(seen.is_empty());
}
957
/// Verifies that every string-carrying `ValidationError` variant produces a
/// non-empty `Debug` rendering for a range of payloads (empty, plain,
/// punctuation, multi-line, very long).
#[test]
fn test_validation_error_string_contents() {
    let payloads = [
        String::new(),
        String::from("Simple error"),
        String::from("Error with special chars: @#$%^&*()"),
        String::from("Multi\nline\nerror"),
        "Very long error message ".repeat(50),
    ];

    // Tuple-variant constructors are plain functions, so they can be tabulated.
    let constructors: [fn(String) -> ValidationError; 5] = [
        ValidationError::InvalidHeader,
        ValidationError::InvalidXRef,
        ValidationError::InvalidPageTree,
        ValidationError::InvalidEncoding,
        ValidationError::SecurityViolation,
    ];

    for payload in &payloads {
        for build in constructors {
            let rendered = format!("{:?}", build(payload.clone()));
            assert!(!rendered.is_empty());
        }
    }
}
984
/// `is_valid_pdf` must report `false` for a file whose bytes are not PDF data.
///
/// The fixture is written to the system temp directory, fully closed before
/// validation, and removed before the assertion runs so that a failing check
/// does not leave the file behind.
#[test]
fn test_is_valid_pdf_with_invalid_content() {
    // The temp directory is shared between processes; tagging the name with the
    // process id keeps concurrent test runs from clobbering each other's fixture.
    let temp_path =
        std::env::temp_dir().join(format!("invalid_content_test_{}.pdf", std::process::id()));
    // `fs::write` creates, writes and closes the file in one step, so no handle
    // is still open while the validator reads it.
    std::fs::write(&temp_path, b"Not PDF content").unwrap();

    let valid = is_valid_pdf(&temp_path);

    // Best-effort cleanup, done before asserting so a failure cannot leak the file.
    let _ = std::fs::remove_file(&temp_path);

    assert!(!valid);
}
1001
/// `validate_pdf` must return `Ok` for a file that contains only a PDF header line.
///
/// Only the absence of an error is checked; the resulting `is_valid` flag is
/// read but deliberately not asserted, since it may be either value for such
/// a minimal file.
#[test]
fn test_validate_pdf_with_valid_header() {
    // The temp directory is shared between processes; tagging the name with the
    // process id keeps concurrent test runs from clobbering each other's fixture.
    let temp_path =
        std::env::temp_dir().join(format!("valid_header_test_{}.pdf", std::process::id()));
    // `fs::write` creates, writes and closes the file in one step, so no handle
    // is still open while the validator reads it.
    std::fs::write(&temp_path, b"%PDF-1.4\n").unwrap();

    let outcome = validate_pdf(&temp_path);

    // Best-effort cleanup, done before unwrapping so an `Err` cannot leak the file.
    let _ = std::fs::remove_file(&temp_path);

    let result = outcome.unwrap();
    // May or may not be valid depending on content.
    let _ = result.is_valid;
}
1019
/// `validate_strict` on a header-only file is expected to succeed and to
/// report at least one warning.
///
/// The fixture is removed before any unwrap or assertion so that a failure
/// does not leave the file behind in the temp directory.
#[test]
fn test_validate_strict_with_warnings() {
    // The temp directory is shared between processes; tagging the name with the
    // process id keeps concurrent test runs from clobbering each other's fixture.
    let temp_path =
        std::env::temp_dir().join(format!("strict_warnings_test_{}.pdf", std::process::id()));
    // `fs::write` creates, writes and closes the file in one step, so no handle
    // is still open while the validator reads it.
    std::fs::write(&temp_path, b"%PDF-1.7\n").unwrap();

    let outcome = validate_strict(&temp_path);

    // Best-effort cleanup, done first so neither the unwrap nor the assertion
    // below can leak the file.
    let _ = std::fs::remove_file(&temp_path);

    let result = outcome.unwrap();
    // In strict mode, should have warnings.
    assert!(!result.warnings.is_empty());
}
1037
/// After `check_circular_references` returns `Ok`, the validator's `visited`
/// set must be empty even if it held entries beforehand.
#[test]
fn test_validator_check_circular_references_clears_visited() {
    let mut validator = PdfValidator::new();

    // Seed the set with objects 1 through 5, all at generation 0.
    validator
        .visited
        .extend((1..=5u32).map(|obj_num| (obj_num, 0u16)));
    assert_eq!(validator.visited.len(), 5);

    let mut result = ValidationResult {
        is_valid: true,
        errors: vec![],
        warnings: vec![],
        stats: ValidationStats::default(),
    };

    // The check is expected to leave no stale entries behind.
    validator.check_circular_references(&mut result).unwrap();
    assert!(validator.visited.is_empty());
}
1062
/// Builds a fully populated `ValidationResult` (errors, warnings and stats)
/// and checks that every field reads back as written, including the ratio of
/// valid objects to checked objects.
#[test]
fn test_validation_comprehensive_scenario() {
    // Assemble the individual parts first, then the result itself.
    let errors = vec![
        ValidationError::InvalidHeader(String::from("Wrong PDF version")),
        ValidationError::MissingObjects(vec![String::from("Font1"), String::from("Font2")]),
        ValidationError::CircularReference(5, 10),
    ];
    let warnings = vec![
        String::from("Deprecated feature used"),
        String::from("Non-standard encoding"),
    ];
    let stats = ValidationStats {
        objects_checked: 50,
        valid_objects: 35,
        pages_validated: 10,
        streams_validated: 15,
        xrefs_validated: 2,
    };
    let result = ValidationResult {
        is_valid: false,
        errors,
        warnings,
        stats,
    };

    // Top-level fields.
    assert!(!result.is_valid);
    assert_eq!(result.errors.len(), 3);
    assert_eq!(result.warnings.len(), 2);

    // Statistics.
    let stats = &result.stats;
    assert_eq!(stats.objects_checked, 50);
    assert_eq!(stats.valid_objects, 35);
    assert_eq!(stats.pages_validated, 10);
    assert_eq!(stats.streams_validated, 15);
    assert_eq!(stats.xrefs_validated, 2);

    // 35 valid out of 50 checked is a 70% validation rate.
    let validation_rate = stats.valid_objects as f64 / stats.objects_checked as f64;
    assert!((validation_rate - 0.7).abs() < 0.01);
}
1101}