Skip to main content

pdf_ast/recovery/
diagnostics.rs

1use super::*;
2use crate::ast::{NodeType, PdfDocument};
3use std::collections::HashMap;
4
5/// PDF document diagnostics and health assessment
6pub struct DocumentDiagnostics {
7    config: DiagnosticsConfig,
8    checkers: Vec<Box<dyn HealthChecker>>,
9}
10
/// Switches and limits that control a diagnostics run.
#[derive(Debug, Clone)]
pub struct DiagnosticsConfig {
    /// Request deep analysis; not consulted by the code visible in this file.
    pub deep_analysis: bool,
    /// Run the byte-level integrity pass and register the integrity checker.
    pub check_integrity: bool,
    /// Run the structural pass (header, catalog, pages tree, xref, trailer).
    pub analyze_structure: bool,
    /// Run the object-reference validation pass.
    pub validate_references: bool,
    /// Run the stream pass and register the stream checker.
    pub check_streams: bool,
    /// Time budget in milliseconds; not consulted by the code visible in this file.
    pub timeout_ms: u64,
}

impl Default for DiagnosticsConfig {
    /// Enables every analysis pass and sets the timeout to 30 seconds.
    fn default() -> Self {
        const DEFAULT_TIMEOUT_MS: u64 = 30_000; // 30 seconds

        DiagnosticsConfig {
            deep_analysis: true,
            check_integrity: true,
            analyze_structure: true,
            validate_references: true,
            check_streams: true,
            timeout_ms: DEFAULT_TIMEOUT_MS,
        }
    }
}
34
35/// Comprehensive health report for a PDF document
36#[derive(Debug, Clone)]
37pub struct HealthReport {
38    pub overall_health: DocumentHealth,
39    pub structure_health: StructureHealth,
40    pub integrity_score: f64,
41    pub corruption_indicators: Vec<CorruptionIndicator>,
42    pub recommendations: Vec<Recommendation>,
43    pub detailed_findings: HashMap<String, Finding>,
44    pub statistics: DiagnosticStatistics,
45}
46
/// Health information about the document's structural building blocks.
#[derive(Debug, Clone)]
pub struct StructureHealth {
    /// The raw data starts with the `%PDF-` signature.
    pub has_valid_header: bool,
    /// The parsed document exposes a root node.
    pub has_catalog: bool,
    /// At least one pages node exists in the parsed document.
    pub has_pages_tree: bool,
    /// The `xref` keyword occurs somewhere in the raw data.
    pub has_valid_xref: bool,
    /// The `trailer` keyword occurs somewhere in the raw data.
    pub has_trailer: bool,
    /// Fraction of object references considered valid.
    pub reference_integrity: f64,
    /// Fraction of detected streams considered intact.
    pub stream_integrity: f64,
}
58
59/// Indicator of potential corruption
60#[derive(Debug, Clone)]
61pub struct CorruptionIndicator {
62    pub indicator_type: CorruptionType,
63    pub severity: ErrorSeverity,
64    pub location: String,
65    pub description: String,
66    pub confidence: f64,
67}
68
/// Categories of corruption that the diagnostics can report.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CorruptionType {
    /// Damage to the basic file structure.
    StructuralDamage,
    /// Damaged data.
    DataCorruption,
    /// Expected parts of the document are missing.
    MissingComponents,
    /// Object references that cannot be resolved.
    InvalidReferences,
    /// Damaged stream objects.
    StreamCorruption,
    /// Problems with text encoding.
    EncodingIssues,
    /// An integrity rule was violated.
    IntegrityViolation,
}
80
81/// Recommendation for fixing issues
82#[derive(Debug, Clone)]
83pub struct Recommendation {
84    pub priority: Priority,
85    pub action: RecommendedAction,
86    pub description: String,
87    pub estimated_success_rate: f64,
88}
89
/// Kinds of repair action a [`Recommendation`] can suggest.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RecommendedAction {
    /// Repair the basic PDF structure.
    StructureRepair,
    /// Fix broken object references.
    ReferenceResolution,
    /// Reconstruct corrupted streams.
    StreamReconstruction,
    /// Fix text encoding issues.
    EncodingFix,
    /// Attempt general data recovery.
    DataRecovery,
    /// A human has to look at the document.
    ManualIntervention,
}
100
/// Urgency of a recommendation.
///
/// The derived ordering follows the discriminants, so
/// `Low < Medium < High < Critical`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Priority {
    /// Least urgent.
    Low = 1,
    /// Moderately urgent.
    Medium = 2,
    /// Should be handled soon.
    High = 3,
    /// Most urgent.
    Critical = 4,
}
109
110/// Detailed finding from diagnostic checks
111#[derive(Debug, Clone)]
112pub struct Finding {
113    pub check_name: String,
114    pub status: CheckStatus,
115    pub details: String,
116    pub metrics: HashMap<String, f64>,
117}
118
/// Result classification of a diagnostic check.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CheckStatus {
    /// The check found no problem.
    Passed,
    /// The check found something suspicious but not fatal.
    Warning,
    /// The check found a problem.
    Failed,
    /// The check itself could not be completed.
    Error,
    /// The check was not run.
    Skipped,
}
128
/// Counters and timings collected during a diagnostics run.
///
/// All fields start at zero via the derived `Default`.
#[derive(Debug, Clone, Default)]
pub struct DiagnosticStatistics {
    /// Number of individual checks that were executed.
    pub checks_performed: usize,
    /// Number of checks that succeeded.
    pub checks_passed: usize,
    /// Number of checks that detected a problem.
    pub checks_failed: usize,
    /// Number of warnings raised; not updated by the code visible in this file.
    pub warnings_generated: usize,
    /// Wall-clock duration of the analysis in milliseconds.
    pub analysis_time_ms: u64,
    /// Number of AST nodes in the analyzed document.
    pub nodes_analyzed: usize,
    /// Size of the analyzed raw data in bytes.
    pub bytes_analyzed: u64,
}
140
141impl DocumentDiagnostics {
142    /// Create a new diagnostics analyzer
143    pub fn new(config: DiagnosticsConfig) -> Self {
144        let mut diagnostics = Self {
145            config: config.clone(),
146            checkers: Vec::new(),
147        };
148
149        diagnostics.initialize_checkers(&config);
150        diagnostics
151    }
152
153    /// Perform comprehensive health analysis
154    pub fn analyze_health(&self, document: &PdfDocument, data: &[u8]) -> HealthReport {
155        let start_time = std::time::Instant::now();
156        let mut report = HealthReport {
157            overall_health: DocumentHealth::Healthy,
158            structure_health: StructureHealth::default(),
159            integrity_score: 1.0,
160            corruption_indicators: Vec::new(),
161            recommendations: Vec::new(),
162            detailed_findings: HashMap::new(),
163            statistics: DiagnosticStatistics::default(),
164        };
165
166        // Analyze structure
167        if self.config.analyze_structure {
168            self.analyze_structure(document, data, &mut report);
169        }
170
171        // Check integrity
172        if self.config.check_integrity {
173            self.check_integrity(document, data, &mut report);
174        }
175
176        // Validate references
177        if self.config.validate_references {
178            self.validate_references(document, &mut report);
179        }
180
181        // Check streams
182        if self.config.check_streams {
183            self.check_streams(document, data, &mut report);
184        }
185
186        // Run all health checkers
187        for checker in &self.checkers {
188            let finding = checker.check_health(document, data);
189            report
190                .detailed_findings
191                .insert(checker.name().to_string(), finding);
192        }
193
194        // Calculate overall health
195        self.calculate_overall_health(&mut report);
196
197        // Generate recommendations
198        self.generate_recommendations(&mut report);
199
200        // Finalize statistics
201        let elapsed = start_time.elapsed().as_millis() as u64;
202        report.statistics.analysis_time_ms = elapsed;
203        report.statistics.nodes_analyzed = document.ast.get_all_nodes().len();
204        report.statistics.bytes_analyzed = data.len() as u64;
205
206        report
207    }
208
209    /// Initialize health checkers based on configuration
210    fn initialize_checkers(&mut self, config: &DiagnosticsConfig) {
211        self.checkers.push(Box::new(HeaderChecker::new()));
212        self.checkers.push(Box::new(StructureChecker::new()));
213        self.checkers.push(Box::new(ReferenceChecker::new()));
214
215        if config.check_streams {
216            self.checkers.push(Box::new(StreamChecker::new()));
217        }
218
219        if config.check_integrity {
220            self.checkers.push(Box::new(IntegrityChecker::new()));
221        }
222    }
223
224    /// Analyze document structure
225    fn analyze_structure(&self, document: &PdfDocument, data: &[u8], report: &mut HealthReport) {
226        let structure = StructureHealth {
227            has_valid_header: data.starts_with(b"%PDF-"),
228            has_catalog: document.ast.get_root().is_some(),
229            has_pages_tree: !document.ast.get_nodes_by_type(NodeType::Pages).is_empty(),
230            has_valid_xref: data.windows(4).any(|w| w == b"xref"),
231            has_trailer: data.windows(7).any(|w| w == b"trailer"),
232            reference_integrity: self.calculate_reference_integrity(document),
233            stream_integrity: self.calculate_stream_integrity(document, data),
234        };
235
236        // Check conditions before moving structure
237        let passed_checks =
238            if structure.has_valid_header && structure.has_catalog && structure.has_pages_tree {
239                3
240            } else {
241                0
242            };
243
244        report.structure_health = structure;
245        report.statistics.checks_performed += 5; // 5 structure checks
246        report.statistics.checks_passed += passed_checks;
247    }
248
249    /// Check document integrity
250    fn check_integrity(&self, document: &PdfDocument, data: &[u8], report: &mut HealthReport) {
251        let mut integrity_issues = 0;
252        let mut total_checks = 0;
253
254        // Check for truncated data
255        total_checks += 1;
256        if !data.ends_with(b"%%EOF") {
257            integrity_issues += 1;
258            report.corruption_indicators.push(CorruptionIndicator {
259                indicator_type: CorruptionType::StructuralDamage,
260                severity: ErrorSeverity::Warning,
261                location: "End of file".to_string(),
262                description: "Missing or corrupted EOF marker".to_string(),
263                confidence: 0.9,
264            });
265        }
266
267        // Check for null bytes in inappropriate places
268        total_checks += 1;
269        if self.contains_inappropriate_nulls(data) {
270            integrity_issues += 1;
271            report.corruption_indicators.push(CorruptionIndicator {
272                indicator_type: CorruptionType::DataCorruption,
273                severity: ErrorSeverity::Warning,
274                location: "Throughout document".to_string(),
275                description: "Null bytes found in inappropriate locations".to_string(),
276                confidence: 0.7,
277            });
278        }
279
280        // Check object count consistency
281        total_checks += 1;
282        let expected_objects = self.count_object_declarations(data);
283        let actual_objects = document.ast.get_all_nodes().len();
284        if expected_objects > 0 && actual_objects < expected_objects / 2 {
285            integrity_issues += 1;
286            report.corruption_indicators.push(CorruptionIndicator {
287                indicator_type: CorruptionType::MissingComponents,
288                severity: ErrorSeverity::Error,
289                location: "Object count".to_string(),
290                description: format!(
291                    "Expected {} objects, found {}",
292                    expected_objects, actual_objects
293                ),
294                confidence: 0.8,
295            });
296        }
297
298        // Calculate integrity score
299        report.integrity_score = if total_checks > 0 {
300            1.0 - (integrity_issues as f64 / total_checks as f64)
301        } else {
302            1.0
303        };
304
305        report.statistics.checks_performed += total_checks;
306        report.statistics.checks_passed += total_checks - integrity_issues;
307        report.statistics.checks_failed += integrity_issues;
308    }
309
310    /// Validate object references
311    fn validate_references(&self, document: &PdfDocument, report: &mut HealthReport) {
312        let nodes = document.ast.get_all_nodes();
313        let mut total_refs = 0;
314        let mut broken_refs = 0;
315
316        for node in &nodes {
317            let refs = self.extract_references(&node.value);
318            total_refs += refs.len();
319
320            for reference in refs {
321                if !self.reference_exists(document, &reference) {
322                    broken_refs += 1;
323                }
324            }
325        }
326
327        if broken_refs > 0 {
328            report.corruption_indicators.push(CorruptionIndicator {
329                indicator_type: CorruptionType::InvalidReferences,
330                severity: if broken_refs > total_refs / 2 {
331                    ErrorSeverity::Critical
332                } else {
333                    ErrorSeverity::Warning
334                },
335                location: "Object references".to_string(),
336                description: format!("{} broken references out of {}", broken_refs, total_refs),
337                confidence: 0.95,
338            });
339        }
340
341        report.statistics.checks_performed += 1;
342        if broken_refs == 0 {
343            report.statistics.checks_passed += 1;
344        } else {
345            report.statistics.checks_failed += 1;
346        }
347    }
348
349    /// Check stream integrity
350    fn check_streams(&self, _document: &PdfDocument, data: &[u8], report: &mut HealthReport) {
351        let streams = self.find_streams_in_data(data);
352        let mut corrupted_streams = 0;
353
354        for stream in streams {
355            if self.is_stream_corrupted(&stream) {
356                corrupted_streams += 1;
357            }
358        }
359
360        if corrupted_streams > 0 {
361            report.corruption_indicators.push(CorruptionIndicator {
362                indicator_type: CorruptionType::StreamCorruption,
363                severity: ErrorSeverity::Warning,
364                location: "Stream objects".to_string(),
365                description: format!("{} corrupted streams detected", corrupted_streams),
366                confidence: 0.8,
367            });
368        }
369
370        report.statistics.checks_performed += 1;
371        if corrupted_streams == 0 {
372            report.statistics.checks_passed += 1;
373        } else {
374            report.statistics.checks_failed += 1;
375        }
376    }
377
378    /// Calculate overall document health
379    fn calculate_overall_health(&self, report: &mut HealthReport) {
380        let mut health_score = report.integrity_score;
381        let structure = &report.structure_health;
382
383        // Adjust score based on structure
384        if !structure.has_valid_header {
385            health_score -= 0.2;
386        }
387        if !structure.has_catalog {
388            health_score -= 0.3;
389        }
390        if !structure.has_pages_tree {
391            health_score -= 0.2;
392        }
393        if !structure.has_valid_xref {
394            health_score -= 0.1;
395        }
396        if !structure.has_trailer {
397            health_score -= 0.1;
398        }
399
400        // Adjust based on corruption indicators
401        let critical_count = report
402            .corruption_indicators
403            .iter()
404            .filter(|i| i.severity == ErrorSeverity::Critical)
405            .count();
406        let error_count = report
407            .corruption_indicators
408            .iter()
409            .filter(|i| i.severity == ErrorSeverity::Error)
410            .count();
411
412        health_score -= critical_count as f64 * 0.2;
413        health_score -= error_count as f64 * 0.1;
414
415        // Determine overall health
416        report.overall_health = if health_score >= 0.9 {
417            DocumentHealth::Healthy
418        } else if health_score >= 0.7 {
419            DocumentHealth::PartiallyRecovered
420        } else if health_score >= 0.4 {
421            DocumentHealth::Damaged
422        } else {
423            DocumentHealth::SeverelyDamaged
424        };
425    }
426
427    /// Generate recommendations based on findings
428    fn generate_recommendations(&self, report: &mut HealthReport) {
429        for indicator in &report.corruption_indicators {
430            let recommendation = match indicator.indicator_type {
431                CorruptionType::StructuralDamage => Recommendation {
432                    priority: Priority::High,
433                    action: RecommendedAction::StructureRepair,
434                    description: "Repair basic PDF structure".to_string(),
435                    estimated_success_rate: 0.8,
436                },
437                CorruptionType::InvalidReferences => Recommendation {
438                    priority: Priority::Medium,
439                    action: RecommendedAction::ReferenceResolution,
440                    description: "Fix broken object references".to_string(),
441                    estimated_success_rate: 0.7,
442                },
443                CorruptionType::StreamCorruption => Recommendation {
444                    priority: Priority::Medium,
445                    action: RecommendedAction::StreamReconstruction,
446                    description: "Reconstruct corrupted streams".to_string(),
447                    estimated_success_rate: 0.6,
448                },
449                CorruptionType::EncodingIssues => Recommendation {
450                    priority: Priority::Low,
451                    action: RecommendedAction::EncodingFix,
452                    description: "Fix text encoding issues".to_string(),
453                    estimated_success_rate: 0.9,
454                },
455                _ => Recommendation {
456                    priority: Priority::Medium,
457                    action: RecommendedAction::DataRecovery,
458                    description: "Attempt general data recovery".to_string(),
459                    estimated_success_rate: 0.5,
460                },
461            };
462
463            report.recommendations.push(recommendation);
464        }
465
466        // Sort recommendations by priority
467        report
468            .recommendations
469            .sort_by(|a, b| b.priority.cmp(&a.priority));
470    }
471
472    // Helper methods
473    fn calculate_reference_integrity(&self, document: &PdfDocument) -> f64 {
474        let nodes = document.ast.get_all_nodes();
475        if nodes.is_empty() {
476            return 1.0;
477        }
478
479        let mut total_refs = 0;
480        let mut valid_refs = 0;
481
482        for node in &nodes {
483            let refs = self.extract_references(&node.value);
484            total_refs += refs.len();
485
486            for reference in refs {
487                if self.reference_exists(document, &reference) {
488                    valid_refs += 1;
489                }
490            }
491        }
492
493        if total_refs == 0 {
494            1.0
495        } else {
496            valid_refs as f64 / total_refs as f64
497        }
498    }
499
500    fn calculate_stream_integrity(&self, _document: &PdfDocument, data: &[u8]) -> f64 {
501        let streams = self.find_streams_in_data(data);
502        if streams.is_empty() {
503            return 1.0;
504        }
505
506        let mut valid_streams = 0;
507        for stream in &streams {
508            if !self.is_stream_corrupted(stream) {
509                valid_streams += 1;
510            }
511        }
512
513        valid_streams as f64 / streams.len() as f64
514    }
515
516    fn contains_inappropriate_nulls(&self, data: &[u8]) -> bool {
517        // Check for null bytes in text content (simplified)
518        let text_regions = self.find_text_regions(data);
519        for region in text_regions {
520            if region.contains(&0u8) {
521                return true;
522            }
523        }
524        false
525    }
526
527    fn count_object_declarations(&self, data: &[u8]) -> usize {
528        let data_str = String::from_utf8_lossy(data);
529        data_str.matches(" obj").count()
530    }
531
532    #[allow(clippy::only_used_in_recursion)]
533    fn extract_references(&self, value: &crate::types::PdfValue) -> Vec<String> {
534        let mut refs = Vec::new();
535
536        match value {
537            crate::types::PdfValue::Reference(r) => {
538                refs.push(format!(
539                    "{} {} R",
540                    r.object_id().number,
541                    r.object_id().generation
542                ));
543            }
544            crate::types::PdfValue::Dictionary(dict) => {
545                for (_, v) in dict.iter() {
546                    refs.extend(self.extract_references(v));
547                }
548            }
549            crate::types::PdfValue::Array(arr) => {
550                for v in arr.iter() {
551                    refs.extend(self.extract_references(v));
552                }
553            }
554            _ => {}
555        }
556
557        refs
558    }
559
560    fn reference_exists(&self, document: &PdfDocument, _reference: &str) -> bool {
561        // Simplified reference checking
562        // In practice, would parse the reference and check if object exists
563        !document.ast.get_all_nodes().is_empty()
564    }
565
566    fn find_streams_in_data(&self, data: &[u8]) -> Vec<StreamInfo> {
567        let mut streams = Vec::new();
568        let mut pos = 0;
569
570        while let Some(start) = self.find_pattern(&data[pos..], b"stream") {
571            let abs_start = pos + start;
572            if let Some(end) = self.find_pattern(&data[abs_start..], b"endstream") {
573                let abs_end = abs_start + end;
574                streams.push(StreamInfo {
575                    start: abs_start,
576                    end: abs_end,
577                    data: data[abs_start..abs_end].to_vec(),
578                });
579                pos = abs_end;
580            } else {
581                pos = abs_start + 6;
582            }
583        }
584
585        streams
586    }
587
588    fn is_stream_corrupted(&self, stream: &StreamInfo) -> bool {
589        // Check for common stream corruption indicators
590        let data = &stream.data;
591
592        // Check if stream starts properly
593        if !data.starts_with(b"stream") {
594            return true;
595        }
596
597        // Check if stream ends properly
598        if !data.ends_with(b"endstream") {
599            return true;
600        }
601
602        // Check for unexpected null bytes
603        let content_start = 6; // Skip "stream"
604        let content_end = data.len() - 9; // Skip "endstream"
605        if content_end > content_start {
606            let content = &data[content_start..content_end];
607            // Allow some null bytes but not excessive amounts
608            let null_count = content.iter().filter(|&&b| b == 0).count();
609            if null_count > content.len() / 4 {
610                return true;
611            }
612        }
613
614        false
615    }
616
617    fn find_text_regions<'a>(&self, data: &'a [u8]) -> Vec<&'a [u8]> {
618        // Simplified text region detection
619        // In practice, would analyze the PDF structure to find text content
620        vec![data] // Return entire data for simplification
621    }
622
623    fn find_pattern(&self, data: &[u8], pattern: &[u8]) -> Option<usize> {
624        data.windows(pattern.len())
625            .position(|window| window == pattern)
626    }
627}
628
629impl Default for DocumentDiagnostics {
630    fn default() -> Self {
631        Self::new(DiagnosticsConfig::default())
632    }
633}
634
635impl Default for StructureHealth {
636    fn default() -> Self {
637        Self {
638            has_valid_header: false,
639            has_catalog: false,
640            has_pages_tree: false,
641            has_valid_xref: false,
642            has_trailer: false,
643            reference_integrity: 0.0,
644            stream_integrity: 0.0,
645        }
646    }
647}
648
/// A stream span located in the raw document bytes.
#[derive(Debug, Clone)]
// `start` and `end` are recorded for reference but not read in this file.
#[allow(dead_code)]
struct StreamInfo {
    /// Offset in the scanned buffer where the span begins.
    start: usize,
    /// Offset in the scanned buffer where the span ends (exclusive).
    end: usize,
    /// Owned copy of the scanned bytes in `start..end`.
    data: Vec<u8>,
}
656
657/// Base trait for health checkers
658pub trait HealthChecker: Send + Sync {
659    fn name(&self) -> &str;
660    fn check_health(&self, document: &PdfDocument, data: &[u8]) -> Finding;
661}
662
663/// Header health checker
664pub struct HeaderChecker;
665
666impl Default for HeaderChecker {
667    fn default() -> Self {
668        Self::new()
669    }
670}
671
672impl HeaderChecker {
673    pub fn new() -> Self {
674        Self
675    }
676}
677
678impl HealthChecker for HeaderChecker {
679    fn name(&self) -> &str {
680        "HeaderChecker"
681    }
682
683    fn check_health(&self, _document: &PdfDocument, data: &[u8]) -> Finding {
684        let has_header = data.starts_with(b"%PDF-");
685        let mut metrics = HashMap::new();
686        metrics.insert("has_header".to_string(), if has_header { 1.0 } else { 0.0 });
687
688        Finding {
689            check_name: "Header Validation".to_string(),
690            status: if has_header {
691                CheckStatus::Passed
692            } else {
693                CheckStatus::Failed
694            },
695            details: if has_header {
696                "Valid PDF header found".to_string()
697            } else {
698                "Missing or invalid PDF header".to_string()
699            },
700            metrics,
701        }
702    }
703}
704
705/// Structure health checker
706pub struct StructureChecker;
707
708impl Default for StructureChecker {
709    fn default() -> Self {
710        Self::new()
711    }
712}
713
714impl StructureChecker {
715    pub fn new() -> Self {
716        Self
717    }
718}
719
720impl HealthChecker for StructureChecker {
721    fn name(&self) -> &str {
722        "StructureChecker"
723    }
724
725    fn check_health(&self, document: &PdfDocument, _data: &[u8]) -> Finding {
726        let has_root = document.ast.get_root().is_some();
727        let node_count = document.ast.get_all_nodes().len();
728
729        let mut metrics = HashMap::new();
730        metrics.insert("has_root".to_string(), if has_root { 1.0 } else { 0.0 });
731        metrics.insert("node_count".to_string(), node_count as f64);
732
733        let status = if has_root && node_count > 0 {
734            CheckStatus::Passed
735        } else if has_root {
736            CheckStatus::Warning
737        } else {
738            CheckStatus::Failed
739        };
740
741        Finding {
742            check_name: "Structure Validation".to_string(),
743            status,
744            details: format!("Document has {} nodes, root: {}", node_count, has_root),
745            metrics,
746        }
747    }
748}
749
750/// Reference health checker
751pub struct ReferenceChecker;
752
753impl Default for ReferenceChecker {
754    fn default() -> Self {
755        Self::new()
756    }
757}
758
759impl ReferenceChecker {
760    pub fn new() -> Self {
761        Self
762    }
763}
764
765impl HealthChecker for ReferenceChecker {
766    fn name(&self) -> &str {
767        "ReferenceChecker"
768    }
769
770    fn check_health(&self, document: &PdfDocument, _data: &[u8]) -> Finding {
771        let nodes = document.ast.get_all_nodes();
772        let mut metrics = HashMap::new();
773        metrics.insert("total_nodes".to_string(), nodes.len() as f64);
774
775        Finding {
776            check_name: "Reference Validation".to_string(),
777            status: CheckStatus::Passed, // Simplified
778            details: "Reference integrity check completed".to_string(),
779            metrics,
780        }
781    }
782}
783
784/// Stream health checker
785pub struct StreamChecker;
786
787impl Default for StreamChecker {
788    fn default() -> Self {
789        Self::new()
790    }
791}
792
793impl StreamChecker {
794    pub fn new() -> Self {
795        Self
796    }
797}
798
799impl HealthChecker for StreamChecker {
800    fn name(&self) -> &str {
801        "StreamChecker"
802    }
803
804    fn check_health(&self, _document: &PdfDocument, data: &[u8]) -> Finding {
805        let stream_count = data.windows(6).filter(|w| *w == b"stream").count();
806        let mut metrics = HashMap::new();
807        metrics.insert("stream_count".to_string(), stream_count as f64);
808
809        Finding {
810            check_name: "Stream Validation".to_string(),
811            status: CheckStatus::Passed,
812            details: format!("Found {} streams", stream_count),
813            metrics,
814        }
815    }
816}
817
818/// Integrity health checker
819pub struct IntegrityChecker;
820
821impl Default for IntegrityChecker {
822    fn default() -> Self {
823        Self::new()
824    }
825}
826
827impl IntegrityChecker {
828    pub fn new() -> Self {
829        Self
830    }
831}
832
833impl HealthChecker for IntegrityChecker {
834    fn name(&self) -> &str {
835        "IntegrityChecker"
836    }
837
838    fn check_health(&self, _document: &PdfDocument, data: &[u8]) -> Finding {
839        let has_eof = data.ends_with(b"%%EOF") || data.ends_with(b"%%EOF\n");
840        let mut metrics = HashMap::new();
841        metrics.insert("has_eof".to_string(), if has_eof { 1.0 } else { 0.0 });
842        metrics.insert("file_size".to_string(), data.len() as f64);
843
844        Finding {
845            check_name: "Integrity Validation".to_string(),
846            status: if has_eof {
847                CheckStatus::Passed
848            } else {
849                CheckStatus::Warning
850            },
851            details: if has_eof {
852                "File integrity appears intact".to_string()
853            } else {
854                "Missing EOF marker - file may be truncated".to_string()
855            },
856            metrics,
857        }
858    }
859}
860
861/// Quick health assessment function
862pub fn quick_health_check(document: &PdfDocument, data: &[u8]) -> DocumentHealth {
863    let diagnostics = DocumentDiagnostics::new(DiagnosticsConfig {
864        deep_analysis: false,
865        ..DiagnosticsConfig::default()
866    });
867
868    let report = diagnostics.analyze_health(document, data);
869    report.overall_health
870}