Skip to main content

pdfplumber_core/
repair.rs

//! PDF repair types for best-effort fixing of common PDF issues.
//!
//! Provides [`RepairOptions`] for configuring which repairs to attempt
//! and [`RepairResult`] for reporting what was fixed.
5
use std::fmt;
7
/// Options for controlling which PDF repairs to attempt.
///
/// Each field enables a specific repair strategy. All default to `true`
/// (see the [`Default`] implementation).
///
/// Two option sets compare equal when every flag matches.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepairOptions {
    /// Rebuild the cross-reference table by scanning for `obj`/`endobj` markers.
    pub rebuild_xref: bool,
    /// Recalculate stream `/Length` entries from actual stream data.
    pub fix_stream_lengths: bool,
    /// Remove or skip unresolvable object references with warnings.
    pub remove_broken_objects: bool,
}
20
21impl Default for RepairOptions {
22    fn default() -> Self {
23        Self {
24            rebuild_xref: true,
25            fix_stream_lengths: true,
26            remove_broken_objects: true,
27        }
28    }
29}
30
31impl fmt::Display for RepairOptions {
32    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
33        write!(
34            f,
35            "RepairOptions(rebuild_xref={}, fix_stream_lengths={}, remove_broken_objects={})",
36            self.rebuild_xref, self.fix_stream_lengths, self.remove_broken_objects
37        )
38    }
39}
40
/// Result of a PDF repair operation.
///
/// Holds the log of repairs that were applied. There is no separate
/// "modified" flag: a non-empty log is what indicates that repairs were
/// recorded (see [`RepairResult::has_repairs`]).
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct RepairResult {
    /// Log of repairs applied, one entry per fix.
    pub log: Vec<String>,
}
50
51impl RepairResult {
52    /// Create a new empty repair result.
53    pub fn new() -> Self {
54        Self { log: Vec::new() }
55    }
56
57    /// Returns `true` if any repairs were applied.
58    pub fn has_repairs(&self) -> bool {
59        !self.log.is_empty()
60    }
61}
62
#[cfg(test)]
mod tests {
    use super::*;

    /// `Default` must switch every repair strategy on.
    #[test]
    fn repair_options_default_all_enabled() {
        let opts = RepairOptions::default();
        assert!(opts.rebuild_xref);
        assert!(opts.fix_stream_lengths);
        assert!(opts.remove_broken_objects);
    }

    /// The rendering is pinned exactly so that a reordered or renamed field
    /// in the format string is caught, not just a missing substring.
    #[test]
    fn repair_options_display() {
        let opts = RepairOptions::default();
        assert_eq!(
            opts.to_string(),
            "RepairOptions(rebuild_xref=true, fix_stream_lengths=true, remove_broken_objects=true)"
        );
    }

    /// Disabled flags must be rendered as `false` in their own slot.
    #[test]
    fn repair_options_display_reflects_disabled_flags() {
        let opts = RepairOptions {
            rebuild_xref: false,
            fix_stream_lengths: true,
            remove_broken_objects: false,
        };
        assert_eq!(
            opts.to_string(),
            "RepairOptions(rebuild_xref=false, fix_stream_lengths=true, remove_broken_objects=false)"
        );
    }

    /// Fields can be set individually through a struct literal.
    #[test]
    fn repair_options_custom() {
        let opts = RepairOptions {
            rebuild_xref: false,
            fix_stream_lengths: true,
            remove_broken_objects: false,
        };
        assert!(!opts.rebuild_xref);
        assert!(opts.fix_stream_lengths);
        assert!(!opts.remove_broken_objects);
    }

    /// A clone carries the same value for every flag.
    #[test]
    fn repair_options_clone() {
        let opts1 = RepairOptions::default();
        let opts2 = opts1.clone();
        assert_eq!(opts1.rebuild_xref, opts2.rebuild_xref);
        assert_eq!(opts1.fix_stream_lengths, opts2.fix_stream_lengths);
        assert_eq!(opts1.remove_broken_objects, opts2.remove_broken_objects);
    }

    /// A freshly constructed result has an empty log and reports no repairs.
    #[test]
    fn repair_result_new_empty() {
        let result = RepairResult::new();
        assert!(result.log.is_empty());
        assert!(!result.has_repairs());
    }

    /// Pushing log entries makes `has_repairs` report `true`.
    #[test]
    fn repair_result_with_entries() {
        let mut result = RepairResult::new();
        result
            .log
            .push("fixed stream length for object 3 0".to_string());
        result
            .log
            .push("removed broken reference to object 5 0".to_string());
        assert!(result.has_repairs());
        assert_eq!(result.log.len(), 2);
    }

    /// The derived `Default` behaves like `new`: no repairs recorded.
    #[test]
    fn repair_result_default() {
        let result = RepairResult::default();
        assert!(!result.has_repairs());
    }
}