Skip to main content

oxidize_pdf/parser/
trailer.rs

1//! PDF Trailer Parser
2//!
3//! Parses PDF trailer according to ISO 32000-1 Section 7.5.5
4
5use super::objects::{PdfDictionary, PdfObject};
6use super::{ParseError, ParseResult};
7
8/// PDF Trailer information
9#[derive(Debug, Clone)]
10pub struct PdfTrailer {
11    /// The trailer dictionary
12    pub dict: PdfDictionary,
13    /// Byte offset of previous xref section (if any)
14    pub prev: Option<u64>,
15    /// Byte offset of this xref section
16    pub xref_offset: u64,
17}
18
19impl PdfTrailer {
20    /// Parse trailer from a dictionary
21    pub fn from_dict(dict: PdfDictionary, xref_offset: u64) -> ParseResult<Self> {
22        // Extract previous xref offset if present
23        let prev = dict
24            .get("Prev")
25            .and_then(|obj| obj.as_integer())
26            .map(|i| i as u64);
27
28        Ok(PdfTrailer {
29            dict,
30            prev,
31            xref_offset,
32        })
33    }
34
35    /// Get the size (number of entries in xref table)
36    pub fn size(&self) -> ParseResult<u32> {
37        self.dict
38            .get("Size")
39            .and_then(|obj| obj.as_integer())
40            .map(|i| i as u32)
41            .ok_or_else(|| ParseError::MissingKey("Size".to_string()))
42    }
43
44    /// Get the root object reference (document catalog)
45    pub fn root(&self) -> ParseResult<(u32, u16)> {
46        self.dict
47            .get("Root")
48            .and_then(|obj| obj.as_reference())
49            .ok_or_else(|| ParseError::MissingKey("Root".to_string()))
50    }
51
52    /// Try to find root by scanning for Catalog object
53    pub fn find_root_fallback(&self) -> Option<(u32, u16)> {
54        // This is a placeholder - actual implementation would scan objects
55        // For now, try common object numbers for catalog
56        if let Some(obj_num) = [1, 2, 3, 4, 5].into_iter().next() {
57            // Would need to check if object exists and is a Catalog
58            // For now, return first attempt as a guess
59            return Some((obj_num, 0));
60        }
61        None
62    }
63
64    /// Get the info object reference (document information dictionary)
65    pub fn info(&self) -> Option<(u32, u16)> {
66        self.dict.get("Info").and_then(|obj| obj.as_reference())
67    }
68
69    /// Get the ID array (file identifiers)
70    pub fn id(&self) -> Option<&PdfObject> {
71        self.dict.get("ID")
72    }
73
74    /// Check if this PDF is encrypted
75    pub fn is_encrypted(&self) -> bool {
76        self.dict.contains_key("Encrypt")
77    }
78
79    /// Get the encryption dictionary reference
80    pub fn encrypt(&self) -> ParseResult<Option<(u32, u16)>> {
81        Ok(self.dict.get("Encrypt").and_then(|obj| obj.as_reference()))
82    }
83
84    /// Validate the trailer dictionary
85    pub fn validate(&self) -> ParseResult<()> {
86        // Required entries
87        self.size()?;
88        self.root()?;
89
90        // Note: Encryption is now handled by the reader, not rejected here
91
92        Ok(())
93    }
94
95    /// Get access to the trailer dictionary
96    pub fn dict(&self) -> &PdfDictionary {
97        &self.dict
98    }
99}
100
101/// Represents the complete trailer chain for PDFs with updates
102#[derive(Debug)]
103pub struct TrailerChain {
104    /// List of trailers from newest to oldest
105    trailers: Vec<PdfTrailer>,
106}
107
108impl TrailerChain {
109    /// Create a new trailer chain with a single trailer
110    pub fn new(trailer: PdfTrailer) -> Self {
111        Self {
112            trailers: vec![trailer],
113        }
114    }
115
116    /// Add an older trailer to the chain
117    pub fn add_previous(&mut self, trailer: PdfTrailer) {
118        self.trailers.push(trailer);
119    }
120
121    /// Get the most recent trailer
122    pub fn current(&self) -> &PdfTrailer {
123        &self.trailers[0]
124    }
125
126    /// Get all trailers in the chain
127    pub fn all(&self) -> &[PdfTrailer] {
128        &self.trailers
129    }
130
131    /// Check if there are previous versions
132    pub fn has_previous(&self) -> bool {
133        self.trailers.len() > 1
134    }
135}
136
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::objects::{PdfArray, PdfObject, PdfString};

    /// Dictionary holding the two required trailer entries: `Size` and `Root` (1 0 R).
    fn required_entries(size: i64) -> PdfDictionary {
        let mut dict = PdfDictionary::new();
        dict.insert("Size".to_string(), PdfObject::Integer(size));
        dict.insert("Root".to_string(), PdfObject::Reference(1, 0));
        dict
    }

    /// Two-element array of string objects, shaped like a trailer `ID` entry.
    fn id_entry(first: &[u8], second: &[u8]) -> PdfObject {
        let mut ids = PdfArray::new();
        ids.push(PdfObject::String(PdfString(first.to_vec())));
        ids.push(PdfObject::String(PdfString(second.to_vec())));
        PdfObject::Array(ids)
    }

    #[test]
    fn test_trailer_basic() {
        let trailer = PdfTrailer::from_dict(required_entries(100), 12345).unwrap();

        assert_eq!(trailer.size().unwrap(), 100);
        assert_eq!(trailer.root().unwrap(), (1, 0));
        assert!(trailer.info().is_none());
        assert!(!trailer.is_encrypted());
    }

    #[test]
    fn test_trailer_with_prev() {
        let mut dict = required_entries(200);
        dict.insert("Prev".to_string(), PdfObject::Integer(5000));

        let trailer = PdfTrailer::from_dict(dict, 20000).unwrap();

        assert_eq!(trailer.prev, Some(5000));
        assert_eq!(trailer.xref_offset, 20000);
    }

    #[test]
    fn test_trailer_validation() {
        // Missing Size
        let mut without_size = PdfDictionary::new();
        without_size.insert("Root".to_string(), PdfObject::Reference(1, 0));
        let trailer = PdfTrailer::from_dict(without_size, 12345).unwrap();
        assert!(trailer.validate().is_err());

        // Missing Root
        let mut without_root = PdfDictionary::new();
        without_root.insert("Size".to_string(), PdfObject::Integer(100));
        let trailer = PdfTrailer::from_dict(without_root, 12345).unwrap();
        assert!(trailer.validate().is_err());

        // Encrypted
        let mut encrypted = required_entries(100);
        encrypted.insert("Encrypt".to_string(), PdfObject::Reference(10, 0));
        let trailer = PdfTrailer::from_dict(encrypted, 12345).unwrap();
        // Encryption is handled by the reader, not rejected at trailer level
        assert!(trailer.validate().is_ok());
        // But we can still detect that encryption is present
        assert!(trailer.encrypt().unwrap().is_some());
    }

    #[test]
    fn test_trailer_with_info() {
        let mut dict = required_entries(150);
        dict.insert("Info".to_string(), PdfObject::Reference(2, 0));

        let trailer = PdfTrailer::from_dict(dict, 15000).unwrap();

        assert_eq!(trailer.info(), Some((2, 0)));
        assert_eq!(trailer.size().unwrap(), 150);
    }

    #[test]
    fn test_trailer_with_id() {
        let mut dict = required_entries(100);
        dict.insert("ID".to_string(), id_entry(b"ID1", b"ID2"));

        let trailer = PdfTrailer::from_dict(dict, 10000).unwrap();

        assert!(trailer.id().is_some());
        assert!(matches!(trailer.id().unwrap(), PdfObject::Array(_)));
    }

    #[test]
    fn test_trailer_size_missing() {
        let mut dict = PdfDictionary::new();
        dict.insert("Root".to_string(), PdfObject::Reference(1, 0));

        let trailer = PdfTrailer::from_dict(dict, 1000).unwrap();

        match trailer.size() {
            Err(ParseError::MissingKey(key)) => assert_eq!(key, "Size"),
            _ => panic!("Expected MissingKey error for Size"),
        }
    }

    #[test]
    fn test_trailer_root_missing() {
        let mut dict = PdfDictionary::new();
        dict.insert("Size".to_string(), PdfObject::Integer(100));

        let trailer = PdfTrailer::from_dict(dict, 1000).unwrap();

        match trailer.root() {
            Err(ParseError::MissingKey(key)) => assert_eq!(key, "Root"),
            _ => panic!("Expected MissingKey error for Root"),
        }
    }

    #[test]
    fn test_trailer_invalid_size_type() {
        let mut dict = PdfDictionary::new();
        dict.insert(
            "Size".to_string(),
            PdfObject::String(PdfString(b"not a number".to_vec())),
        );
        dict.insert("Root".to_string(), PdfObject::Reference(1, 0));

        let trailer = PdfTrailer::from_dict(dict, 1000).unwrap();

        assert!(trailer.size().is_err());
    }

    #[test]
    fn test_trailer_invalid_root_type() {
        let mut dict = PdfDictionary::new();
        dict.insert("Size".to_string(), PdfObject::Integer(100));
        dict.insert(
            "Root".to_string(),
            PdfObject::String(PdfString(b"not a reference".to_vec())),
        );

        let trailer = PdfTrailer::from_dict(dict, 1000).unwrap();

        assert!(trailer.root().is_err());
    }

    #[test]
    fn test_trailer_encrypt_reference() {
        let mut dict = required_entries(100);
        dict.insert("Encrypt".to_string(), PdfObject::Reference(5, 0));

        let trailer = PdfTrailer::from_dict(dict, 1000).unwrap();

        assert!(trailer.is_encrypted());
        assert_eq!(trailer.encrypt().unwrap(), Some((5, 0)));
    }

    #[test]
    fn test_trailer_chain_single() {
        let trailer = PdfTrailer::from_dict(required_entries(100), 1000).unwrap();
        let chain = TrailerChain::new(trailer);

        assert!(!chain.has_previous());
        assert_eq!(chain.all().len(), 1);
        assert_eq!(chain.current().xref_offset, 1000);
    }

    #[test]
    fn test_trailer_chain_multiple() {
        let mut newer_dict = required_entries(100);
        newer_dict.insert("Prev".to_string(), PdfObject::Integer(500));
        let newer = PdfTrailer::from_dict(newer_dict, 1000).unwrap();

        let older = PdfTrailer::from_dict(required_entries(80), 500).unwrap();

        let mut chain = TrailerChain::new(newer);
        chain.add_previous(older);

        assert!(chain.has_previous());
        assert_eq!(chain.all().len(), 2);
        assert_eq!(chain.current().xref_offset, 1000);
        assert_eq!(chain.all()[1].xref_offset, 500);
    }

    #[test]
    fn test_trailer_prev_as_float() {
        let mut dict = required_entries(100);
        dict.insert("Prev".to_string(), PdfObject::Real(5000.0));

        let trailer = PdfTrailer::from_dict(dict, 10000).unwrap();

        // Real numbers should not be converted to prev offset
        assert_eq!(trailer.prev, None);
    }

    #[test]
    fn test_trailer_large_values() {
        let mut dict = PdfDictionary::new();
        dict.insert("Size".to_string(), PdfObject::Integer(i64::MAX));
        dict.insert("Root".to_string(), PdfObject::Reference(u32::MAX, u16::MAX));
        dict.insert("Prev".to_string(), PdfObject::Integer(i64::MAX));

        let trailer = PdfTrailer::from_dict(dict, u64::MAX).unwrap();

        // i64::MAX does not fit in a u32; size() is expected to report u32::MAX.
        assert_eq!(trailer.size().unwrap(), u32::MAX);
        assert_eq!(trailer.root().unwrap(), (u32::MAX, u16::MAX));
        assert_eq!(trailer.prev, Some(i64::MAX as u64));
        assert_eq!(trailer.xref_offset, u64::MAX);
    }

    #[test]
    fn test_trailer_all_optional_fields() {
        let mut dict = required_entries(200);
        dict.insert("Info".to_string(), PdfObject::Reference(2, 0));
        dict.insert("Prev".to_string(), PdfObject::Integer(1000));
        dict.insert("ID".to_string(), id_entry(b"FirstID", b"SecondID"));

        // The dictionary is not used again, so it is moved rather than cloned.
        let trailer = PdfTrailer::from_dict(dict, 5000).unwrap();

        assert_eq!(trailer.size().unwrap(), 200);
        assert_eq!(trailer.root().unwrap(), (1, 0));
        assert_eq!(trailer.info(), Some((2, 0)));
        assert_eq!(trailer.prev, Some(1000));
        assert!(trailer.id().is_some());
        assert!(!trailer.is_encrypted());
        assert_eq!(trailer.xref_offset, 5000);

        // Verify validation passes
        assert!(trailer.validate().is_ok());
    }

    #[test]
    fn test_trailer_chain_ordering() {
        // Five revisions listed newest first: xref offsets 5000, 4000, ..., 1000.
        // Every revision except the oldest points at its predecessor via Prev.
        let revisions: Vec<PdfTrailer> = (1..=5i64)
            .rev()
            .map(|revision| {
                let mut dict = required_entries(100 + revision);
                if revision > 1 {
                    dict.insert(
                        "Prev".to_string(),
                        PdfObject::Integer((revision - 1) * 1000),
                    );
                }
                PdfTrailer::from_dict(dict, (revision * 1000) as u64).unwrap()
            })
            .collect();

        let mut remaining = revisions.into_iter();
        let mut chain = TrailerChain::new(remaining.next().unwrap());
        for older in remaining {
            chain.add_previous(older);
        }

        assert_eq!(chain.all().len(), 5);
        assert!(chain.has_previous());

        // Verify ordering (newest first)
        assert_eq!(chain.current().xref_offset, 5000);
        assert_eq!(chain.all()[0].xref_offset, 5000);
        assert_eq!(chain.all()[4].xref_offset, 1000);

        // Each Prev link must point at the xref offset of the next (older) entry.
        for pair in chain.all().windows(2) {
            assert_eq!(pair[0].prev, Some(pair[1].xref_offset));
        }
        assert_eq!(chain.all()[4].prev, None);
    }
}