oxidize_pdf/parser/
reader.rs

//! High-level PDF Reader API
//!
//! Provides a simple interface for reading PDF files
4
5use super::header::PdfHeader;
6use super::object_stream::ObjectStream;
7use super::objects::{PdfDictionary, PdfObject};
8use super::trailer::PdfTrailer;
9use super::xref::XRefTable;
10use super::{ParseError, ParseResult};
11use std::collections::HashMap;
12use std::fs::File;
13use std::io::{BufReader, Read, Seek};
14use std::path::Path;
15
16/// High-level PDF reader
17pub struct PdfReader<R: Read + Seek> {
18    reader: BufReader<R>,
19    header: PdfHeader,
20    xref: XRefTable,
21    trailer: PdfTrailer,
22    /// Cache of loaded objects
23    object_cache: HashMap<(u32, u16), PdfObject>,
24    /// Cache of object streams
25    object_stream_cache: HashMap<u32, ObjectStream>,
26    /// Page tree navigator
27    page_tree: Option<super::page_tree::PageTree>,
28}
29
30impl PdfReader<File> {
31    /// Open a PDF file from a path
32    pub fn open<P: AsRef<Path>>(path: P) -> ParseResult<Self> {
33        let file = File::open(path)?;
34        Self::new(file)
35    }
36
37    /// Open a PDF file as a PdfDocument
38    pub fn open_document<P: AsRef<Path>>(
39        path: P,
40    ) -> ParseResult<super::document::PdfDocument<File>> {
41        let reader = Self::open(path)?;
42        Ok(reader.into_document())
43    }
44}
45
46impl<R: Read + Seek> PdfReader<R> {
47    /// Create a new PDF reader from a reader
48    pub fn new(reader: R) -> ParseResult<Self> {
49        let mut buf_reader = BufReader::new(reader);
50
51        // Parse header
52        let header = PdfHeader::parse(&mut buf_reader)?;
53        // Parse xref table
54        let xref = XRefTable::parse(&mut buf_reader)?;
55
56        // Get trailer
57        let trailer_dict = xref.trailer().ok_or(ParseError::InvalidTrailer)?.clone();
58
59        let xref_offset = xref.xref_offset();
60        let trailer = PdfTrailer::from_dict(trailer_dict, xref_offset)?;
61
62        // Validate trailer
63        trailer.validate()?;
64
65        Ok(Self {
66            reader: buf_reader,
67            header,
68            xref,
69            trailer,
70            object_cache: HashMap::new(),
71            object_stream_cache: HashMap::new(),
72            page_tree: None,
73        })
74    }
75
76    /// Get the PDF version
77    pub fn version(&self) -> &super::header::PdfVersion {
78        &self.header.version
79    }
80
81    /// Get the document catalog
82    pub fn catalog(&mut self) -> ParseResult<&PdfDictionary> {
83        let (obj_num, gen_num) = self.trailer.root()?;
84        let catalog = self.get_object(obj_num, gen_num)?;
85
86        catalog.as_dict().ok_or_else(|| ParseError::SyntaxError {
87            position: 0,
88            message: "Catalog is not a dictionary".to_string(),
89        })
90    }
91
92    /// Get the document info dictionary
93    pub fn info(&mut self) -> ParseResult<Option<&PdfDictionary>> {
94        match self.trailer.info() {
95            Some((obj_num, gen_num)) => {
96                let info = self.get_object(obj_num, gen_num)?;
97                Ok(info.as_dict())
98            }
99            None => Ok(None),
100        }
101    }
102
103    /// Get an object by reference
104    pub fn get_object(&mut self, obj_num: u32, gen_num: u16) -> ParseResult<&PdfObject> {
105        let key = (obj_num, gen_num);
106
107        // Check cache first
108        if self.object_cache.contains_key(&key) {
109            return Ok(&self.object_cache[&key]);
110        }
111
112        // Check if this is a compressed object
113        if let Some(ext_entry) = self.xref.get_extended_entry(obj_num) {
114            if let Some((stream_obj_num, index_in_stream)) = ext_entry.compressed_info {
115                // This is a compressed object - need to extract from object stream
116                return self.get_compressed_object(
117                    obj_num,
118                    gen_num,
119                    stream_obj_num,
120                    index_in_stream,
121                );
122            }
123        }
124
125        // Get xref entry
126        let entry = self
127            .xref
128            .get_entry(obj_num)
129            .ok_or(ParseError::InvalidReference(obj_num, gen_num))?;
130
131        if !entry.in_use {
132            // Free object
133            self.object_cache.insert(key, PdfObject::Null);
134            return Ok(&self.object_cache[&key]);
135        }
136
137        if entry.generation != gen_num {
138            return Err(ParseError::InvalidReference(obj_num, gen_num));
139        }
140
141        // Seek to object position
142        self.reader.seek(std::io::SeekFrom::Start(entry.offset))?;
143
144        // Parse object header (obj_num gen_num obj)
145        let mut lexer = super::lexer::Lexer::new(&mut self.reader);
146
147        // Read object number
148        let token = lexer.next_token()?;
149        let read_obj_num = match token {
150            super::lexer::Token::Integer(n) => n as u32,
151            _ => {
152                return Err(ParseError::SyntaxError {
153                    position: entry.offset as usize,
154                    message: "Expected object number".to_string(),
155                })
156            }
157        };
158
159        if read_obj_num != obj_num {
160            return Err(ParseError::SyntaxError {
161                position: entry.offset as usize,
162                message: format!(
163                    "Object number mismatch: expected {obj_num}, found {read_obj_num}"
164                ),
165            });
166        }
167
168        // Read generation number
169        let token = lexer.next_token()?;
170        let read_gen_num = match token {
171            super::lexer::Token::Integer(n) => n as u16,
172            _ => {
173                return Err(ParseError::SyntaxError {
174                    position: entry.offset as usize,
175                    message: "Expected generation number".to_string(),
176                })
177            }
178        };
179
180        if read_gen_num != gen_num {
181            return Err(ParseError::SyntaxError {
182                position: entry.offset as usize,
183                message: format!(
184                    "Generation number mismatch: expected {gen_num}, found {read_gen_num}"
185                ),
186            });
187        }
188
189        // Read 'obj' keyword
190        let token = lexer.next_token()?;
191        match token {
192            super::lexer::Token::Obj => {}
193            _ => {
194                return Err(ParseError::SyntaxError {
195                    position: entry.offset as usize,
196                    message: "Expected 'obj' keyword".to_string(),
197                })
198            }
199        };
200
201        // Parse the actual object
202        let obj = PdfObject::parse(&mut lexer)?;
203
204        // Read 'endobj' keyword
205        let token = lexer.next_token()?;
206        match token {
207            super::lexer::Token::EndObj => {}
208            _ => {
209                return Err(ParseError::SyntaxError {
210                    position: entry.offset as usize,
211                    message: "Expected 'endobj' keyword".to_string(),
212                })
213            }
214        };
215
216        // Cache the object
217        self.object_cache.insert(key, obj);
218        Ok(&self.object_cache[&key])
219    }
220
221    /// Resolve a reference to get the actual object
222    pub fn resolve<'a>(&'a mut self, obj: &'a PdfObject) -> ParseResult<&'a PdfObject> {
223        match obj {
224            PdfObject::Reference(obj_num, gen_num) => self.get_object(*obj_num, *gen_num),
225            _ => Ok(obj),
226        }
227    }
228
229    /// Get a compressed object from an object stream
230    fn get_compressed_object(
231        &mut self,
232        obj_num: u32,
233        gen_num: u16,
234        stream_obj_num: u32,
235        _index_in_stream: u32,
236    ) -> ParseResult<&PdfObject> {
237        let key = (obj_num, gen_num);
238
239        // Load the object stream if not cached
240        if !self.object_stream_cache.contains_key(&stream_obj_num) {
241            // Get the stream object
242            let stream_obj = self.get_object(stream_obj_num, 0)?;
243
244            if let Some(stream) = stream_obj.as_stream() {
245                // Parse the object stream
246                let obj_stream = ObjectStream::parse(stream.clone())?;
247                self.object_stream_cache.insert(stream_obj_num, obj_stream);
248            } else {
249                return Err(ParseError::SyntaxError {
250                    position: 0,
251                    message: format!("Object {stream_obj_num} is not a stream"),
252                });
253            }
254        }
255
256        // Get the object from the stream
257        let obj_stream = &self.object_stream_cache[&stream_obj_num];
258        let obj = obj_stream
259            .get_object(obj_num)
260            .ok_or_else(|| ParseError::SyntaxError {
261                position: 0,
262                message: format!("Object {obj_num} not found in object stream {stream_obj_num}"),
263            })?;
264
265        // Cache the object
266        self.object_cache.insert(key, obj.clone());
267        Ok(&self.object_cache[&key])
268    }
269
270    /// Get the page tree root
271    pub fn pages(&mut self) -> ParseResult<&PdfDictionary> {
272        // Get the pages reference from catalog first
273        let (pages_obj_num, pages_gen_num) = {
274            let catalog = self.catalog()?;
275            let pages_ref = catalog
276                .get("Pages")
277                .ok_or_else(|| ParseError::MissingKey("Pages".to_string()))?;
278
279            match pages_ref {
280                PdfObject::Reference(obj_num, gen_num) => (*obj_num, *gen_num),
281                _ => {
282                    return Err(ParseError::SyntaxError {
283                        position: 0,
284                        message: "Pages must be a reference".to_string(),
285                    })
286                }
287            }
288        };
289
290        // Now we can get the pages object without holding a reference to catalog
291        let pages_obj = self.get_object(pages_obj_num, pages_gen_num)?;
292        pages_obj.as_dict().ok_or_else(|| ParseError::SyntaxError {
293            position: 0,
294            message: "Pages is not a dictionary".to_string(),
295        })
296    }
297
298    /// Get the number of pages
299    pub fn page_count(&mut self) -> ParseResult<u32> {
300        let pages = self.pages()?;
301        pages
302            .get("Count")
303            .and_then(|obj| obj.as_integer())
304            .map(|count| count as u32)
305            .ok_or_else(|| ParseError::MissingKey("Count".to_string()))
306    }
307
308    /// Get metadata from the document
309    pub fn metadata(&mut self) -> ParseResult<DocumentMetadata> {
310        let mut metadata = DocumentMetadata::default();
311
312        if let Some(info_dict) = self.info()? {
313            if let Some(title) = info_dict.get("Title").and_then(|o| o.as_string()) {
314                metadata.title = title.as_str().ok().map(|s| s.to_string());
315            }
316            if let Some(author) = info_dict.get("Author").and_then(|o| o.as_string()) {
317                metadata.author = author.as_str().ok().map(|s| s.to_string());
318            }
319            if let Some(subject) = info_dict.get("Subject").and_then(|o| o.as_string()) {
320                metadata.subject = subject.as_str().ok().map(|s| s.to_string());
321            }
322            if let Some(keywords) = info_dict.get("Keywords").and_then(|o| o.as_string()) {
323                metadata.keywords = keywords.as_str().ok().map(|s| s.to_string());
324            }
325            if let Some(creator) = info_dict.get("Creator").and_then(|o| o.as_string()) {
326                metadata.creator = creator.as_str().ok().map(|s| s.to_string());
327            }
328            if let Some(producer) = info_dict.get("Producer").and_then(|o| o.as_string()) {
329                metadata.producer = producer.as_str().ok().map(|s| s.to_string());
330            }
331        }
332
333        metadata.version = self.version().to_string();
334        metadata.page_count = self.page_count().ok();
335
336        Ok(metadata)
337    }
338
339    /// Initialize the page tree navigator if not already done
340    fn ensure_page_tree(&mut self) -> ParseResult<()> {
341        if self.page_tree.is_none() {
342            let page_count = self.page_count()?;
343            self.page_tree = Some(super::page_tree::PageTree::new(page_count));
344        }
345        Ok(())
346    }
347
348    /// Get a specific page by index (0-based)
349    ///
350    /// Note: This method is currently not implemented due to borrow checker constraints.
351    /// The page_tree needs mutable access to both itself and the reader, which requires
352    /// a redesign of the architecture. Use PdfDocument instead for page access.
353    pub fn get_page(&mut self, _index: u32) -> ParseResult<&super::page_tree::ParsedPage> {
354        self.ensure_page_tree()?;
355
356        // The page_tree needs mutable access to both itself and the reader
357        // This requires a redesign of the architecture to avoid the borrow checker issue
358        // For now, users should convert to PdfDocument using into_document() for page access
359        Err(ParseError::SyntaxError {
360            position: 0,
361            message: "get_page not implemented due to borrow checker constraints. Use PdfDocument instead.".to_string(),
362        })
363    }
364
365    /// Get all pages
366    pub fn get_all_pages(&mut self) -> ParseResult<Vec<super::page_tree::ParsedPage>> {
367        let page_count = self.page_count()?;
368        let mut pages = Vec::with_capacity(page_count as usize);
369
370        for i in 0..page_count {
371            let page = self.get_page(i)?.clone();
372            pages.push(page);
373        }
374
375        Ok(pages)
376    }
377
378    /// Convert this reader into a PdfDocument for easier page access
379    pub fn into_document(self) -> super::document::PdfDocument<R> {
380        super::document::PdfDocument::new(self)
381    }
382}
383
/// Document metadata
///
/// Every optional field is `None` when the corresponding piece of
/// information is not available.
#[derive(Debug, Default, Clone)]
pub struct DocumentMetadata {
    /// Document title.
    pub title: Option<String>,
    /// Document author.
    pub author: Option<String>,
    /// Document subject.
    pub subject: Option<String>,
    /// Keywords associated with the document.
    pub keywords: Option<String>,
    /// Application that created the original content.
    pub creator: Option<String>,
    /// Application that produced the PDF.
    pub producer: Option<String>,
    /// Creation date, kept as a string.
    pub creation_date: Option<String>,
    /// Last modification date, kept as a string.
    pub modification_date: Option<String>,
    /// PDF version as text (empty for a default-constructed value).
    pub version: String,
    /// Number of pages, when it could be determined.
    pub page_count: Option<u32>,
}
398
#[cfg(test)]
mod tests {

    use super::*;
    use crate::parser::objects::{PdfName, PdfString};
    use crate::parser::test_helpers::*;
    use std::io::Cursor;

    /// Wrap `data` in a cursor and try to build a reader over it.
    fn reader_result<D: AsRef<[u8]>>(data: D) -> ParseResult<PdfReader<Cursor<D>>> {
        PdfReader::new(Cursor::new(data))
    }

    /// Build a reader over `data`, panicking when construction fails.
    fn reader_over<D: AsRef<[u8]>>(data: D) -> PdfReader<Cursor<D>> {
        reader_result(data).unwrap()
    }

    #[test]
    fn test_reader_construction() {
        assert!(reader_result(create_minimal_pdf()).is_ok());
    }

    #[test]
    fn test_reader_version() {
        let reader = reader_over(create_minimal_pdf());
        let version = reader.version();
        assert_eq!(version.major, 1);
        assert_eq!(version.minor, 4);
    }

    #[test]
    fn test_reader_different_versions() {
        const VERSIONS: [&str; 9] = [
            "1.0", "1.1", "1.2", "1.3", "1.4", "1.5", "1.6", "1.7", "2.0",
        ];

        for version in VERSIONS.iter().copied() {
            let reader = reader_over(create_pdf_with_version(version));

            let mut numbers = version.split('.').map(|part| part.parse::<u8>().unwrap());
            let expected_major = numbers.next().unwrap();
            let expected_minor = numbers.next().unwrap();

            assert_eq!(reader.version().major, expected_major);
            assert_eq!(reader.version().minor, expected_minor);
        }
    }

    #[test]
    fn test_reader_catalog() {
        let mut reader = reader_over(create_minimal_pdf());

        let catalog = reader.catalog();
        assert!(catalog.is_ok());

        let expected_type = PdfObject::Name(PdfName("Catalog".to_string()));
        assert_eq!(catalog.unwrap().get("Type"), Some(&expected_type));
    }

    #[test]
    fn test_reader_info_none() {
        let mut reader = reader_over(create_minimal_pdf());

        assert!(reader.info().unwrap().is_none());
    }

    #[test]
    fn test_reader_info_present() {
        let mut reader = reader_over(create_pdf_with_info());

        let info = reader.info().unwrap();
        assert!(info.is_some());
        let info_dict = info.unwrap();

        let pdf_string = |text: &str| PdfObject::String(PdfString(text.to_string().into_bytes()));

        assert_eq!(info_dict.get("Title"), Some(&pdf_string("Test PDF")));
        assert_eq!(info_dict.get("Author"), Some(&pdf_string("Test Author")));
    }

    #[test]
    fn test_reader_get_object() {
        let mut reader = reader_over(create_minimal_pdf());

        // The catalog is stored as `1 0 obj`.
        let obj = reader.get_object(1, 0);
        assert!(obj.is_ok());
        assert!(obj.unwrap().as_dict().is_some());
    }

    #[test]
    fn test_reader_get_invalid_object() {
        let mut reader = reader_over(create_minimal_pdf());

        // No object carries this number.
        assert!(reader.get_object(999, 0).is_err());
    }

    #[test]
    fn test_reader_get_free_object() {
        let mut reader = reader_over(create_minimal_pdf());

        // Object 0 is always free (`f` flag in the xref table).
        let obj = reader.get_object(0, 65535);
        assert!(obj.is_ok());
        assert_eq!(obj.unwrap(), &PdfObject::Null);
    }

    #[test]
    fn test_reader_resolve_reference() {
        let mut reader = reader_over(create_minimal_pdf());

        // A reference pointing at the catalog.
        let ref_obj = PdfObject::Reference(1, 0);
        let resolved = reader.resolve(&ref_obj);

        assert!(resolved.is_ok());
        assert!(resolved.unwrap().as_dict().is_some());
    }

    #[test]
    fn test_reader_resolve_non_reference() {
        let mut reader = reader_over(create_minimal_pdf());

        // Anything that is not a reference comes back untouched.
        let int_obj = PdfObject::Integer(42);
        let resolved = reader.resolve(&int_obj).unwrap();

        assert_eq!(resolved, &PdfObject::Integer(42));
    }

    #[test]
    fn test_reader_cache_behavior() {
        let mut reader = reader_over(create_minimal_pdf());

        // First pass loads the object, second pass should be served from the cache.
        for _ in 0..2 {
            let obj = reader.get_object(1, 0).unwrap();
            assert!(obj.as_dict().is_some());
        }
    }

    #[test]
    fn test_reader_wrong_generation() {
        let mut reader = reader_over(create_minimal_pdf());

        // Object 1 exists, but not with this generation number.
        assert!(reader.get_object(1, 99).is_err());
    }

    #[test]
    fn test_reader_invalid_pdf() {
        let invalid_data = b"This is not a PDF file".to_vec();

        assert!(reader_result(invalid_data).is_err());
    }

    #[test]
    fn test_reader_corrupt_xref() {
        let corrupt_pdf = b"%PDF-1.4\n\
            1 0 obj\n\
            << /Type /Catalog >>\n\
            endobj\n\
            xref\n\
            corrupted xref table\n\
            trailer\n\
            << /Size 2 /Root 1 0 R >>\n\
            startxref\n\
            24\n\
            %%EOF"
            .to_vec();

        assert!(reader_result(corrupt_pdf).is_err());
    }

    #[test]
    fn test_reader_missing_trailer() {
        // The trailing space before each `\n` in the xref entries is significant.
        let pdf_no_trailer = b"%PDF-1.4\n\
            1 0 obj\n\
            << /Type /Catalog >>\n\
            endobj\n\
            xref\n\
            0 2\n\
            0000000000 65535 f \n\
            0000000009 00000 n \n\
            startxref\n\
            24\n\
            %%EOF"
            .to_vec();

        assert!(reader_result(pdf_no_trailer).is_err());
    }

    #[test]
    fn test_reader_empty_pdf() {
        assert!(reader_result(Vec::new()).is_err());
    }

    #[test]
    fn test_reader_page_count() {
        let mut reader = reader_over(create_minimal_pdf());

        let count = reader.page_count();
        assert!(count.is_ok());
        assert_eq!(count.unwrap(), 0); // Minimal PDF has no pages
    }

    #[test]
    fn test_reader_into_document() {
        let document = reader_over(create_minimal_pdf()).into_document();

        // The document must be usable.
        assert!(document.page_count().is_ok());
    }

    #[test]
    fn test_reader_pages_dict() {
        let mut reader = reader_over(create_minimal_pdf());

        let pages = reader.pages();
        assert!(pages.is_ok());

        let expected_type = PdfObject::Name(PdfName("Pages".to_string()));
        assert_eq!(pages.unwrap().get("Type"), Some(&expected_type));
    }

    #[test]
    fn test_reader_pdf_with_binary_data() {
        assert!(reader_result(create_pdf_with_binary_marker()).is_ok());
    }

    #[test]
    fn test_reader_metadata() {
        let mut reader = reader_over(create_pdf_with_info());

        let metadata = reader.metadata().unwrap();
        assert_eq!(metadata.title, Some("Test PDF".to_string()));
        assert_eq!(metadata.author, Some("Test Author".to_string()));
        assert_eq!(metadata.subject, Some("Testing".to_string()));
        assert_eq!(metadata.version, "1.4".to_string());
    }

    #[test]
    fn test_reader_metadata_empty() {
        let mut reader = reader_over(create_minimal_pdf());

        let metadata = reader.metadata().unwrap();
        assert!(metadata.title.is_none());
        assert!(metadata.author.is_none());
        assert_eq!(metadata.version, "1.4".to_string());
        assert_eq!(metadata.page_count, Some(0));
    }

    #[test]
    fn test_reader_object_number_mismatch() {
        // Exercises the mismatch handling through the public API: a valid
        // PDF is opened and an existing object is requested with the wrong
        // generation number, then a non-existent object is requested.
        let mut reader = reader_over(create_minimal_pdf());

        // Object 1 exists with generation 0, so generation 99 must be rejected.
        let wrong_generation = reader.get_object(1, 99);
        assert!(wrong_generation.is_err());

        // No object has number 999.
        let unknown_object = reader.get_object(999, 0);
        assert!(unknown_object.is_err());
    }

    #[test]
    fn test_document_metadata_struct() {
        let text = |value: &str| Some(value.to_string());

        let metadata = DocumentMetadata {
            title: text("Title"),
            author: text("Author"),
            subject: text("Subject"),
            keywords: text("Keywords"),
            creator: text("Creator"),
            producer: text("Producer"),
            creation_date: text("D:20240101"),
            modification_date: text("D:20240102"),
            version: "1.5".to_string(),
            page_count: Some(10),
        };

        assert_eq!(metadata.title, Some("Title".to_string()));
        assert_eq!(metadata.page_count, Some(10));
    }

    #[test]
    fn test_document_metadata_default() {
        let metadata = DocumentMetadata::default();

        assert!(metadata.title.is_none());
        assert!(metadata.author.is_none());
        assert!(metadata.subject.is_none());
        assert!(metadata.keywords.is_none());
        assert!(metadata.creator.is_none());
        assert!(metadata.producer.is_none());
        assert!(metadata.creation_date.is_none());
        assert!(metadata.modification_date.is_none());
        assert_eq!(metadata.version, "".to_string());
        assert!(metadata.page_count.is_none());
    }

    #[test]
    fn test_document_metadata_clone() {
        let original = DocumentMetadata {
            title: Some("Test".to_string()),
            version: "1.4".to_string(),
            ..Default::default()
        };

        let cloned = original.clone();
        assert_eq!(cloned.title, Some("Test".to_string()));
        assert_eq!(cloned.version, "1.4".to_string());
    }

    #[test]
    fn test_reader_trailer_validation_error() {
        // The trailer lacks the required /Root key.
        let bad_pdf = b"%PDF-1.4\n\
            1 0 obj\n\
            << /Type /Catalog >>\n\
            endobj\n\
            xref\n\
            0 2\n\
            0000000000 65535 f \n\
            0000000009 00000 n \n\
            trailer\n\
            << /Size 2 >>\n\
            startxref\n\
            46\n\
            %%EOF"
            .to_vec();

        let result = reader_result(bad_pdf);
        assert!(result.is_err()); // Should fail because trailer is missing /Root
    }
}