oxidize_pdf/parser/
reader.rs

1//! High-level PDF Reader API
2//!
3//! Provides a simple interface for reading PDF files
4
5use super::header::PdfHeader;
6use super::object_stream::ObjectStream;
7use super::objects::{PdfDictionary, PdfObject};
8use super::trailer::PdfTrailer;
9use super::xref::XRefTable;
10use super::{ParseError, ParseResult};
11use std::collections::HashMap;
12use std::fs::File;
13use std::io::{BufReader, Read, Seek};
14use std::path::Path;
15
16/// High-level PDF reader
17pub struct PdfReader<R: Read + Seek> {
18    reader: BufReader<R>,
19    header: PdfHeader,
20    xref: XRefTable,
21    trailer: PdfTrailer,
22    /// Cache of loaded objects
23    object_cache: HashMap<(u32, u16), PdfObject>,
24    /// Cache of object streams
25    object_stream_cache: HashMap<u32, ObjectStream>,
26    /// Page tree navigator
27    page_tree: Option<super::page_tree::PageTree>,
28}
29
30impl PdfReader<File> {
31    /// Open a PDF file from a path
32    pub fn open<P: AsRef<Path>>(path: P) -> ParseResult<Self> {
33        let file = File::open(path)?;
34        Self::new(file)
35    }
36
37    /// Open a PDF file as a PdfDocument
38    pub fn open_document<P: AsRef<Path>>(
39        path: P,
40    ) -> ParseResult<super::document::PdfDocument<File>> {
41        let reader = Self::open(path)?;
42        Ok(reader.into_document())
43    }
44}
45
46impl<R: Read + Seek> PdfReader<R> {
47    /// Create a new PDF reader from a reader
48    pub fn new(reader: R) -> ParseResult<Self> {
49        let mut buf_reader = BufReader::new(reader);
50
51        // Parse header
52        let header = PdfHeader::parse(&mut buf_reader)?;
53        // Parse xref table
54        let xref = XRefTable::parse(&mut buf_reader)?;
55
56        // Get trailer
57        let trailer_dict = xref.trailer().ok_or(ParseError::InvalidTrailer)?.clone();
58
59        let xref_offset = xref.xref_offset();
60        let trailer = PdfTrailer::from_dict(trailer_dict, xref_offset)?;
61
62        // Validate trailer
63        trailer.validate()?;
64
65        Ok(Self {
66            reader: buf_reader,
67            header,
68            xref,
69            trailer,
70            object_cache: HashMap::new(),
71            object_stream_cache: HashMap::new(),
72            page_tree: None,
73        })
74    }
75
76    /// Get the PDF version
77    pub fn version(&self) -> &super::header::PdfVersion {
78        &self.header.version
79    }
80
81    /// Get the document catalog
82    pub fn catalog(&mut self) -> ParseResult<&PdfDictionary> {
83        let (obj_num, gen_num) = self.trailer.root()?;
84        let catalog = self.get_object(obj_num, gen_num)?;
85
86        catalog.as_dict().ok_or_else(|| ParseError::SyntaxError {
87            position: 0,
88            message: "Catalog is not a dictionary".to_string(),
89        })
90    }
91
92    /// Get the document info dictionary
93    pub fn info(&mut self) -> ParseResult<Option<&PdfDictionary>> {
94        match self.trailer.info() {
95            Some((obj_num, gen_num)) => {
96                let info = self.get_object(obj_num, gen_num)?;
97                Ok(info.as_dict())
98            }
99            None => Ok(None),
100        }
101    }
102
103    /// Get an object by reference
104    pub fn get_object(&mut self, obj_num: u32, gen_num: u16) -> ParseResult<&PdfObject> {
105        let key = (obj_num, gen_num);
106
107        // Check cache first
108        if self.object_cache.contains_key(&key) {
109            return Ok(&self.object_cache[&key]);
110        }
111
112        // Check if this is a compressed object
113        if let Some(ext_entry) = self.xref.get_extended_entry(obj_num) {
114            if let Some((stream_obj_num, index_in_stream)) = ext_entry.compressed_info {
115                // This is a compressed object - need to extract from object stream
116                return self.get_compressed_object(
117                    obj_num,
118                    gen_num,
119                    stream_obj_num,
120                    index_in_stream,
121                );
122            }
123        }
124
125        // Get xref entry
126        let entry = self
127            .xref
128            .get_entry(obj_num)
129            .ok_or(ParseError::InvalidReference(obj_num, gen_num))?;
130
131        if !entry.in_use {
132            // Free object
133            self.object_cache.insert(key, PdfObject::Null);
134            return Ok(&self.object_cache[&key]);
135        }
136
137        if entry.generation != gen_num {
138            return Err(ParseError::InvalidReference(obj_num, gen_num));
139        }
140
141        // Seek to object position
142        self.reader.seek(std::io::SeekFrom::Start(entry.offset))?;
143
144        // Parse object header (obj_num gen_num obj)
145        let mut lexer = super::lexer::Lexer::new(&mut self.reader);
146
147        // Read object number
148        let token = lexer.next_token()?;
149        let read_obj_num = match token {
150            super::lexer::Token::Integer(n) => n as u32,
151            _ => {
152                return Err(ParseError::SyntaxError {
153                    position: entry.offset as usize,
154                    message: "Expected object number".to_string(),
155                })
156            }
157        };
158
159        if read_obj_num != obj_num {
160            return Err(ParseError::SyntaxError {
161                position: entry.offset as usize,
162                message: format!(
163                    "Object number mismatch: expected {obj_num}, found {read_obj_num}"
164                ),
165            });
166        }
167
168        // Read generation number
169        let token = lexer.next_token()?;
170        let read_gen_num = match token {
171            super::lexer::Token::Integer(n) => n as u16,
172            _ => {
173                return Err(ParseError::SyntaxError {
174                    position: entry.offset as usize,
175                    message: "Expected generation number".to_string(),
176                })
177            }
178        };
179
180        if read_gen_num != gen_num {
181            return Err(ParseError::SyntaxError {
182                position: entry.offset as usize,
183                message: format!(
184                    "Generation number mismatch: expected {gen_num}, found {read_gen_num}"
185                ),
186            });
187        }
188
189        // Read 'obj' keyword
190        let token = lexer.next_token()?;
191        match token {
192            super::lexer::Token::Obj => {}
193            _ => {
194                return Err(ParseError::SyntaxError {
195                    position: entry.offset as usize,
196                    message: "Expected 'obj' keyword".to_string(),
197                })
198            }
199        };
200
201        // Parse the actual object
202        let obj = PdfObject::parse(&mut lexer)?;
203
204        // Read 'endobj' keyword
205        let token = lexer.next_token()?;
206        match token {
207            super::lexer::Token::EndObj => {}
208            _ => {
209                return Err(ParseError::SyntaxError {
210                    position: entry.offset as usize,
211                    message: "Expected 'endobj' keyword".to_string(),
212                })
213            }
214        };
215
216        // Cache the object
217        self.object_cache.insert(key, obj);
218        Ok(&self.object_cache[&key])
219    }
220
221    /// Resolve a reference to get the actual object
222    pub fn resolve<'a>(&'a mut self, obj: &'a PdfObject) -> ParseResult<&'a PdfObject> {
223        match obj {
224            PdfObject::Reference(obj_num, gen_num) => self.get_object(*obj_num, *gen_num),
225            _ => Ok(obj),
226        }
227    }
228
229    /// Get a compressed object from an object stream
230    fn get_compressed_object(
231        &mut self,
232        obj_num: u32,
233        gen_num: u16,
234        stream_obj_num: u32,
235        _index_in_stream: u32,
236    ) -> ParseResult<&PdfObject> {
237        let key = (obj_num, gen_num);
238
239        // Load the object stream if not cached
240        if !self.object_stream_cache.contains_key(&stream_obj_num) {
241            // Get the stream object
242            let stream_obj = self.get_object(stream_obj_num, 0)?;
243
244            if let Some(stream) = stream_obj.as_stream() {
245                // Parse the object stream
246                let obj_stream = ObjectStream::parse(stream.clone())?;
247                self.object_stream_cache.insert(stream_obj_num, obj_stream);
248            } else {
249                return Err(ParseError::SyntaxError {
250                    position: 0,
251                    message: format!("Object {stream_obj_num} is not a stream"),
252                });
253            }
254        }
255
256        // Get the object from the stream
257        let obj_stream = &self.object_stream_cache[&stream_obj_num];
258        let obj = obj_stream
259            .get_object(obj_num)
260            .ok_or_else(|| ParseError::SyntaxError {
261                position: 0,
262                message: format!("Object {obj_num} not found in object stream {stream_obj_num}"),
263            })?;
264
265        // Cache the object
266        self.object_cache.insert(key, obj.clone());
267        Ok(&self.object_cache[&key])
268    }
269
270    /// Get the page tree root
271    pub fn pages(&mut self) -> ParseResult<&PdfDictionary> {
272        // Get the pages reference from catalog first
273        let (pages_obj_num, pages_gen_num) = {
274            let catalog = self.catalog()?;
275            let pages_ref = catalog
276                .get("Pages")
277                .ok_or_else(|| ParseError::MissingKey("Pages".to_string()))?;
278
279            match pages_ref {
280                PdfObject::Reference(obj_num, gen_num) => (*obj_num, *gen_num),
281                _ => {
282                    return Err(ParseError::SyntaxError {
283                        position: 0,
284                        message: "Pages must be a reference".to_string(),
285                    })
286                }
287            }
288        };
289
290        // Now we can get the pages object without holding a reference to catalog
291        let pages_obj = self.get_object(pages_obj_num, pages_gen_num)?;
292        pages_obj.as_dict().ok_or_else(|| ParseError::SyntaxError {
293            position: 0,
294            message: "Pages is not a dictionary".to_string(),
295        })
296    }
297
298    /// Get the number of pages
299    pub fn page_count(&mut self) -> ParseResult<u32> {
300        let pages = self.pages()?;
301        pages
302            .get("Count")
303            .and_then(|obj| obj.as_integer())
304            .map(|count| count as u32)
305            .ok_or_else(|| ParseError::MissingKey("Count".to_string()))
306    }
307
308    /// Get metadata from the document
309    pub fn metadata(&mut self) -> ParseResult<DocumentMetadata> {
310        let mut metadata = DocumentMetadata::default();
311
312        if let Some(info_dict) = self.info()? {
313            if let Some(title) = info_dict.get("Title").and_then(|o| o.as_string()) {
314                metadata.title = title.as_str().ok().map(|s| s.to_string());
315            }
316            if let Some(author) = info_dict.get("Author").and_then(|o| o.as_string()) {
317                metadata.author = author.as_str().ok().map(|s| s.to_string());
318            }
319            if let Some(subject) = info_dict.get("Subject").and_then(|o| o.as_string()) {
320                metadata.subject = subject.as_str().ok().map(|s| s.to_string());
321            }
322            if let Some(keywords) = info_dict.get("Keywords").and_then(|o| o.as_string()) {
323                metadata.keywords = keywords.as_str().ok().map(|s| s.to_string());
324            }
325            if let Some(creator) = info_dict.get("Creator").and_then(|o| o.as_string()) {
326                metadata.creator = creator.as_str().ok().map(|s| s.to_string());
327            }
328            if let Some(producer) = info_dict.get("Producer").and_then(|o| o.as_string()) {
329                metadata.producer = producer.as_str().ok().map(|s| s.to_string());
330            }
331        }
332
333        metadata.version = self.version().to_string();
334        metadata.page_count = self.page_count().ok();
335
336        Ok(metadata)
337    }
338
339    /// Initialize the page tree navigator if not already done
340    fn ensure_page_tree(&mut self) -> ParseResult<()> {
341        if self.page_tree.is_none() {
342            let page_count = self.page_count()?;
343            self.page_tree = Some(super::page_tree::PageTree::new(page_count));
344        }
345        Ok(())
346    }
347
348    /// Get a specific page by index (0-based)
349    /// 
350    /// Note: This method is currently not implemented due to borrow checker constraints.
351    /// The page_tree needs mutable access to both itself and the reader, which requires
352    /// a redesign of the architecture. Use PdfDocument instead for page access.
353    pub fn get_page(&mut self, _index: u32) -> ParseResult<&super::page_tree::ParsedPage> {
354        self.ensure_page_tree()?;
355
356        // The page_tree needs mutable access to both itself and the reader
357        // This requires a redesign of the architecture to avoid the borrow checker issue
358        // For now, users should convert to PdfDocument using into_document() for page access
359        Err(ParseError::SyntaxError {
360            position: 0,
361            message: "get_page not implemented due to borrow checker constraints. Use PdfDocument instead.".to_string(),
362        })
363    }
364
365    /// Get all pages
366    pub fn get_all_pages(&mut self) -> ParseResult<Vec<super::page_tree::ParsedPage>> {
367        let page_count = self.page_count()?;
368        let mut pages = Vec::with_capacity(page_count as usize);
369
370        for i in 0..page_count {
371            let page = self.get_page(i)?.clone();
372            pages.push(page);
373        }
374
375        Ok(pages)
376    }
377
378    /// Convert this reader into a PdfDocument for easier page access
379    pub fn into_document(self) -> super::document::PdfDocument<R> {
380        super::document::PdfDocument::new(self)
381    }
382}
383
/// Document metadata.
///
/// Every text field is optional; `Default` leaves them all `None`, with an
/// empty `version` string and no page count.
#[derive(Clone, Debug, Default)]
pub struct DocumentMetadata {
    /// Document title
    pub title: Option<String>,
    /// Document author
    pub author: Option<String>,
    /// Document subject
    pub subject: Option<String>,
    /// Keywords associated with the document
    pub keywords: Option<String>,
    /// Creator entry of the document
    pub creator: Option<String>,
    /// Producer entry of the document
    pub producer: Option<String>,
    /// Creation date, kept as a string (e.g. `D:20240101`)
    pub creation_date: Option<String>,
    /// Modification date, kept as a string (e.g. `D:20240102`)
    pub modification_date: Option<String>,
    /// PDF version as text (e.g. `1.4`)
    pub version: String,
    /// Number of pages, when known
    pub page_count: Option<u32>,
}
398
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::objects::{PdfName, PdfString};
    use crate::parser::test_helpers::*;
    use std::io::Cursor;

    /// Builds a reader over `data`, panicking if the bytes are rejected.
    fn reader_over<T: AsRef<[u8]>>(data: T) -> PdfReader<Cursor<T>> {
        PdfReader::new(Cursor::new(data)).unwrap()
    }

    #[test]
    fn test_reader_construction() {
        let outcome = PdfReader::new(Cursor::new(create_minimal_pdf()));
        assert!(outcome.is_ok());
    }

    #[test]
    fn test_reader_version() {
        let reader = reader_over(create_minimal_pdf());
        let version = reader.version();
        assert_eq!(version.major, 1);
        assert_eq!(version.minor, 4);
    }

    #[test]
    fn test_reader_different_versions() {
        for version in ["1.0", "1.1", "1.2", "1.3", "1.4", "1.5", "1.6", "1.7", "2.0"] {
            let reader = reader_over(create_pdf_with_version(version));

            let parts: Vec<&str> = version.split('.').collect();
            let expected_major = parts[0].parse::<u8>().unwrap();
            let expected_minor = parts[1].parse::<u8>().unwrap();
            assert_eq!(reader.version().major, expected_major);
            assert_eq!(reader.version().minor, expected_minor);
        }
    }

    #[test]
    fn test_reader_catalog() {
        let mut reader = reader_over(create_minimal_pdf());

        let catalog = reader.catalog();
        assert!(catalog.is_ok());

        let expected_type = PdfObject::Name(PdfName("Catalog".to_string()));
        assert_eq!(catalog.unwrap().get("Type"), Some(&expected_type));
    }

    #[test]
    fn test_reader_info_none() {
        let mut reader = reader_over(create_minimal_pdf());
        assert!(reader.info().unwrap().is_none());
    }

    #[test]
    fn test_reader_info_present() {
        let mut reader = reader_over(create_pdf_with_info());

        let info = reader.info().unwrap();
        assert!(info.is_some());
        let info_dict = info.unwrap();

        let expected_title = PdfObject::String(PdfString("Test PDF".to_string().into_bytes()));
        let expected_author =
            PdfObject::String(PdfString("Test Author".to_string().into_bytes()));
        assert_eq!(info_dict.get("Title"), Some(&expected_title));
        assert_eq!(info_dict.get("Author"), Some(&expected_author));
    }

    #[test]
    fn test_reader_get_object() {
        let mut reader = reader_over(create_minimal_pdf());

        // The catalog is stored as `1 0 obj`.
        let obj = reader.get_object(1, 0);
        assert!(obj.is_ok());
        assert!(obj.unwrap().as_dict().is_some());
    }

    #[test]
    fn test_reader_get_invalid_object() {
        let mut reader = reader_over(create_minimal_pdf());

        // No object with this number exists.
        assert!(reader.get_object(999, 0).is_err());
    }

    #[test]
    fn test_reader_get_free_object() {
        let mut reader = reader_over(create_minimal_pdf());

        // Object 0 is always free (f flag in xref) and resolves to Null.
        let obj = reader.get_object(0, 65535);
        assert!(obj.is_ok());
        assert_eq!(obj.unwrap(), &PdfObject::Null);
    }

    #[test]
    fn test_reader_resolve_reference() {
        let mut reader = reader_over(create_minimal_pdf());

        // A reference to the catalog must resolve to a dictionary.
        let catalog_ref = PdfObject::Reference(1, 0);
        let resolved = reader.resolve(&catalog_ref);

        assert!(resolved.is_ok());
        assert!(resolved.unwrap().as_dict().is_some());
    }

    #[test]
    fn test_reader_resolve_non_reference() {
        let mut reader = reader_over(create_minimal_pdf());

        // Anything that is not a reference comes back unchanged.
        let int_obj = PdfObject::Integer(42);
        let resolved = reader.resolve(&int_obj).unwrap();

        assert_eq!(resolved, &PdfObject::Integer(42));
    }

    #[test]
    fn test_reader_cache_behavior() {
        let mut reader = reader_over(create_minimal_pdf());

        // First lookup loads the object from the file.
        let first = reader.get_object(1, 0).unwrap();
        assert!(first.as_dict().is_some());

        // Second lookup of the same object should be served from the cache.
        let second = reader.get_object(1, 0).unwrap();
        assert!(second.as_dict().is_some());
    }

    #[test]
    fn test_reader_wrong_generation() {
        let mut reader = reader_over(create_minimal_pdf());

        // Object 1 exists, but not with this generation number.
        assert!(reader.get_object(1, 99).is_err());
    }

    #[test]
    fn test_reader_invalid_pdf() {
        let invalid_data = b"This is not a PDF file";
        let result = PdfReader::new(Cursor::new(invalid_data.to_vec()));

        assert!(result.is_err());
    }

    #[test]
    fn test_reader_corrupt_xref() {
        let corrupt_pdf = b"%PDF-1.4\n\
            1 0 obj\n\
            << /Type /Catalog >>\n\
            endobj\n\
            xref\n\
            corrupted xref table\n\
            trailer\n\
            << /Size 2 /Root 1 0 R >>\n\
            startxref\n\
            24\n\
            %%EOF"
            .to_vec();

        let result = PdfReader::new(Cursor::new(corrupt_pdf));
        assert!(result.is_err());
    }

    #[test]
    fn test_reader_missing_trailer() {
        // Each xref row ends with a space before its newline.
        let pdf_no_trailer = b"%PDF-1.4\n\
            1 0 obj\n\
            << /Type /Catalog >>\n\
            endobj\n\
            xref\n\
            0 2\n\
            0000000000 65535 f \n\
            0000000009 00000 n \n\
            startxref\n\
            24\n\
            %%EOF"
            .to_vec();

        let result = PdfReader::new(Cursor::new(pdf_no_trailer));
        assert!(result.is_err());
    }

    #[test]
    fn test_reader_empty_pdf() {
        let result = PdfReader::new(Cursor::new(Vec::new()));
        assert!(result.is_err());
    }

    #[test]
    fn test_reader_page_count() {
        let mut reader = reader_over(create_minimal_pdf());

        let count = reader.page_count();
        assert!(count.is_ok());
        assert_eq!(count.unwrap(), 0); // Minimal PDF has no pages
    }

    #[test]
    fn test_reader_into_document() {
        let reader = reader_over(create_minimal_pdf());

        let document = reader.into_document();
        // The document must still be able to report a page count.
        let page_count = document.page_count();
        assert!(page_count.is_ok());
    }

    #[test]
    fn test_reader_pages_dict() {
        let mut reader = reader_over(create_minimal_pdf());

        let pages = reader.pages();
        assert!(pages.is_ok());

        let expected_type = PdfObject::Name(PdfName("Pages".to_string()));
        assert_eq!(pages.unwrap().get("Type"), Some(&expected_type));
    }

    #[test]
    fn test_reader_pdf_with_binary_data() {
        let result = PdfReader::new(Cursor::new(create_pdf_with_binary_marker()));
        assert!(result.is_ok());
    }

    #[test]
    fn test_reader_metadata() {
        let mut reader = reader_over(create_pdf_with_info());

        let metadata = reader.metadata().unwrap();
        assert_eq!(metadata.title, Some("Test PDF".to_string()));
        assert_eq!(metadata.author, Some("Test Author".to_string()));
        assert_eq!(metadata.subject, Some("Testing".to_string()));
        assert_eq!(metadata.version, "1.4".to_string());
    }

    #[test]
    fn test_reader_metadata_empty() {
        let mut reader = reader_over(create_minimal_pdf());

        let metadata = reader.metadata().unwrap();
        assert!(metadata.title.is_none());
        assert!(metadata.author.is_none());
        assert_eq!(metadata.version, "1.4".to_string());
        assert_eq!(metadata.page_count, Some(0));
    }

    #[test]
    fn test_reader_object_number_mismatch() {
        // Exercises the reader's rejection of references that do not match the
        // file: a valid PDF is opened and then queried with a wrong generation
        // number and with a non-existent object number.
        let mut reader = reader_over(create_minimal_pdf());

        // Object 1 exists with generation 0, so generation 99 must fail.
        assert!(reader.get_object(1, 99).is_err());

        // A non-existent object number must fail as well.
        assert!(reader.get_object(999, 0).is_err());
    }

    #[test]
    fn test_document_metadata_struct() {
        let metadata = DocumentMetadata {
            title: Some("Title".to_string()),
            author: Some("Author".to_string()),
            subject: Some("Subject".to_string()),
            keywords: Some("Keywords".to_string()),
            creator: Some("Creator".to_string()),
            producer: Some("Producer".to_string()),
            creation_date: Some("D:20240101".to_string()),
            modification_date: Some("D:20240102".to_string()),
            version: "1.5".to_string(),
            page_count: Some(10),
        };

        assert_eq!(metadata.title, Some("Title".to_string()));
        assert_eq!(metadata.page_count, Some(10));
    }

    #[test]
    fn test_document_metadata_default() {
        let metadata = DocumentMetadata::default();

        assert!(metadata.title.is_none());
        assert!(metadata.author.is_none());
        assert!(metadata.subject.is_none());
        assert!(metadata.keywords.is_none());
        assert!(metadata.creator.is_none());
        assert!(metadata.producer.is_none());
        assert!(metadata.creation_date.is_none());
        assert!(metadata.modification_date.is_none());
        assert_eq!(metadata.version, "".to_string());
        assert!(metadata.page_count.is_none());
    }

    #[test]
    fn test_document_metadata_clone() {
        let metadata = DocumentMetadata {
            title: Some("Test".to_string()),
            version: "1.4".to_string(),
            ..Default::default()
        };

        let cloned = metadata.clone();
        assert_eq!(cloned.title, Some("Test".to_string()));
        assert_eq!(cloned.version, "1.4".to_string());
    }

    #[test]
    fn test_reader_trailer_validation_error() {
        // PDF with invalid trailer (missing required keys).
        // Each xref row ends with a space before its newline.
        let bad_pdf = b"%PDF-1.4\n\
            1 0 obj\n\
            << /Type /Catalog >>\n\
            endobj\n\
            xref\n\
            0 2\n\
            0000000000 65535 f \n\
            0000000009 00000 n \n\
            trailer\n\
            << /Size 2 >>\n\
            startxref\n\
            46\n\
            %%EOF"
            .to_vec();

        let result = PdfReader::new(Cursor::new(bad_pdf));
        assert!(result.is_err()); // Should fail because trailer is missing /Root
    }
}
779}