Skip to main content

oxidize_pdf/parser/
optimized_reader.rs

//! Optimized PDF reader with LRU caching.
//!
//! This module provides a variant of `PdfReader` that keeps parsed objects in
//! an LRU cache instead of an unbounded `HashMap`, so memory usage stays controlled.
5
6use super::header::PdfHeader;
7use super::object_stream::ObjectStream;
8use super::objects::{PdfDictionary, PdfObject};
9use super::stack_safe::StackSafeContext;
10use super::trailer::PdfTrailer;
11use super::xref::XRefTable;
12use super::{ParseError, ParseOptions, ParseResult};
13use crate::memory::{LruCache, MemoryOptions, MemoryStats};
14use crate::objects::ObjectId;
15use std::collections::HashMap;
16use std::fs::File;
17use std::io::{BufReader, Read, Seek, SeekFrom};
18use std::path::Path;
19use std::sync::Arc;
20
21/// Optimized PDF reader with LRU caching
22pub struct OptimizedPdfReader<R: Read + Seek> {
23    reader: BufReader<R>,
24    header: PdfHeader,
25    xref: XRefTable,
26    trailer: PdfTrailer,
27    /// LRU cache for loaded objects
28    object_cache: LruCache<ObjectId, Arc<PdfObject>>,
29    /// Cache of object streams
30    object_stream_cache: HashMap<u32, ObjectStream>,
31    /// Page tree navigator
32    #[allow(dead_code)]
33    page_tree: Option<super::page_tree::PageTree>,
34    /// Stack-safe parsing context
35    #[allow(dead_code)]
36    parse_context: StackSafeContext,
37    /// Parsing options
38    options: super::ParseOptions,
39    /// Memory options
40    #[allow(dead_code)]
41    memory_options: MemoryOptions,
42    /// Memory statistics
43    memory_stats: MemoryStats,
44}
45
46impl<R: Read + Seek> OptimizedPdfReader<R> {
47    /// Get parsing options
48    pub fn options(&self) -> &super::ParseOptions {
49        &self.options
50    }
51
52    /// Get memory statistics
53    pub fn memory_stats(&self) -> &MemoryStats {
54        &self.memory_stats
55    }
56
57    /// Clear the object cache
58    pub fn clear_cache(&mut self) {
59        self.object_cache.clear();
60        self.object_stream_cache.clear();
61        self.memory_stats.cached_objects = 0;
62    }
63}
64
65impl OptimizedPdfReader<File> {
66    /// Open a PDF file from a path with memory optimization
67    pub fn open<P: AsRef<Path>>(path: P) -> ParseResult<Self> {
68        let file = File::open(path)?;
69        let options = super::ParseOptions::lenient();
70        let memory_options = MemoryOptions::default();
71        Self::new_with_options(file, options, memory_options)
72    }
73
74    /// Open a PDF file with custom memory options
75    pub fn open_with_memory<P: AsRef<Path>>(
76        path: P,
77        memory_options: MemoryOptions,
78    ) -> ParseResult<Self> {
79        let file = File::open(path)?;
80        let options = super::ParseOptions::lenient();
81        Self::new_with_options(file, options, memory_options)
82    }
83
84    /// Open a PDF file with strict parsing
85    pub fn open_strict<P: AsRef<Path>>(path: P) -> ParseResult<Self> {
86        let file = File::open(path)?;
87        let options = super::ParseOptions::strict();
88        let memory_options = MemoryOptions::default();
89        Self::new_with_options(file, options, memory_options)
90    }
91}
92
93impl<R: Read + Seek> OptimizedPdfReader<R> {
94    /// Create a new PDF reader from a reader
95    pub fn new(reader: R) -> ParseResult<Self> {
96        Self::new_with_options(
97            reader,
98            super::ParseOptions::default(),
99            MemoryOptions::default(),
100        )
101    }
102
103    /// Create a new PDF reader with custom parsing and memory options
104    pub fn new_with_options(
105        reader: R,
106        options: super::ParseOptions,
107        memory_options: MemoryOptions,
108    ) -> ParseResult<Self> {
109        let mut buf_reader = BufReader::new(reader);
110
111        // Check if file is empty
112        let start_pos = buf_reader.stream_position()?;
113        buf_reader.seek(SeekFrom::End(0))?;
114        let file_size = buf_reader.stream_position()?;
115        buf_reader.seek(SeekFrom::Start(start_pos))?;
116
117        if file_size == 0 {
118            return Err(ParseError::EmptyFile);
119        }
120
121        // Parse header
122        let header = PdfHeader::parse(&mut buf_reader)?;
123
124        // Parse xref table
125        let xref = XRefTable::parse_with_options(&mut buf_reader, &options)?;
126
127        // Get trailer
128        let trailer_dict = xref.trailer().ok_or(ParseError::InvalidTrailer)?.clone();
129
130        let xref_offset = xref.xref_offset();
131        let trailer = PdfTrailer::from_dict(trailer_dict, xref_offset)?;
132
133        // Validate trailer
134        trailer.validate()?;
135
136        // Create LRU cache with configured size
137        let cache_size = memory_options.cache_size.max(1);
138        let object_cache = LruCache::new(cache_size);
139
140        Ok(Self {
141            reader: buf_reader,
142            header,
143            xref,
144            trailer,
145            object_cache,
146            object_stream_cache: HashMap::new(),
147            page_tree: None,
148            parse_context: StackSafeContext::new(),
149            options,
150            memory_options,
151            memory_stats: MemoryStats::default(),
152        })
153    }
154
155    /// Get the PDF version
156    pub fn version(&self) -> &super::header::PdfVersion {
157        &self.header.version
158    }
159
160    /// Get the document catalog
161    pub fn catalog(&mut self) -> ParseResult<&PdfDictionary> {
162        // Try to get root from trailer
163        let (obj_num, gen_num) = match self.trailer.root() {
164            Ok(root) => root,
165            Err(_) => {
166                // If Root is missing, try fallback methods
167                #[cfg(debug_assertions)]
168                tracing::debug!("Warning: Trailer missing Root entry, attempting recovery");
169
170                // First try the fallback method
171                if let Some(root) = self.trailer.find_root_fallback() {
172                    root
173                } else {
174                    // Last resort: scan for Catalog object
175                    if let Ok(catalog_ref) = self.find_catalog_object() {
176                        catalog_ref
177                    } else {
178                        return Err(ParseError::MissingKey("Root".to_string()));
179                    }
180                }
181            }
182        };
183
184        let catalog = self.get_object(obj_num, gen_num)?;
185
186        catalog.as_dict().ok_or_else(|| ParseError::SyntaxError {
187            position: 0,
188            message: "Catalog is not a dictionary".to_string(),
189        })
190    }
191
192    /// Get the document info dictionary
193    pub fn info(&mut self) -> ParseResult<Option<&PdfDictionary>> {
194        match self.trailer.info() {
195            Some((obj_num, gen_num)) => {
196                let info = self.get_object(obj_num, gen_num)?;
197                Ok(info.as_dict())
198            }
199            None => Ok(None),
200        }
201    }
202
203    /// Get an object by reference
204    pub fn get_object(&mut self, obj_num: u32, gen_num: u16) -> ParseResult<&PdfObject> {
205        let object_id = ObjectId::new(obj_num, gen_num);
206
207        // Check LRU cache first
208        if let Some(cached_obj) = self.object_cache.get(&object_id) {
209            self.memory_stats.cache_hits += 1;
210            // Convert Arc<PdfObject> to &PdfObject
211            // This is safe because we maintain the Arc in the cache
212            let ptr = Arc::as_ptr(cached_obj);
213            return Ok(unsafe { &*ptr });
214        }
215
216        self.memory_stats.cache_misses += 1;
217
218        // Load object from disk
219        let obj = self.load_object_from_disk(obj_num, gen_num)?;
220
221        // Store in LRU cache
222        let arc_obj = Arc::new(obj);
223        self.object_cache.put(object_id, arc_obj);
224        self.memory_stats.cached_objects = self.object_cache.len();
225
226        // Return reference to cached object
227        // The Arc is owned by the cache, so we can safely return a reference
228        // We need to get it from the cache to ensure lifetime
229        self.object_cache
230            .get(&object_id)
231            .map(|arc| unsafe { &*Arc::as_ptr(arc) })
232            .ok_or(ParseError::SyntaxError {
233                position: 0,
234                message: "Object not in cache after insertion".to_string(),
235            })
236    }
237
238    /// Internal method to load an object from disk
239    fn load_object_from_disk(&mut self, obj_num: u32, gen_num: u16) -> ParseResult<PdfObject> {
240        // Check if this is a compressed object
241        if let Some(ext_entry) = self.xref.get_extended_entry(obj_num) {
242            if let Some((stream_obj_num, index_in_stream)) = ext_entry.compressed_info {
243                // This is a compressed object - need to extract from object stream
244                return self.get_compressed_object_direct(
245                    obj_num,
246                    gen_num,
247                    stream_obj_num,
248                    index_in_stream,
249                );
250            }
251        }
252
253        // Get xref entry
254        let entry = self
255            .xref
256            .get_entry(obj_num)
257            .ok_or(ParseError::InvalidReference(obj_num, gen_num))?;
258
259        if !entry.in_use {
260            // Free object
261            return Ok(PdfObject::Null);
262        }
263
264        if entry.generation != gen_num {
265            return Err(ParseError::InvalidReference(obj_num, gen_num));
266        }
267
268        // Seek to object position
269        self.reader.seek(std::io::SeekFrom::Start(entry.offset))?;
270
271        // Parse object header (obj_num gen_num obj)
272        let mut lexer =
273            super::lexer::Lexer::new_with_options(&mut self.reader, self.options.clone());
274
275        // Read object number with recovery
276        let token = lexer.next_token()?;
277        let read_obj_num = match token {
278            super::lexer::Token::Integer(n) => n as u32,
279            _ => {
280                // Try fallback recovery
281                if self.options.lenient_syntax {
282                    if self.options.collect_warnings {
283                        tracing::debug!(
284                            "Warning: Using expected object number {obj_num} instead of parsed token"
285                        );
286                    }
287                    obj_num
288                } else {
289                    return Err(ParseError::SyntaxError {
290                        position: entry.offset as usize,
291                        message: "Expected object number".to_string(),
292                    });
293                }
294            }
295        };
296
297        if read_obj_num != obj_num && !self.options.lenient_syntax {
298            return Err(ParseError::SyntaxError {
299                position: entry.offset as usize,
300                message: format!(
301                    "Object number mismatch: expected {obj_num}, found {read_obj_num}"
302                ),
303            });
304        }
305
306        // Read generation number
307        let token = lexer.next_token()?;
308        let read_gen_num = match token {
309            super::lexer::Token::Integer(n) => n as u16,
310            _ => {
311                if self.options.lenient_syntax {
312                    if self.options.collect_warnings {
313                        tracing::debug!(
314                            "Warning: Using generation 0 instead of parsed token for object {obj_num}"
315                        );
316                    }
317                    0
318                } else {
319                    return Err(ParseError::SyntaxError {
320                        position: entry.offset as usize,
321                        message: "Expected generation number".to_string(),
322                    });
323                }
324            }
325        };
326
327        if read_gen_num != gen_num && !self.options.lenient_syntax {
328            return Err(ParseError::SyntaxError {
329                position: entry.offset as usize,
330                message: format!(
331                    "Generation number mismatch: expected {gen_num}, found {read_gen_num}"
332                ),
333            });
334        }
335
336        // Read 'obj' keyword
337        let token = lexer.next_token()?;
338        match token {
339            super::lexer::Token::Obj => {}
340            _ => {
341                if self.options.lenient_syntax {
342                    if self.options.collect_warnings {
343                        tracing::debug!("Warning: Missing 'obj' keyword for object {obj_num}");
344                    }
345                } else {
346                    return Err(ParseError::SyntaxError {
347                        position: entry.offset as usize,
348                        message: "Expected 'obj' keyword".to_string(),
349                    });
350                }
351            }
352        }
353
354        // Parse the object
355        let object = PdfObject::parse(&mut lexer)?;
356
357        // Skip 'endobj' if present
358        if let Ok(token) = lexer.peek_token() {
359            if let super::lexer::Token::EndObj = token {
360                let _ = lexer.next_token();
361            } else if !self.options.lenient_syntax && self.options.collect_warnings {
362                tracing::debug!("Warning: Missing 'endobj' for object {obj_num}");
363            }
364        }
365
366        Ok(object)
367    }
368
369    /// Get a compressed object directly (returns owned object)
370    fn get_compressed_object_direct(
371        &mut self,
372        obj_num: u32,
373        _gen_num: u16,
374        stream_obj_num: u32,
375        _index_in_stream: u32,
376    ) -> ParseResult<PdfObject> {
377        // First get the object stream
378        if !self.object_stream_cache.contains_key(&stream_obj_num) {
379            // Load the stream object
380            let stream_obj = self.load_object_from_disk(stream_obj_num, 0)?;
381
382            if let PdfObject::Stream(stream) = stream_obj {
383                let obj_stream = ObjectStream::parse(stream, &ParseOptions::default())?;
384                self.object_stream_cache.insert(stream_obj_num, obj_stream);
385            } else {
386                return Err(ParseError::SyntaxError {
387                    position: 0,
388                    message: "Object stream is not a stream object".to_string(),
389                });
390            }
391        }
392
393        // Get object from stream
394        let obj_stream = self
395            .object_stream_cache
396            .get(&stream_obj_num)
397            .ok_or_else(|| ParseError::SyntaxError {
398                position: 0,
399                message: "Object stream not found in cache".to_string(),
400            })?;
401
402        obj_stream
403            .get_object(obj_num)
404            .cloned()
405            .ok_or(ParseError::InvalidReference(obj_num, 0))
406    }
407
408    /// Find catalog object by scanning (fallback method)
409    fn find_catalog_object(&mut self) -> ParseResult<(u32, u16)> {
410        // This is a simplified implementation
411        // In a real scenario, we would scan through objects to find the catalog
412        for obj_num in 1..100 {
413            if let Ok(PdfObject::Dictionary(dict)) = self.get_object(obj_num, 0) {
414                if let Some(PdfObject::Name(type_name)) = dict.get("Type") {
415                    if type_name.0.as_bytes() == b"Catalog" {
416                        return Ok((obj_num, 0));
417                    }
418                }
419            }
420        }
421        Err(ParseError::MissingKey("Catalog".to_string()))
422    }
423
424    /// Get a reference to the inner reader
425    pub fn reader(&mut self) -> &mut BufReader<R> {
426        &mut self.reader
427    }
428}
429
430/// Helper function to get memory usage info for a PdfObject
431pub fn estimate_object_size(obj: &PdfObject) -> usize {
432    match obj {
433        PdfObject::Null => 8,
434        PdfObject::Boolean(_) => 16,
435        PdfObject::Integer(_) => 16,
436        PdfObject::Real(_) => 16,
437        PdfObject::String(s) => 24 + s.as_bytes().len(),
438        PdfObject::Name(n) => 24 + n.0.len(),
439        PdfObject::Array(arr) => {
440            24 + arr.len() * 8 + arr.0.iter().map(estimate_object_size).sum::<usize>()
441        }
442        PdfObject::Dictionary(dict) => {
443            24 + dict.0.len() * 16
444                + dict
445                    .0
446                    .iter()
447                    .map(|(k, v)| k.0.len() + estimate_object_size(v))
448                    .sum::<usize>()
449        }
450        PdfObject::Stream(s) => {
451            48 + s.data.len() + estimate_object_size(&PdfObject::Dictionary(s.dict.clone()))
452        }
453        PdfObject::Reference(_, _) => 16,
454    }
455}
456
457#[cfg(test)]
458mod tests {
459    use super::*;
460    use crate::parser::objects::{PdfArray, PdfDictionary, PdfName, PdfStream, PdfString};
461    use std::io::Cursor;
462
463    fn create_minimal_pdf() -> Vec<u8> {
464        // Offsets calculated from actual file:
465        // Header: 0-9
466        // Object 1: 9-58   (offset: 0000000009)
467        // Object 2: 58-115 (offset: 0000000058)
468        // Object 3: 115-186 (offset: 0000000115)
469        // XRef table: 186 (startxref: 186)
470        b"%PDF-1.4\n\
4711 0 obj\n\
472<< /Type /Catalog /Pages 2 0 R >>\n\
473endobj\n\
4742 0 obj\n\
475<< /Type /Pages /Kids [3 0 R] /Count 1 >>\n\
476endobj\n\
4773 0 obj\n\
478<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >>\n\
479endobj\n\
480xref\n\
4810 4\n\
4820000000000 65535 f \n\
4830000000009 00000 n \n\
4840000000058 00000 n \n\
4850000000115 00000 n \n\
486trailer\n\
487<< /Size 4 /Root 1 0 R >>\n\
488startxref\n\
489186\n\
490%%EOF\n"
491            .to_vec()
492    }
493
494    fn create_empty_pdf() -> Vec<u8> {
495        Vec::new()
496    }
497
498    fn create_invalid_pdf() -> Vec<u8> {
499        b"Not a PDF file".to_vec()
500    }
501
502    #[test]
503    fn test_memory_options_integration() {
504        let options = MemoryOptions::default().with_cache_size(100);
505        assert_eq!(options.cache_size, 100);
506
507        let options = MemoryOptions::default().with_cache_size(0);
508        assert_eq!(options.cache_size, 0);
509    }
510
511    #[test]
512    fn test_object_size_estimation_basic_types() {
513        // Null
514        let obj = PdfObject::Null;
515        assert_eq!(estimate_object_size(&obj), 8);
516
517        // Boolean
518        let obj = PdfObject::Boolean(true);
519        assert_eq!(estimate_object_size(&obj), 16);
520
521        let obj = PdfObject::Boolean(false);
522        assert_eq!(estimate_object_size(&obj), 16);
523
524        // Integer
525        let obj = PdfObject::Integer(42);
526        assert_eq!(estimate_object_size(&obj), 16);
527
528        let obj = PdfObject::Integer(-1000);
529        assert_eq!(estimate_object_size(&obj), 16);
530
531        // Real
532        let obj = PdfObject::Real(3.14159);
533        assert_eq!(estimate_object_size(&obj), 16);
534
535        // Reference
536        let obj = PdfObject::Reference(5, 0);
537        assert_eq!(estimate_object_size(&obj), 16);
538    }
539
540    #[test]
541    fn test_object_size_estimation_string_types() {
542        // Empty string
543        let obj = PdfObject::String(PdfString::new(b"".to_vec()));
544        assert_eq!(estimate_object_size(&obj), 24);
545
546        // Short string
547        let obj = PdfObject::String(PdfString::new(b"Hello".to_vec()));
548        assert_eq!(estimate_object_size(&obj), 24 + 5);
549
550        // Long string
551        let long_text = "A".repeat(1000);
552        let obj = PdfObject::String(PdfString::new(long_text.as_bytes().to_vec()));
553        assert_eq!(estimate_object_size(&obj), 24 + 1000);
554
555        // Name objects
556        let obj = PdfObject::Name(PdfName::new("Type".to_string()));
557        assert_eq!(estimate_object_size(&obj), 24 + 4);
558
559        let obj = PdfObject::Name(PdfName::new("".to_string()));
560        assert_eq!(estimate_object_size(&obj), 24);
561    }
562
563    #[test]
564    fn test_object_size_estimation_array() {
565        // Empty array
566        let obj = PdfObject::Array(PdfArray(vec![]));
567        assert_eq!(estimate_object_size(&obj), 24);
568
569        // Simple array
570        let obj = PdfObject::Array(PdfArray(vec![
571            PdfObject::Integer(1),
572            PdfObject::Integer(2),
573            PdfObject::Integer(3),
574        ]));
575        assert_eq!(estimate_object_size(&obj), 24 + 3 * 8 + 3 * 16);
576
577        // Nested array
578        let inner_array = PdfObject::Array(PdfArray(vec![
579            PdfObject::Integer(10),
580            PdfObject::Integer(20),
581        ]));
582        let obj = PdfObject::Array(PdfArray(vec![PdfObject::Integer(1), inner_array]));
583        let expected = 24 + 2 * 8 + 16 + (24 + 2 * 8 + 2 * 16);
584        assert_eq!(estimate_object_size(&obj), expected);
585    }
586
587    #[test]
588    fn test_object_size_estimation_dictionary() {
589        // Empty dictionary
590        let obj = PdfObject::Dictionary(PdfDictionary::new());
591        assert_eq!(estimate_object_size(&obj), 24);
592
593        // Simple dictionary
594        let mut dict = PdfDictionary::new();
595        dict.insert(
596            "Type".to_string(),
597            PdfObject::Name(PdfName::new("Catalog".to_string())),
598        );
599        dict.insert("Count".to_string(), PdfObject::Integer(5));
600
601        let obj = PdfObject::Dictionary(dict);
602        let expected = 24 + 2 * 16 + (4 + 24 + 7) + (5 + 16);
603        assert_eq!(estimate_object_size(&obj), expected);
604    }
605
606    #[test]
607    fn test_object_size_estimation_stream() {
608        let mut dict = PdfDictionary::new();
609        dict.insert("Length".to_string(), PdfObject::Integer(10));
610
611        let stream = PdfObject::Stream(PdfStream {
612            dict: dict.clone(),
613            data: b"Hello Test".to_vec(),
614        });
615
616        let dict_size = estimate_object_size(&PdfObject::Dictionary(dict));
617        let expected = 48 + 10 + dict_size;
618        assert_eq!(estimate_object_size(&stream), expected);
619    }
620
621    #[test]
622    fn test_object_size_estimation_complex_structure() {
623        // Complex nested structure
624        let mut inner_dict = PdfDictionary::new();
625        inner_dict.insert(
626            "Font".to_string(),
627            PdfObject::Name(PdfName::new("Helvetica".to_string())),
628        );
629        inner_dict.insert("Size".to_string(), PdfObject::Integer(12));
630
631        let array = PdfObject::Array(PdfArray(vec![
632            PdfObject::String(PdfString::new(b"Text content".to_vec())),
633            PdfObject::Dictionary(inner_dict),
634            PdfObject::Reference(10, 0),
635        ]));
636
637        let mut main_dict = PdfDictionary::new();
638        main_dict.insert(
639            "Type".to_string(),
640            PdfObject::Name(PdfName::new("Page".to_string())),
641        );
642        main_dict.insert("Contents".to_string(), array);
643
644        let obj = PdfObject::Dictionary(main_dict);
645
646        // The size should be > 0 and reasonable
647        let size = estimate_object_size(&obj);
648        assert!(size > 100);
649        assert!(size < 1000);
650    }
651
652    #[test]
653    fn test_optimized_reader_empty_file() {
654        let data = create_empty_pdf();
655        let cursor = Cursor::new(data);
656
657        let result = OptimizedPdfReader::new(cursor);
658        assert!(result.is_err());
659        if let Err(ParseError::EmptyFile) = result {
660            // Expected error
661        } else {
662            panic!("Expected EmptyFile error");
663        }
664    }
665
666    #[test]
667    fn test_optimized_reader_invalid_file() {
668        let data = create_invalid_pdf();
669        let cursor = Cursor::new(data);
670
671        let result = OptimizedPdfReader::new(cursor);
672        assert!(result.is_err());
673        // Should fail during header parsing
674    }
675
676    #[test]
677    fn test_optimized_reader_creation_with_options() {
678        let data = create_minimal_pdf();
679        let cursor = Cursor::new(data);
680
681        let parse_options = ParseOptions {
682            lenient_syntax: true,
683            collect_warnings: false,
684            ..Default::default()
685        };
686
687        let memory_options = MemoryOptions::default().with_cache_size(50);
688
689        let result = OptimizedPdfReader::new_with_options(cursor, parse_options, memory_options);
690        if result.is_err() {
691            // Skip test if PDF parsing fails due to incomplete implementation
692            return;
693        }
694
695        let reader = result.unwrap();
696        assert!(reader.options().lenient_syntax);
697        assert!(!reader.options().collect_warnings);
698    }
699
700    #[test]
701    fn test_optimized_reader_version_access() {
702        let data = create_minimal_pdf();
703        let cursor = Cursor::new(data);
704
705        let result = OptimizedPdfReader::new(cursor);
706        if result.is_err() {
707            // Skip test if PDF parsing fails
708            return;
709        }
710
711        let reader = result.unwrap();
712        let version = reader.version();
713
714        // Should have parsed version from %PDF-1.4
715        assert_eq!(version.major, 1);
716        assert_eq!(version.minor, 4);
717    }
718
719    #[test]
720    fn test_memory_options_validation() {
721        let data = create_minimal_pdf();
722        let cursor = Cursor::new(data);
723
724        // Test that cache size of 0 gets converted to 1
725        let memory_options = MemoryOptions::default().with_cache_size(0);
726        let parse_options = ParseOptions::default();
727
728        let result = OptimizedPdfReader::new_with_options(cursor, parse_options, memory_options);
729        if result.is_err() {
730            // The memory option validation should still work even if PDF parsing fails
731            let memory_opts = MemoryOptions::default().with_cache_size(0);
732            let cache_size = memory_opts.cache_size.max(1);
733            assert_eq!(cache_size, 1);
734        }
735    }
736
737    #[test]
738    fn test_estimate_object_size_edge_cases() {
739        // Very large array
740        let large_array = PdfObject::Array(PdfArray((0..1000).map(PdfObject::Integer).collect()));
741        let size = estimate_object_size(&large_array);
742        assert!(size > 16000); // Should be substantial
743
744        // Very large dictionary
745        let mut large_dict = PdfDictionary::new();
746        for i in 0..100 {
747            large_dict.insert(
748                format!("Key{i}"),
749                PdfObject::String(PdfString::new(format!("Value{i}").as_bytes().to_vec())),
750            );
751        }
752        let obj = PdfObject::Dictionary(large_dict);
753        let size = estimate_object_size(&obj);
754        assert!(size > 1000);
755    }
756
757    #[test]
758    fn test_memory_options_default_values() {
759        let options = MemoryOptions::default();
760
761        // Verify reasonable defaults
762        assert!(options.cache_size > 0);
763        assert!(options.cache_size < 10000); // Should be reasonable
764    }
765
766    #[test]
767    fn test_memory_options_builder_pattern() {
768        let options = MemoryOptions::default().with_cache_size(500);
769
770        assert_eq!(options.cache_size, 500);
771    }
772
773    #[test]
774    fn test_object_size_estimation_consistency() {
775        // Same objects should have same size
776        let obj1 = PdfObject::String(PdfString::new(b"Test".to_vec()));
777        let obj2 = PdfObject::String(PdfString::new(b"Test".to_vec()));
778
779        assert_eq!(estimate_object_size(&obj1), estimate_object_size(&obj2));
780
781        // Different content should have different sizes
782        let obj3 = PdfObject::String(PdfString::new(b"Different".to_vec()));
783        assert_ne!(estimate_object_size(&obj1), estimate_object_size(&obj3));
784    }
785
786    #[test]
787    fn test_object_size_estimation_zero_values() {
788        // Integer zero
789        let obj = PdfObject::Integer(0);
790        assert_eq!(estimate_object_size(&obj), 16);
791
792        // Real zero
793        let obj = PdfObject::Real(0.0);
794        assert_eq!(estimate_object_size(&obj), 16);
795
796        // Reference zero
797        let obj = PdfObject::Reference(0, 0);
798        assert_eq!(estimate_object_size(&obj), 16);
799    }
800
801    #[test]
802    fn test_object_size_estimation_negative_values() {
803        let obj = PdfObject::Integer(-42);
804        assert_eq!(estimate_object_size(&obj), 16);
805
806        let obj = PdfObject::Real(-3.14159);
807        assert_eq!(estimate_object_size(&obj), 16);
808    }
809
810    #[test]
811    fn test_object_size_estimation_unicode_strings() {
812        // Unicode string
813        let unicode_text = "Hello 世界 🌍";
814        let obj = PdfObject::String(PdfString::new(unicode_text.as_bytes().to_vec()));
815        let expected_size = 24 + unicode_text.len();
816        assert_eq!(estimate_object_size(&obj), expected_size);
817    }
818
819    #[test]
820    fn test_object_size_estimation_mixed_array() {
821        let obj = PdfObject::Array(PdfArray(vec![
822            PdfObject::Null,
823            PdfObject::Boolean(true),
824            PdfObject::Integer(42),
825            PdfObject::Real(3.14),
826            PdfObject::String(PdfString::new(b"test".to_vec())),
827            PdfObject::Name(PdfName::new("Name".to_string())),
828            PdfObject::Reference(1, 0),
829        ]));
830
831        let expected = 24 + 7 * 8 + 8 + 16 + 16 + 16 + (24 + 4) + (24 + 4) + 16;
832        assert_eq!(estimate_object_size(&obj), expected);
833    }
834
835    #[test]
836    fn test_find_catalog_object_range() {
837        // Test that find_catalog_object scans a reasonable range
838        // This is mainly testing the logic bounds - it scans objects 1-99
839        let data = create_minimal_pdf();
840        let cursor = Cursor::new(data);
841
842        // We can't easily test the actual scanning without a real PDF,
843        // but we can verify the implementation exists and has reasonable bounds
844        if let Ok(mut reader) = OptimizedPdfReader::new(cursor) {
845            // The method exists and should scan objects 1-99
846            // In a real test with proper PDF, this would find the catalog
847            let _result = reader.find_catalog_object();
848            // Result depends on the actual PDF content, so we don't assert specific outcomes
849        }
850    }
851
852    #[test]
853    fn test_memory_stats_tracking() {
854        // Test that memory stats are properly initialized
855        let data = create_minimal_pdf();
856        let cursor = Cursor::new(data);
857
858        if let Ok(reader) = OptimizedPdfReader::new(cursor) {
859            // Memory stats should be initialized
860            assert_eq!(reader.memory_stats.cache_hits, 0);
861            assert_eq!(reader.memory_stats.cache_misses, 0);
862            assert_eq!(reader.memory_stats.cached_objects, 0);
863        }
864    }
865
866    // =============================================================================
867    // RIGOROUS TESTS FOR OPTIMIZED READER
868    // =============================================================================
869
870    mod rigorous {
871        use super::*;
872
873        #[test]
874        fn test_lru_cache_hit_tracking() {
875            let data = create_minimal_pdf();
876            let cursor = Cursor::new(data);
877
878            let mut reader =
879                OptimizedPdfReader::new(cursor).expect("Minimal PDF must parse successfully");
880
881            // Initial state: 0 hits, 0 misses
882            assert_eq!(
883                reader.memory_stats().cache_hits,
884                0,
885                "Cache hits must start at 0"
886            );
887            assert_eq!(
888                reader.memory_stats().cache_misses,
889                0,
890                "Cache misses must start at 0"
891            );
892
893            // First access: should be cache miss
894            let _ = reader.get_object(1, 0);
895            assert_eq!(
896                reader.memory_stats().cache_misses,
897                1,
898                "First access must be cache miss"
899            );
900            assert_eq!(reader.memory_stats().cache_hits, 0, "No cache hits yet");
901
902            // Second access: should be cache hit
903            let _ = reader.get_object(1, 0);
904            assert_eq!(
905                reader.memory_stats().cache_hits,
906                1,
907                "Second access must be cache hit"
908            );
909            assert_eq!(
910                reader.memory_stats().cache_misses,
911                1,
912                "Cache misses unchanged"
913            );
914
915            // Third access: another cache hit
916            let _ = reader.get_object(1, 0);
917            assert_eq!(
918                reader.memory_stats().cache_hits,
919                2,
920                "Third access must increment cache hits"
921            );
922        }
923
924        #[test]
925        fn test_lru_cache_capacity_enforcement() {
926            let data = create_minimal_pdf();
927            let cursor = Cursor::new(data);
928
929            // Create reader with small cache (size 2)
930            let memory_options = MemoryOptions::default().with_cache_size(2);
931            let parse_options = ParseOptions::default();
932
933            let mut reader =
934                OptimizedPdfReader::new_with_options(cursor, parse_options, memory_options)
935                    .expect("Minimal PDF must parse successfully");
936
937            // Load object 1 (cache size: 1)
938            let _ = reader.get_object(1, 0);
939            assert_eq!(
940                reader.memory_stats().cached_objects,
941                1,
942                "Cache should have 1 object"
943            );
944
945            // Load object 2 (cache size: 2)
946            let _ = reader.get_object(2, 0);
947            assert_eq!(
948                reader.memory_stats().cached_objects,
949                2,
950                "Cache should have 2 objects"
951            );
952
953            // Load object 3 (cache size: still 2, evicts LRU)
954            let _ = reader.get_object(3, 0);
955            assert_eq!(
956                reader.memory_stats().cached_objects,
957                2,
958                "Cache must not exceed capacity of 2"
959            );
960        }
961
962        #[test]
963        fn test_cache_clear_resets_stats() {
964            let data = create_minimal_pdf();
965            let cursor = Cursor::new(data);
966
967            let mut reader =
968                OptimizedPdfReader::new(cursor).expect("Minimal PDF must parse successfully");
969
970            // Load some objects
971            let _ = reader.get_object(1, 0);
972            let _ = reader.get_object(1, 0); // cache hit
973
974            // Verify stats before clear
975            assert!(reader.memory_stats().cache_hits > 0);
976            assert!(reader.memory_stats().cached_objects > 0);
977
978            // Clear cache
979            reader.clear_cache();
980
981            // Cached objects should be 0 after clear
982            assert_eq!(
983                reader.memory_stats().cached_objects,
984                0,
985                "Cache should be empty after clear"
986            );
987
988            // Stats for hits/misses remain (cumulative)
989            // But next access will be miss
990            let _ = reader.get_object(1, 0);
991            assert!(
992                reader.memory_stats().cache_misses >= 2,
993                "Access after clear must be cache miss"
994            );
995        }
996
997        #[test]
998        fn test_empty_file_error_handling() {
999            let data = create_empty_pdf();
1000            let cursor = Cursor::new(data);
1001
1002            let result = OptimizedPdfReader::new(cursor);
1003
1004            assert!(result.is_err(), "Empty file must return error");
1005            match result {
1006                Err(ParseError::EmptyFile) => {
1007                    // Expected specific error type
1008                }
1009                Err(other) => panic!("Expected EmptyFile error, got: {:?}", other),
1010                Ok(_) => panic!("Should not succeed with empty file"),
1011            }
1012        }
1013
1014        #[test]
1015        fn test_invalid_header_error_handling() {
1016            let data = create_invalid_pdf();
1017            let cursor = Cursor::new(data);
1018
1019            let result = OptimizedPdfReader::new(cursor);
1020
1021            assert!(result.is_err(), "Invalid PDF must return error");
1022            // Error should occur during header parsing
1023        }
1024
1025        #[test]
1026        fn test_version_parsing_exact_values() {
1027            let data = create_minimal_pdf();
1028            let cursor = Cursor::new(data);
1029
1030            let reader =
1031                OptimizedPdfReader::new(cursor).expect("Minimal PDF must parse successfully");
1032
1033            let version = reader.version();
1034
1035            // Minimal PDF is version 1.4
1036            assert_eq!(version.major, 1, "PDF major version must be 1");
1037            assert_eq!(version.minor, 4, "PDF minor version must be 4");
1038        }
1039
1040        #[test]
1041        fn test_options_accessibility() {
1042            let data = create_minimal_pdf();
1043            let cursor = Cursor::new(data);
1044
1045            let parse_options = ParseOptions {
1046                lenient_syntax: true,
1047                collect_warnings: false,
1048                ..Default::default()
1049            };
1050            let memory_options = MemoryOptions::default().with_cache_size(100);
1051
1052            let reader =
1053                OptimizedPdfReader::new_with_options(cursor, parse_options, memory_options)
1054                    .expect("Minimal PDF must parse successfully");
1055
1056            let opts = reader.options();
1057
1058            assert_eq!(
1059                opts.lenient_syntax, true,
1060                "Options must match provided values"
1061            );
1062            assert_eq!(
1063                opts.collect_warnings, false,
1064                "Options must match provided values"
1065            );
1066        }
1067
1068        #[test]
1069        fn test_catalog_access_requires_valid_trailer() {
1070            let data = create_minimal_pdf();
1071            let cursor = Cursor::new(data);
1072
1073            let mut reader =
1074                OptimizedPdfReader::new(cursor).expect("Minimal PDF must parse successfully");
1075
1076            // Catalog should be accessible
1077            let catalog_result = reader.catalog();
1078
1079            if catalog_result.is_ok() {
1080                let catalog = catalog_result.unwrap();
1081
1082                // Catalog must be a dictionary with Type = Catalog
1083                assert_eq!(
1084                    catalog.get("Type"),
1085                    Some(&PdfObject::Name(PdfName("Catalog".to_string()))),
1086                    "Catalog must have /Type /Catalog"
1087                );
1088            } else {
1089                // If catalog fails, should be specific error
1090                assert!(matches!(
1091                    catalog_result.unwrap_err(),
1092                    ParseError::MissingKey(_) | ParseError::SyntaxError { .. }
1093                ));
1094            }
1095        }
1096
1097        #[test]
1098        fn test_info_none_when_absent() {
1099            let data = create_minimal_pdf();
1100            let cursor = Cursor::new(data);
1101
1102            let mut reader =
1103                OptimizedPdfReader::new(cursor).expect("Minimal PDF must parse successfully");
1104
1105            let info_result = reader.info();
1106
1107            if info_result.is_ok() {
1108                let info = info_result.unwrap();
1109                // Minimal PDF has no Info dictionary in trailer
1110                assert!(
1111                    info.is_none(),
1112                    "Info should be None when not present in trailer"
1113                );
1114            }
1115        }
1116
1117        #[test]
1118        fn test_get_object_wrong_generation() {
1119            let data = create_minimal_pdf();
1120            let cursor = Cursor::new(data);
1121
1122            let mut reader =
1123                OptimizedPdfReader::new(cursor).expect("Minimal PDF must parse successfully");
1124
1125            // Object 1 0 exists, but try accessing with wrong generation
1126            let result = reader.get_object(1, 5); // Wrong generation number
1127
1128            // Should either return error or Null for free object
1129            if result.is_err() {
1130                assert!(matches!(
1131                    result.unwrap_err(),
1132                    ParseError::InvalidReference(_, _)
1133                ));
1134            }
1135        }
1136
1137        #[test]
1138        fn test_get_nonexistent_object() {
1139            let data = create_minimal_pdf();
1140            let cursor = Cursor::new(data);
1141
1142            let mut reader =
1143                OptimizedPdfReader::new(cursor).expect("Minimal PDF must parse successfully");
1144
1145            // Try accessing object that doesn't exist
1146            let result = reader.get_object(9999, 0);
1147
1148            assert!(
1149                result.is_err(),
1150                "Accessing nonexistent object must return error"
1151            );
1152            assert!(matches!(
1153                result.unwrap_err(),
1154                ParseError::InvalidReference(_, _)
1155            ));
1156        }
1157
1158        #[test]
1159        fn test_memory_options_min_cache_size() {
1160            let data = create_minimal_pdf();
1161            let cursor = Cursor::new(data);
1162
1163            // Even with cache_size = 0, implementation enforces minimum of 1
1164            let memory_options = MemoryOptions::default().with_cache_size(0);
1165            let parse_options = ParseOptions::default();
1166
1167            let mut reader =
1168                OptimizedPdfReader::new_with_options(cursor, parse_options, memory_options)
1169                    .expect("Minimal PDF must parse successfully");
1170
1171            // Should be able to cache at least 1 object
1172            let _ = reader.get_object(1, 0);
1173            assert_eq!(
1174                reader.memory_stats().cached_objects,
1175                1,
1176                "Must cache at least 1 object even with cache_size=0"
1177            );
1178        }
1179
1180        #[test]
1181        fn test_estimate_object_size_exact_values() {
1182            // Test exact size calculations for primitive types
1183
1184            // Null: 8 bytes
1185            assert_eq!(estimate_object_size(&PdfObject::Null), 8);
1186
1187            // Boolean: 16 bytes
1188            assert_eq!(estimate_object_size(&PdfObject::Boolean(true)), 16);
1189            assert_eq!(estimate_object_size(&PdfObject::Boolean(false)), 16);
1190
1191            // Integer: 16 bytes
1192            assert_eq!(estimate_object_size(&PdfObject::Integer(0)), 16);
1193            assert_eq!(estimate_object_size(&PdfObject::Integer(42)), 16);
1194            assert_eq!(estimate_object_size(&PdfObject::Integer(-1000)), 16);
1195
1196            // Real: 16 bytes
1197            assert_eq!(estimate_object_size(&PdfObject::Real(0.0)), 16);
1198            assert_eq!(estimate_object_size(&PdfObject::Real(3.14159)), 16);
1199
1200            // Reference: 16 bytes
1201            assert_eq!(estimate_object_size(&PdfObject::Reference(1, 0)), 16);
1202            assert_eq!(estimate_object_size(&PdfObject::Reference(999, 5)), 16);
1203        }
1204
1205        #[test]
1206        fn test_estimate_string_size_formula() {
1207            // String size = 24 + byte_length
1208
1209            // Empty string
1210            let empty = PdfObject::String(PdfString::new(vec![]));
1211            assert_eq!(estimate_object_size(&empty), 24);
1212
1213            // 10 bytes
1214            let ten_bytes = PdfObject::String(PdfString::new(b"0123456789".to_vec()));
1215            assert_eq!(estimate_object_size(&ten_bytes), 24 + 10);
1216
1217            // 100 bytes
1218            let hundred_bytes = PdfObject::String(PdfString::new(vec![b'X'; 100]));
1219            assert_eq!(estimate_object_size(&hundred_bytes), 24 + 100);
1220        }
1221
1222        #[test]
1223        fn test_estimate_array_size_formula() {
1224            // Array size = 24 + (len * 8) + sum(element sizes)
1225
1226            // Empty array: 24
1227            let empty = PdfObject::Array(PdfArray(vec![]));
1228            assert_eq!(estimate_object_size(&empty), 24);
1229
1230            // 3 integers: 24 + (3*8) + (3*16) = 24 + 24 + 48 = 96
1231            let three_ints = PdfObject::Array(PdfArray(vec![
1232                PdfObject::Integer(1),
1233                PdfObject::Integer(2),
1234                PdfObject::Integer(3),
1235            ]));
1236            assert_eq!(estimate_object_size(&three_ints), 24 + 24 + 48);
1237        }
1238
1239        #[test]
1240        fn test_estimate_dictionary_size_formula() {
1241            // Dictionary size = 24 + (len * 16) + sum(key_len + value_size)
1242
1243            // Empty dict: 24
1244            let empty = PdfObject::Dictionary(PdfDictionary::new());
1245            assert_eq!(estimate_object_size(&empty), 24);
1246
1247            // Single entry: 24 + 16 + ("Type".len + Name("Page").size)
1248            let mut dict = PdfDictionary::new();
1249            dict.insert(
1250                "Type".to_string(),
1251                PdfObject::Name(PdfName::new("Page".to_string())),
1252            );
1253            let obj = PdfObject::Dictionary(dict);
1254            let expected = 24 + 16 + 4 + (24 + 4); // key_len=4, name_size=24+4
1255            assert_eq!(estimate_object_size(&obj), expected);
1256        }
1257
1258        #[test]
1259        fn test_cache_isolation_between_instances() {
1260            let data = create_minimal_pdf();
1261
1262            // Create two independent readers
1263            let cursor1 = Cursor::new(data.clone());
1264            let cursor2 = Cursor::new(data);
1265
1266            let mut reader1 =
1267                OptimizedPdfReader::new(cursor1).expect("Minimal PDF must parse successfully");
1268            let mut reader2 =
1269                OptimizedPdfReader::new(cursor2).expect("Minimal PDF must parse successfully");
1270
1271            // Load object in reader1
1272            let _ = reader1.get_object(1, 0);
1273            assert_eq!(reader1.memory_stats().cached_objects, 1);
1274
1275            // reader2 should have independent cache (empty)
1276            assert_eq!(
1277                reader2.memory_stats().cached_objects,
1278                0,
1279                "Readers must have independent caches"
1280            );
1281
1282            // Load in reader2
1283            let _ = reader2.get_object(1, 0);
1284            assert_eq!(
1285                reader2.memory_stats().cached_objects,
1286                1,
1287                "reader2 cache should now have 1 object"
1288            );
1289            assert_eq!(
1290                reader1.memory_stats().cached_objects,
1291                1,
1292                "reader1 cache unchanged"
1293            );
1294        }
1295
1296        #[test]
1297        fn test_reader_with_strict_options() {
1298            let data = create_minimal_pdf();
1299            let cursor = Cursor::new(data);
1300
1301            let parse_options = ParseOptions::strict();
1302            let memory_options = MemoryOptions::default();
1303
1304            let reader =
1305                OptimizedPdfReader::new_with_options(cursor, parse_options, memory_options)
1306                    .expect("Minimal PDF must parse successfully");
1307
1308            let opts = reader.options();
1309            assert_eq!(
1310                opts.strict_mode, true,
1311                "Strict options must have strict_mode=true"
1312            );
1313        }
1314
1315        #[test]
1316        fn test_reader_with_lenient_options() {
1317            let data = create_minimal_pdf();
1318            let cursor = Cursor::new(data);
1319
1320            let parse_options = ParseOptions::lenient();
1321            let memory_options = MemoryOptions::default();
1322
1323            let reader =
1324                OptimizedPdfReader::new_with_options(cursor, parse_options, memory_options)
1325                    .expect("Minimal PDF must parse successfully");
1326
1327            let opts = reader.options();
1328            assert_eq!(
1329                opts.strict_mode, false,
1330                "Lenient options must have strict_mode=false"
1331            );
1332        }
1333
1334        // =============================================================================
1335        // COVERAGE EXPANSION: Tests for open*() functions (previously uncovered)
1336        // =============================================================================
1337
1338        #[test]
1339        fn test_open_from_file_path() {
1340            use std::io::Write;
1341            use tempfile::NamedTempFile;
1342
1343            // Create temp PDF file
1344            let mut temp_file = NamedTempFile::new().expect("Failed to create temp file");
1345            temp_file
1346                .write_all(&create_minimal_pdf())
1347                .expect("Failed to write PDF data");
1348
1349            let path = temp_file.path();
1350
1351            // Test open() function
1352            let result = OptimizedPdfReader::open(path);
1353
1354            assert!(result.is_ok(), "open() must succeed with valid PDF file");
1355
1356            let reader = result.unwrap();
1357
1358            // Verify it's using lenient options
1359            assert_eq!(
1360                reader.options().strict_mode,
1361                false,
1362                "open() must use lenient parsing"
1363            );
1364
1365            // Verify version was parsed correctly
1366            assert_eq!(reader.version().major, 1);
1367            assert_eq!(reader.version().minor, 4);
1368        }
1369
1370        #[test]
1371        fn test_open_with_memory_options() {
1372            use std::io::Write;
1373            use tempfile::NamedTempFile;
1374
1375            let mut temp_file = NamedTempFile::new().expect("Failed to create temp file");
1376            temp_file
1377                .write_all(&create_minimal_pdf())
1378                .expect("Failed to write PDF data");
1379
1380            let path = temp_file.path();
1381
1382            // Custom memory options with small cache
1383            let memory_options = MemoryOptions::default().with_cache_size(10);
1384
1385            // Test open_with_memory() function
1386            let result = OptimizedPdfReader::open_with_memory(path, memory_options);
1387
1388            assert!(result.is_ok(), "open_with_memory() must succeed");
1389
1390            let mut reader = result.unwrap();
1391
1392            // Verify lenient parsing
1393            assert_eq!(reader.options().strict_mode, false);
1394
1395            // Verify cache works with custom size
1396            let _ = reader.get_object(1, 0);
1397            assert_eq!(
1398                reader.memory_stats().cached_objects,
1399                1,
1400                "Cache should respect custom memory options"
1401            );
1402        }
1403
1404        #[test]
1405        fn test_open_strict_mode() {
1406            use std::io::Write;
1407            use tempfile::NamedTempFile;
1408
1409            let mut temp_file = NamedTempFile::new().expect("Failed to create temp file");
1410            temp_file
1411                .write_all(&create_minimal_pdf())
1412                .expect("Failed to write PDF data");
1413
1414            let path = temp_file.path();
1415
1416            // Test open_strict() function
1417            let result = OptimizedPdfReader::open_strict(path);
1418
1419            assert!(result.is_ok(), "open_strict() must succeed with valid PDF");
1420
1421            let reader = result.unwrap();
1422
1423            // Verify strict mode is enabled
1424            assert_eq!(
1425                reader.options().strict_mode,
1426                true,
1427                "open_strict() must use strict parsing"
1428            );
1429
1430            // Verify version parsing still works
1431            assert_eq!(reader.version().major, 1);
1432            assert_eq!(reader.version().minor, 4);
1433        }
1434
1435        #[test]
1436        fn test_open_nonexistent_file() {
1437            use std::path::PathBuf;
1438
1439            // Try to open file that doesn't exist
1440            let path = PathBuf::from("/tmp/this_file_does_not_exist_xyz_123.pdf");
1441
1442            let result = OptimizedPdfReader::open(&path);
1443
1444            assert!(result.is_err(), "open() must fail with nonexistent file");
1445
1446            // Should get IO error (file not found)
1447            match result {
1448                Err(ParseError::Io(_)) => {
1449                    // Expected error type
1450                }
1451                Err(other) => panic!("Expected IO error, got: {:?}", other),
1452                Ok(_) => panic!("Should not succeed with nonexistent file"),
1453            }
1454        }
1455
1456        #[test]
1457        fn test_load_object_from_disk_free_object() {
1458            // This tests the "free object" path in load_object_from_disk
1459            // We need a PDF with a free entry in xref
1460
1461            // PDF with free object at position 0
1462            let pdf_with_free = b"%PDF-1.4\n\
14631 0 obj\n\
1464<< /Type /Catalog /Pages 2 0 R >>\n\
1465endobj\n\
14662 0 obj\n\
1467<< /Type /Pages /Kids [3 0 R] /Count 1 >>\n\
1468endobj\n\
14693 0 obj\n\
1470<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >>\n\
1471endobj\n\
1472xref\n\
14730 4\n\
14740000000000 65535 f \n\
14750000000009 00000 n \n\
14760000000058 00000 n \n\
14770000000115 00000 n \n\
1478trailer\n\
1479<< /Size 4 /Root 1 0 R >>\n\
1480startxref\n\
1481186\n\
1482%%EOF\n"
1483                .to_vec();
1484
1485            let cursor = Cursor::new(pdf_with_free);
1486            let mut reader =
1487                OptimizedPdfReader::new(cursor).expect("PDF with free object must parse");
1488
1489            // Try to get object 0 (free object)
1490            let result = reader.get_object(0, 65535);
1491
1492            // Free objects return Null (not an error)
1493            if let Ok(obj) = result {
1494                assert!(
1495                    matches!(obj, PdfObject::Null),
1496                    "Free object should return Null"
1497                );
1498            }
1499        }
1500
1501        #[test]
1502        fn test_find_catalog_when_trailer_missing_root() {
1503            // Test the fallback catalog finding logic
1504            // This is tested indirectly through catalog() function
1505
1506            let data = create_minimal_pdf();
1507            let cursor = Cursor::new(data);
1508
1509            let mut reader = OptimizedPdfReader::new(cursor).expect("Minimal PDF must parse");
1510
1511            // catalog() should use find_catalog_object if Root is missing
1512            let result = reader.catalog();
1513
1514            // With valid minimal PDF, catalog should be found
1515            if let Ok(catalog) = result {
1516                assert_eq!(
1517                    catalog.get("Type"),
1518                    Some(&PdfObject::Name(PdfName("Catalog".to_string()))),
1519                    "Catalog must have /Type /Catalog"
1520                );
1521            }
1522        }
1523
1524        #[test]
1525        fn test_load_object_generation_mismatch_strict() {
1526            // Test that strict mode rejects generation number mismatches
1527            // Use a properly formatted PDF with correct xref but intentionally
1528            // request wrong generation number
1529
1530            let data = create_minimal_pdf();
1531            let cursor = Cursor::new(data);
1532
1533            // Create with STRICT options
1534            let parse_options = ParseOptions::strict();
1535            let memory_options = MemoryOptions::default();
1536
1537            let mut reader =
1538                OptimizedPdfReader::new_with_options(cursor, parse_options, memory_options)
1539                    .expect("Minimal PDF must parse in strict mode");
1540
1541            // Object 1 exists with generation 0
1542            // Try to access with wrong generation number (5) in strict mode
1543            let result = reader.get_object(1, 5);
1544
1545            // In strict mode, should get InvalidReference error
1546            assert!(
1547                result.is_err(),
1548                "Strict mode must reject generation number mismatch"
1549            );
1550
1551            if let Err(e) = result {
1552                assert!(
1553                    matches!(e, ParseError::InvalidReference(_, _)),
1554                    "Expected InvalidReference error, got: {:?}",
1555                    e
1556                );
1557            }
1558        }
1559    }
1560}