1use super::header::PdfHeader;
7use super::object_stream::ObjectStream;
8use super::objects::{PdfDictionary, PdfObject};
9use super::stack_safe::StackSafeContext;
10use super::trailer::PdfTrailer;
11use super::xref::XRefTable;
12use super::{ParseError, ParseOptions, ParseResult};
13use crate::memory::{LruCache, MemoryOptions, MemoryStats};
14use crate::objects::ObjectId;
15use std::collections::HashMap;
16use std::fs::File;
17use std::io::{BufReader, Read, Seek, SeekFrom};
18use std::path::Path;
19use std::sync::Arc;
20
/// PDF reader that keeps parsed objects in a bounded LRU cache.
///
/// The header, cross-reference table and trailer are parsed once at
/// construction time; individual objects are read from the underlying stream
/// on demand and then served from the cache.
pub struct OptimizedPdfReader<R: Read + Seek> {
    /// Buffered handle on the underlying PDF byte source.
    reader: BufReader<R>,
    /// Header parsed at construction; source of the reported PDF version.
    header: PdfHeader,
    /// Cross-reference table used to locate objects by number.
    xref: XRefTable,
    /// Trailer built from the xref's trailer dictionary; provides Root/Info references.
    trailer: PdfTrailer,
    /// LRU cache of parsed objects keyed by object id.
    object_cache: LruCache<ObjectId, Arc<PdfObject>>,
    /// Parsed object streams keyed by the object number of the containing stream.
    object_stream_cache: HashMap<u32, ObjectStream>,
    /// Initialised to `None` by the constructor and not read anywhere in this section.
    #[allow(dead_code)]
    page_tree: Option<super::page_tree::PageTree>,
    /// Created at construction and not read anywhere in this section.
    #[allow(dead_code)]
    parse_context: StackSafeContext,
    /// Parse options supplied at construction; cloned into every lexer used to load an object.
    options: super::ParseOptions,
    /// Memory options supplied at construction; `cache_size` is consulted once to size the cache.
    #[allow(dead_code)]
    memory_options: MemoryOptions,
    /// Cache hit/miss counters and the current number of cached objects.
    memory_stats: MemoryStats,
}
45
46impl<R: Read + Seek> OptimizedPdfReader<R> {
47 pub fn options(&self) -> &super::ParseOptions {
49 &self.options
50 }
51
52 pub fn memory_stats(&self) -> &MemoryStats {
54 &self.memory_stats
55 }
56
57 pub fn clear_cache(&mut self) {
59 self.object_cache.clear();
60 self.object_stream_cache.clear();
61 self.memory_stats.cached_objects = 0;
62 }
63}
64
65impl OptimizedPdfReader<File> {
66 pub fn open<P: AsRef<Path>>(path: P) -> ParseResult<Self> {
68 let file = File::open(path)?;
69 let options = super::ParseOptions::lenient();
70 let memory_options = MemoryOptions::default();
71 Self::new_with_options(file, options, memory_options)
72 }
73
74 pub fn open_with_memory<P: AsRef<Path>>(
76 path: P,
77 memory_options: MemoryOptions,
78 ) -> ParseResult<Self> {
79 let file = File::open(path)?;
80 let options = super::ParseOptions::lenient();
81 Self::new_with_options(file, options, memory_options)
82 }
83
84 pub fn open_strict<P: AsRef<Path>>(path: P) -> ParseResult<Self> {
86 let file = File::open(path)?;
87 let options = super::ParseOptions::strict();
88 let memory_options = MemoryOptions::default();
89 Self::new_with_options(file, options, memory_options)
90 }
91}
92
93impl<R: Read + Seek> OptimizedPdfReader<R> {
94 pub fn new(reader: R) -> ParseResult<Self> {
96 Self::new_with_options(
97 reader,
98 super::ParseOptions::default(),
99 MemoryOptions::default(),
100 )
101 }
102
103 pub fn new_with_options(
105 reader: R,
106 options: super::ParseOptions,
107 memory_options: MemoryOptions,
108 ) -> ParseResult<Self> {
109 let mut buf_reader = BufReader::new(reader);
110
111 let start_pos = buf_reader.stream_position()?;
113 buf_reader.seek(SeekFrom::End(0))?;
114 let file_size = buf_reader.stream_position()?;
115 buf_reader.seek(SeekFrom::Start(start_pos))?;
116
117 if file_size == 0 {
118 return Err(ParseError::EmptyFile);
119 }
120
121 let header = PdfHeader::parse(&mut buf_reader)?;
123
124 let xref = XRefTable::parse_with_options(&mut buf_reader, &options)?;
126
127 let trailer_dict = xref.trailer().ok_or(ParseError::InvalidTrailer)?.clone();
129
130 let xref_offset = xref.xref_offset();
131 let trailer = PdfTrailer::from_dict(trailer_dict, xref_offset)?;
132
133 trailer.validate()?;
135
136 let cache_size = memory_options.cache_size.max(1);
138 let object_cache = LruCache::new(cache_size);
139
140 Ok(Self {
141 reader: buf_reader,
142 header,
143 xref,
144 trailer,
145 object_cache,
146 object_stream_cache: HashMap::new(),
147 page_tree: None,
148 parse_context: StackSafeContext::new(),
149 options,
150 memory_options,
151 memory_stats: MemoryStats::default(),
152 })
153 }
154
    /// Returns the PDF version recorded in the header parsed at construction.
    pub fn version(&self) -> &super::header::PdfVersion {
        &self.header.version
    }
159
160 pub fn catalog(&mut self) -> ParseResult<&PdfDictionary> {
162 let (obj_num, gen_num) = match self.trailer.root() {
164 Ok(root) => root,
165 Err(_) => {
166 #[cfg(debug_assertions)]
168 tracing::debug!("Warning: Trailer missing Root entry, attempting recovery");
169
170 if let Some(root) = self.trailer.find_root_fallback() {
172 root
173 } else {
174 if let Ok(catalog_ref) = self.find_catalog_object() {
176 catalog_ref
177 } else {
178 return Err(ParseError::MissingKey("Root".to_string()));
179 }
180 }
181 }
182 };
183
184 let catalog = self.get_object(obj_num, gen_num)?;
185
186 catalog.as_dict().ok_or_else(|| ParseError::SyntaxError {
187 position: 0,
188 message: "Catalog is not a dictionary".to_string(),
189 })
190 }
191
192 pub fn info(&mut self) -> ParseResult<Option<&PdfDictionary>> {
194 match self.trailer.info() {
195 Some((obj_num, gen_num)) => {
196 let info = self.get_object(obj_num, gen_num)?;
197 Ok(info.as_dict())
198 }
199 None => Ok(None),
200 }
201 }
202
203 pub fn get_object(&mut self, obj_num: u32, gen_num: u16) -> ParseResult<&PdfObject> {
205 let object_id = ObjectId::new(obj_num, gen_num);
206
207 if let Some(cached_obj) = self.object_cache.get(&object_id) {
209 self.memory_stats.cache_hits += 1;
210 let ptr = Arc::as_ptr(cached_obj);
213 return Ok(unsafe { &*ptr });
214 }
215
216 self.memory_stats.cache_misses += 1;
217
218 let obj = self.load_object_from_disk(obj_num, gen_num)?;
220
221 let arc_obj = Arc::new(obj);
223 self.object_cache.put(object_id, arc_obj);
224 self.memory_stats.cached_objects = self.object_cache.len();
225
226 self.object_cache
230 .get(&object_id)
231 .map(|arc| unsafe { &*Arc::as_ptr(arc) })
232 .ok_or(ParseError::SyntaxError {
233 position: 0,
234 message: "Object not in cache after insertion".to_string(),
235 })
236 }
237
238 fn load_object_from_disk(&mut self, obj_num: u32, gen_num: u16) -> ParseResult<PdfObject> {
240 if let Some(ext_entry) = self.xref.get_extended_entry(obj_num) {
242 if let Some((stream_obj_num, index_in_stream)) = ext_entry.compressed_info {
243 return self.get_compressed_object_direct(
245 obj_num,
246 gen_num,
247 stream_obj_num,
248 index_in_stream,
249 );
250 }
251 }
252
253 let entry = self
255 .xref
256 .get_entry(obj_num)
257 .ok_or(ParseError::InvalidReference(obj_num, gen_num))?;
258
259 if !entry.in_use {
260 return Ok(PdfObject::Null);
262 }
263
264 if entry.generation != gen_num {
265 return Err(ParseError::InvalidReference(obj_num, gen_num));
266 }
267
268 self.reader.seek(std::io::SeekFrom::Start(entry.offset))?;
270
271 let mut lexer =
273 super::lexer::Lexer::new_with_options(&mut self.reader, self.options.clone());
274
275 let token = lexer.next_token()?;
277 let read_obj_num = match token {
278 super::lexer::Token::Integer(n) => n as u32,
279 _ => {
280 if self.options.lenient_syntax {
282 if self.options.collect_warnings {
283 tracing::debug!(
284 "Warning: Using expected object number {obj_num} instead of parsed token"
285 );
286 }
287 obj_num
288 } else {
289 return Err(ParseError::SyntaxError {
290 position: entry.offset as usize,
291 message: "Expected object number".to_string(),
292 });
293 }
294 }
295 };
296
297 if read_obj_num != obj_num && !self.options.lenient_syntax {
298 return Err(ParseError::SyntaxError {
299 position: entry.offset as usize,
300 message: format!(
301 "Object number mismatch: expected {obj_num}, found {read_obj_num}"
302 ),
303 });
304 }
305
306 let token = lexer.next_token()?;
308 let read_gen_num = match token {
309 super::lexer::Token::Integer(n) => n as u16,
310 _ => {
311 if self.options.lenient_syntax {
312 if self.options.collect_warnings {
313 tracing::debug!(
314 "Warning: Using generation 0 instead of parsed token for object {obj_num}"
315 );
316 }
317 0
318 } else {
319 return Err(ParseError::SyntaxError {
320 position: entry.offset as usize,
321 message: "Expected generation number".to_string(),
322 });
323 }
324 }
325 };
326
327 if read_gen_num != gen_num && !self.options.lenient_syntax {
328 return Err(ParseError::SyntaxError {
329 position: entry.offset as usize,
330 message: format!(
331 "Generation number mismatch: expected {gen_num}, found {read_gen_num}"
332 ),
333 });
334 }
335
336 let token = lexer.next_token()?;
338 match token {
339 super::lexer::Token::Obj => {}
340 _ => {
341 if self.options.lenient_syntax {
342 if self.options.collect_warnings {
343 tracing::debug!("Warning: Missing 'obj' keyword for object {obj_num}");
344 }
345 } else {
346 return Err(ParseError::SyntaxError {
347 position: entry.offset as usize,
348 message: "Expected 'obj' keyword".to_string(),
349 });
350 }
351 }
352 }
353
354 let object = PdfObject::parse(&mut lexer)?;
356
357 if let Ok(token) = lexer.peek_token() {
359 if let super::lexer::Token::EndObj = token {
360 let _ = lexer.next_token();
361 } else if !self.options.lenient_syntax && self.options.collect_warnings {
362 tracing::debug!("Warning: Missing 'endobj' for object {obj_num}");
363 }
364 }
365
366 Ok(object)
367 }
368
369 fn get_compressed_object_direct(
371 &mut self,
372 obj_num: u32,
373 _gen_num: u16,
374 stream_obj_num: u32,
375 _index_in_stream: u32,
376 ) -> ParseResult<PdfObject> {
377 if !self.object_stream_cache.contains_key(&stream_obj_num) {
379 let stream_obj = self.load_object_from_disk(stream_obj_num, 0)?;
381
382 if let PdfObject::Stream(stream) = stream_obj {
383 let obj_stream = ObjectStream::parse(stream, &ParseOptions::default())?;
384 self.object_stream_cache.insert(stream_obj_num, obj_stream);
385 } else {
386 return Err(ParseError::SyntaxError {
387 position: 0,
388 message: "Object stream is not a stream object".to_string(),
389 });
390 }
391 }
392
393 let obj_stream = self
395 .object_stream_cache
396 .get(&stream_obj_num)
397 .ok_or_else(|| ParseError::SyntaxError {
398 position: 0,
399 message: "Object stream not found in cache".to_string(),
400 })?;
401
402 obj_stream
403 .get_object(obj_num)
404 .cloned()
405 .ok_or(ParseError::InvalidReference(obj_num, 0))
406 }
407
408 fn find_catalog_object(&mut self) -> ParseResult<(u32, u16)> {
410 for obj_num in 1..100 {
413 if let Ok(PdfObject::Dictionary(dict)) = self.get_object(obj_num, 0) {
414 if let Some(PdfObject::Name(type_name)) = dict.get("Type") {
415 if type_name.0.as_bytes() == b"Catalog" {
416 return Ok((obj_num, 0));
417 }
418 }
419 }
420 }
421 Err(ParseError::MissingKey("Catalog".to_string()))
422 }
423
    /// Returns mutable access to the underlying buffered reader.
    pub fn reader(&mut self) -> &mut BufReader<R> {
        &mut self.reader
    }
428}
429
430pub fn estimate_object_size(obj: &PdfObject) -> usize {
432 match obj {
433 PdfObject::Null => 8,
434 PdfObject::Boolean(_) => 16,
435 PdfObject::Integer(_) => 16,
436 PdfObject::Real(_) => 16,
437 PdfObject::String(s) => 24 + s.as_bytes().len(),
438 PdfObject::Name(n) => 24 + n.0.len(),
439 PdfObject::Array(arr) => {
440 24 + arr.len() * 8 + arr.0.iter().map(estimate_object_size).sum::<usize>()
441 }
442 PdfObject::Dictionary(dict) => {
443 24 + dict.0.len() * 16
444 + dict
445 .0
446 .iter()
447 .map(|(k, v)| k.0.len() + estimate_object_size(v))
448 .sum::<usize>()
449 }
450 PdfObject::Stream(s) => {
451 48 + s.data.len() + estimate_object_size(&PdfObject::Dictionary(s.dict.clone()))
452 }
453 PdfObject::Reference(_, _) => 16,
454 }
455}
456
457#[cfg(test)]
458mod tests {
459 use super::*;
460 use crate::parser::objects::{PdfArray, PdfDictionary, PdfName, PdfStream, PdfString};
461 use std::io::Cursor;
462
463 fn create_minimal_pdf() -> Vec<u8> {
464 b"%PDF-1.4\n\
4711 0 obj\n\
472<< /Type /Catalog /Pages 2 0 R >>\n\
473endobj\n\
4742 0 obj\n\
475<< /Type /Pages /Kids [3 0 R] /Count 1 >>\n\
476endobj\n\
4773 0 obj\n\
478<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >>\n\
479endobj\n\
480xref\n\
4810 4\n\
4820000000000 65535 f \n\
4830000000009 00000 n \n\
4840000000058 00000 n \n\
4850000000115 00000 n \n\
486trailer\n\
487<< /Size 4 /Root 1 0 R >>\n\
488startxref\n\
489186\n\
490%%EOF\n"
491 .to_vec()
492 }
493
494 fn create_empty_pdf() -> Vec<u8> {
495 Vec::new()
496 }
497
498 fn create_invalid_pdf() -> Vec<u8> {
499 b"Not a PDF file".to_vec()
500 }
501
502 #[test]
503 fn test_memory_options_integration() {
504 let options = MemoryOptions::default().with_cache_size(100);
505 assert_eq!(options.cache_size, 100);
506
507 let options = MemoryOptions::default().with_cache_size(0);
508 assert_eq!(options.cache_size, 0);
509 }
510
511 #[test]
512 fn test_object_size_estimation_basic_types() {
513 let obj = PdfObject::Null;
515 assert_eq!(estimate_object_size(&obj), 8);
516
517 let obj = PdfObject::Boolean(true);
519 assert_eq!(estimate_object_size(&obj), 16);
520
521 let obj = PdfObject::Boolean(false);
522 assert_eq!(estimate_object_size(&obj), 16);
523
524 let obj = PdfObject::Integer(42);
526 assert_eq!(estimate_object_size(&obj), 16);
527
528 let obj = PdfObject::Integer(-1000);
529 assert_eq!(estimate_object_size(&obj), 16);
530
531 let obj = PdfObject::Real(3.14159);
533 assert_eq!(estimate_object_size(&obj), 16);
534
535 let obj = PdfObject::Reference(5, 0);
537 assert_eq!(estimate_object_size(&obj), 16);
538 }
539
540 #[test]
541 fn test_object_size_estimation_string_types() {
542 let obj = PdfObject::String(PdfString::new(b"".to_vec()));
544 assert_eq!(estimate_object_size(&obj), 24);
545
546 let obj = PdfObject::String(PdfString::new(b"Hello".to_vec()));
548 assert_eq!(estimate_object_size(&obj), 24 + 5);
549
550 let long_text = "A".repeat(1000);
552 let obj = PdfObject::String(PdfString::new(long_text.as_bytes().to_vec()));
553 assert_eq!(estimate_object_size(&obj), 24 + 1000);
554
555 let obj = PdfObject::Name(PdfName::new("Type".to_string()));
557 assert_eq!(estimate_object_size(&obj), 24 + 4);
558
559 let obj = PdfObject::Name(PdfName::new("".to_string()));
560 assert_eq!(estimate_object_size(&obj), 24);
561 }
562
563 #[test]
564 fn test_object_size_estimation_array() {
565 let obj = PdfObject::Array(PdfArray(vec![]));
567 assert_eq!(estimate_object_size(&obj), 24);
568
569 let obj = PdfObject::Array(PdfArray(vec![
571 PdfObject::Integer(1),
572 PdfObject::Integer(2),
573 PdfObject::Integer(3),
574 ]));
575 assert_eq!(estimate_object_size(&obj), 24 + 3 * 8 + 3 * 16);
576
577 let inner_array = PdfObject::Array(PdfArray(vec![
579 PdfObject::Integer(10),
580 PdfObject::Integer(20),
581 ]));
582 let obj = PdfObject::Array(PdfArray(vec![PdfObject::Integer(1), inner_array]));
583 let expected = 24 + 2 * 8 + 16 + (24 + 2 * 8 + 2 * 16);
584 assert_eq!(estimate_object_size(&obj), expected);
585 }
586
587 #[test]
588 fn test_object_size_estimation_dictionary() {
589 let obj = PdfObject::Dictionary(PdfDictionary::new());
591 assert_eq!(estimate_object_size(&obj), 24);
592
593 let mut dict = PdfDictionary::new();
595 dict.insert(
596 "Type".to_string(),
597 PdfObject::Name(PdfName::new("Catalog".to_string())),
598 );
599 dict.insert("Count".to_string(), PdfObject::Integer(5));
600
601 let obj = PdfObject::Dictionary(dict);
602 let expected = 24 + 2 * 16 + (4 + 24 + 7) + (5 + 16);
603 assert_eq!(estimate_object_size(&obj), expected);
604 }
605
606 #[test]
607 fn test_object_size_estimation_stream() {
608 let mut dict = PdfDictionary::new();
609 dict.insert("Length".to_string(), PdfObject::Integer(10));
610
611 let stream = PdfObject::Stream(PdfStream {
612 dict: dict.clone(),
613 data: b"Hello Test".to_vec(),
614 });
615
616 let dict_size = estimate_object_size(&PdfObject::Dictionary(dict));
617 let expected = 48 + 10 + dict_size;
618 assert_eq!(estimate_object_size(&stream), expected);
619 }
620
621 #[test]
622 fn test_object_size_estimation_complex_structure() {
623 let mut inner_dict = PdfDictionary::new();
625 inner_dict.insert(
626 "Font".to_string(),
627 PdfObject::Name(PdfName::new("Helvetica".to_string())),
628 );
629 inner_dict.insert("Size".to_string(), PdfObject::Integer(12));
630
631 let array = PdfObject::Array(PdfArray(vec![
632 PdfObject::String(PdfString::new(b"Text content".to_vec())),
633 PdfObject::Dictionary(inner_dict),
634 PdfObject::Reference(10, 0),
635 ]));
636
637 let mut main_dict = PdfDictionary::new();
638 main_dict.insert(
639 "Type".to_string(),
640 PdfObject::Name(PdfName::new("Page".to_string())),
641 );
642 main_dict.insert("Contents".to_string(), array);
643
644 let obj = PdfObject::Dictionary(main_dict);
645
646 let size = estimate_object_size(&obj);
648 assert!(size > 100);
649 assert!(size < 1000);
650 }
651
652 #[test]
653 fn test_optimized_reader_empty_file() {
654 let data = create_empty_pdf();
655 let cursor = Cursor::new(data);
656
657 let result = OptimizedPdfReader::new(cursor);
658 assert!(result.is_err());
659 if let Err(ParseError::EmptyFile) = result {
660 } else {
662 panic!("Expected EmptyFile error");
663 }
664 }
665
666 #[test]
667 fn test_optimized_reader_invalid_file() {
668 let data = create_invalid_pdf();
669 let cursor = Cursor::new(data);
670
671 let result = OptimizedPdfReader::new(cursor);
672 assert!(result.is_err());
673 }
675
676 #[test]
677 fn test_optimized_reader_creation_with_options() {
678 let data = create_minimal_pdf();
679 let cursor = Cursor::new(data);
680
681 let parse_options = ParseOptions {
682 lenient_syntax: true,
683 collect_warnings: false,
684 ..Default::default()
685 };
686
687 let memory_options = MemoryOptions::default().with_cache_size(50);
688
689 let result = OptimizedPdfReader::new_with_options(cursor, parse_options, memory_options);
690 if result.is_err() {
691 return;
693 }
694
695 let reader = result.unwrap();
696 assert!(reader.options().lenient_syntax);
697 assert!(!reader.options().collect_warnings);
698 }
699
700 #[test]
701 fn test_optimized_reader_version_access() {
702 let data = create_minimal_pdf();
703 let cursor = Cursor::new(data);
704
705 let result = OptimizedPdfReader::new(cursor);
706 if result.is_err() {
707 return;
709 }
710
711 let reader = result.unwrap();
712 let version = reader.version();
713
714 assert_eq!(version.major, 1);
716 assert_eq!(version.minor, 4);
717 }
718
719 #[test]
720 fn test_memory_options_validation() {
721 let data = create_minimal_pdf();
722 let cursor = Cursor::new(data);
723
724 let memory_options = MemoryOptions::default().with_cache_size(0);
726 let parse_options = ParseOptions::default();
727
728 let result = OptimizedPdfReader::new_with_options(cursor, parse_options, memory_options);
729 if result.is_err() {
730 let memory_opts = MemoryOptions::default().with_cache_size(0);
732 let cache_size = memory_opts.cache_size.max(1);
733 assert_eq!(cache_size, 1);
734 }
735 }
736
737 #[test]
738 fn test_estimate_object_size_edge_cases() {
739 let large_array = PdfObject::Array(PdfArray((0..1000).map(PdfObject::Integer).collect()));
741 let size = estimate_object_size(&large_array);
742 assert!(size > 16000); let mut large_dict = PdfDictionary::new();
746 for i in 0..100 {
747 large_dict.insert(
748 format!("Key{i}"),
749 PdfObject::String(PdfString::new(format!("Value{i}").as_bytes().to_vec())),
750 );
751 }
752 let obj = PdfObject::Dictionary(large_dict);
753 let size = estimate_object_size(&obj);
754 assert!(size > 1000);
755 }
756
757 #[test]
758 fn test_memory_options_default_values() {
759 let options = MemoryOptions::default();
760
761 assert!(options.cache_size > 0);
763 assert!(options.cache_size < 10000); }
765
766 #[test]
767 fn test_memory_options_builder_pattern() {
768 let options = MemoryOptions::default().with_cache_size(500);
769
770 assert_eq!(options.cache_size, 500);
771 }
772
773 #[test]
774 fn test_object_size_estimation_consistency() {
775 let obj1 = PdfObject::String(PdfString::new(b"Test".to_vec()));
777 let obj2 = PdfObject::String(PdfString::new(b"Test".to_vec()));
778
779 assert_eq!(estimate_object_size(&obj1), estimate_object_size(&obj2));
780
781 let obj3 = PdfObject::String(PdfString::new(b"Different".to_vec()));
783 assert_ne!(estimate_object_size(&obj1), estimate_object_size(&obj3));
784 }
785
786 #[test]
787 fn test_object_size_estimation_zero_values() {
788 let obj = PdfObject::Integer(0);
790 assert_eq!(estimate_object_size(&obj), 16);
791
792 let obj = PdfObject::Real(0.0);
794 assert_eq!(estimate_object_size(&obj), 16);
795
796 let obj = PdfObject::Reference(0, 0);
798 assert_eq!(estimate_object_size(&obj), 16);
799 }
800
801 #[test]
802 fn test_object_size_estimation_negative_values() {
803 let obj = PdfObject::Integer(-42);
804 assert_eq!(estimate_object_size(&obj), 16);
805
806 let obj = PdfObject::Real(-3.14159);
807 assert_eq!(estimate_object_size(&obj), 16);
808 }
809
810 #[test]
811 fn test_object_size_estimation_unicode_strings() {
812 let unicode_text = "Hello 世界 🌍";
814 let obj = PdfObject::String(PdfString::new(unicode_text.as_bytes().to_vec()));
815 let expected_size = 24 + unicode_text.len();
816 assert_eq!(estimate_object_size(&obj), expected_size);
817 }
818
819 #[test]
820 fn test_object_size_estimation_mixed_array() {
821 let obj = PdfObject::Array(PdfArray(vec![
822 PdfObject::Null,
823 PdfObject::Boolean(true),
824 PdfObject::Integer(42),
825 PdfObject::Real(3.14),
826 PdfObject::String(PdfString::new(b"test".to_vec())),
827 PdfObject::Name(PdfName::new("Name".to_string())),
828 PdfObject::Reference(1, 0),
829 ]));
830
831 let expected = 24 + 7 * 8 + 8 + 16 + 16 + 16 + (24 + 4) + (24 + 4) + 16;
832 assert_eq!(estimate_object_size(&obj), expected);
833 }
834
835 #[test]
836 fn test_find_catalog_object_range() {
837 let data = create_minimal_pdf();
840 let cursor = Cursor::new(data);
841
842 if let Ok(mut reader) = OptimizedPdfReader::new(cursor) {
845 let _result = reader.find_catalog_object();
848 }
850 }
851
852 #[test]
853 fn test_memory_stats_tracking() {
854 let data = create_minimal_pdf();
856 let cursor = Cursor::new(data);
857
858 if let Ok(reader) = OptimizedPdfReader::new(cursor) {
859 assert_eq!(reader.memory_stats.cache_hits, 0);
861 assert_eq!(reader.memory_stats.cache_misses, 0);
862 assert_eq!(reader.memory_stats.cached_objects, 0);
863 }
864 }
865
866 mod rigorous {
871 use super::*;
872
873 #[test]
874 fn test_lru_cache_hit_tracking() {
875 let data = create_minimal_pdf();
876 let cursor = Cursor::new(data);
877
878 let mut reader =
879 OptimizedPdfReader::new(cursor).expect("Minimal PDF must parse successfully");
880
881 assert_eq!(
883 reader.memory_stats().cache_hits,
884 0,
885 "Cache hits must start at 0"
886 );
887 assert_eq!(
888 reader.memory_stats().cache_misses,
889 0,
890 "Cache misses must start at 0"
891 );
892
893 let _ = reader.get_object(1, 0);
895 assert_eq!(
896 reader.memory_stats().cache_misses,
897 1,
898 "First access must be cache miss"
899 );
900 assert_eq!(reader.memory_stats().cache_hits, 0, "No cache hits yet");
901
902 let _ = reader.get_object(1, 0);
904 assert_eq!(
905 reader.memory_stats().cache_hits,
906 1,
907 "Second access must be cache hit"
908 );
909 assert_eq!(
910 reader.memory_stats().cache_misses,
911 1,
912 "Cache misses unchanged"
913 );
914
915 let _ = reader.get_object(1, 0);
917 assert_eq!(
918 reader.memory_stats().cache_hits,
919 2,
920 "Third access must increment cache hits"
921 );
922 }
923
924 #[test]
925 fn test_lru_cache_capacity_enforcement() {
926 let data = create_minimal_pdf();
927 let cursor = Cursor::new(data);
928
929 let memory_options = MemoryOptions::default().with_cache_size(2);
931 let parse_options = ParseOptions::default();
932
933 let mut reader =
934 OptimizedPdfReader::new_with_options(cursor, parse_options, memory_options)
935 .expect("Minimal PDF must parse successfully");
936
937 let _ = reader.get_object(1, 0);
939 assert_eq!(
940 reader.memory_stats().cached_objects,
941 1,
942 "Cache should have 1 object"
943 );
944
945 let _ = reader.get_object(2, 0);
947 assert_eq!(
948 reader.memory_stats().cached_objects,
949 2,
950 "Cache should have 2 objects"
951 );
952
953 let _ = reader.get_object(3, 0);
955 assert_eq!(
956 reader.memory_stats().cached_objects,
957 2,
958 "Cache must not exceed capacity of 2"
959 );
960 }
961
962 #[test]
963 fn test_cache_clear_resets_stats() {
964 let data = create_minimal_pdf();
965 let cursor = Cursor::new(data);
966
967 let mut reader =
968 OptimizedPdfReader::new(cursor).expect("Minimal PDF must parse successfully");
969
970 let _ = reader.get_object(1, 0);
972 let _ = reader.get_object(1, 0); assert!(reader.memory_stats().cache_hits > 0);
976 assert!(reader.memory_stats().cached_objects > 0);
977
978 reader.clear_cache();
980
981 assert_eq!(
983 reader.memory_stats().cached_objects,
984 0,
985 "Cache should be empty after clear"
986 );
987
988 let _ = reader.get_object(1, 0);
991 assert!(
992 reader.memory_stats().cache_misses >= 2,
993 "Access after clear must be cache miss"
994 );
995 }
996
997 #[test]
998 fn test_empty_file_error_handling() {
999 let data = create_empty_pdf();
1000 let cursor = Cursor::new(data);
1001
1002 let result = OptimizedPdfReader::new(cursor);
1003
1004 assert!(result.is_err(), "Empty file must return error");
1005 match result {
1006 Err(ParseError::EmptyFile) => {
1007 }
1009 Err(other) => panic!("Expected EmptyFile error, got: {:?}", other),
1010 Ok(_) => panic!("Should not succeed with empty file"),
1011 }
1012 }
1013
1014 #[test]
1015 fn test_invalid_header_error_handling() {
1016 let data = create_invalid_pdf();
1017 let cursor = Cursor::new(data);
1018
1019 let result = OptimizedPdfReader::new(cursor);
1020
1021 assert!(result.is_err(), "Invalid PDF must return error");
1022 }
1024
1025 #[test]
1026 fn test_version_parsing_exact_values() {
1027 let data = create_minimal_pdf();
1028 let cursor = Cursor::new(data);
1029
1030 let reader =
1031 OptimizedPdfReader::new(cursor).expect("Minimal PDF must parse successfully");
1032
1033 let version = reader.version();
1034
1035 assert_eq!(version.major, 1, "PDF major version must be 1");
1037 assert_eq!(version.minor, 4, "PDF minor version must be 4");
1038 }
1039
1040 #[test]
1041 fn test_options_accessibility() {
1042 let data = create_minimal_pdf();
1043 let cursor = Cursor::new(data);
1044
1045 let parse_options = ParseOptions {
1046 lenient_syntax: true,
1047 collect_warnings: false,
1048 ..Default::default()
1049 };
1050 let memory_options = MemoryOptions::default().with_cache_size(100);
1051
1052 let reader =
1053 OptimizedPdfReader::new_with_options(cursor, parse_options, memory_options)
1054 .expect("Minimal PDF must parse successfully");
1055
1056 let opts = reader.options();
1057
1058 assert_eq!(
1059 opts.lenient_syntax, true,
1060 "Options must match provided values"
1061 );
1062 assert_eq!(
1063 opts.collect_warnings, false,
1064 "Options must match provided values"
1065 );
1066 }
1067
1068 #[test]
1069 fn test_catalog_access_requires_valid_trailer() {
1070 let data = create_minimal_pdf();
1071 let cursor = Cursor::new(data);
1072
1073 let mut reader =
1074 OptimizedPdfReader::new(cursor).expect("Minimal PDF must parse successfully");
1075
1076 let catalog_result = reader.catalog();
1078
1079 if catalog_result.is_ok() {
1080 let catalog = catalog_result.unwrap();
1081
1082 assert_eq!(
1084 catalog.get("Type"),
1085 Some(&PdfObject::Name(PdfName("Catalog".to_string()))),
1086 "Catalog must have /Type /Catalog"
1087 );
1088 } else {
1089 assert!(matches!(
1091 catalog_result.unwrap_err(),
1092 ParseError::MissingKey(_) | ParseError::SyntaxError { .. }
1093 ));
1094 }
1095 }
1096
1097 #[test]
1098 fn test_info_none_when_absent() {
1099 let data = create_minimal_pdf();
1100 let cursor = Cursor::new(data);
1101
1102 let mut reader =
1103 OptimizedPdfReader::new(cursor).expect("Minimal PDF must parse successfully");
1104
1105 let info_result = reader.info();
1106
1107 if info_result.is_ok() {
1108 let info = info_result.unwrap();
1109 assert!(
1111 info.is_none(),
1112 "Info should be None when not present in trailer"
1113 );
1114 }
1115 }
1116
1117 #[test]
1118 fn test_get_object_wrong_generation() {
1119 let data = create_minimal_pdf();
1120 let cursor = Cursor::new(data);
1121
1122 let mut reader =
1123 OptimizedPdfReader::new(cursor).expect("Minimal PDF must parse successfully");
1124
1125 let result = reader.get_object(1, 5); if result.is_err() {
1130 assert!(matches!(
1131 result.unwrap_err(),
1132 ParseError::InvalidReference(_, _)
1133 ));
1134 }
1135 }
1136
1137 #[test]
1138 fn test_get_nonexistent_object() {
1139 let data = create_minimal_pdf();
1140 let cursor = Cursor::new(data);
1141
1142 let mut reader =
1143 OptimizedPdfReader::new(cursor).expect("Minimal PDF must parse successfully");
1144
1145 let result = reader.get_object(9999, 0);
1147
1148 assert!(
1149 result.is_err(),
1150 "Accessing nonexistent object must return error"
1151 );
1152 assert!(matches!(
1153 result.unwrap_err(),
1154 ParseError::InvalidReference(_, _)
1155 ));
1156 }
1157
1158 #[test]
1159 fn test_memory_options_min_cache_size() {
1160 let data = create_minimal_pdf();
1161 let cursor = Cursor::new(data);
1162
1163 let memory_options = MemoryOptions::default().with_cache_size(0);
1165 let parse_options = ParseOptions::default();
1166
1167 let mut reader =
1168 OptimizedPdfReader::new_with_options(cursor, parse_options, memory_options)
1169 .expect("Minimal PDF must parse successfully");
1170
1171 let _ = reader.get_object(1, 0);
1173 assert_eq!(
1174 reader.memory_stats().cached_objects,
1175 1,
1176 "Must cache at least 1 object even with cache_size=0"
1177 );
1178 }
1179
1180 #[test]
1181 fn test_estimate_object_size_exact_values() {
1182 assert_eq!(estimate_object_size(&PdfObject::Null), 8);
1186
1187 assert_eq!(estimate_object_size(&PdfObject::Boolean(true)), 16);
1189 assert_eq!(estimate_object_size(&PdfObject::Boolean(false)), 16);
1190
1191 assert_eq!(estimate_object_size(&PdfObject::Integer(0)), 16);
1193 assert_eq!(estimate_object_size(&PdfObject::Integer(42)), 16);
1194 assert_eq!(estimate_object_size(&PdfObject::Integer(-1000)), 16);
1195
1196 assert_eq!(estimate_object_size(&PdfObject::Real(0.0)), 16);
1198 assert_eq!(estimate_object_size(&PdfObject::Real(3.14159)), 16);
1199
1200 assert_eq!(estimate_object_size(&PdfObject::Reference(1, 0)), 16);
1202 assert_eq!(estimate_object_size(&PdfObject::Reference(999, 5)), 16);
1203 }
1204
1205 #[test]
1206 fn test_estimate_string_size_formula() {
1207 let empty = PdfObject::String(PdfString::new(vec![]));
1211 assert_eq!(estimate_object_size(&empty), 24);
1212
1213 let ten_bytes = PdfObject::String(PdfString::new(b"0123456789".to_vec()));
1215 assert_eq!(estimate_object_size(&ten_bytes), 24 + 10);
1216
1217 let hundred_bytes = PdfObject::String(PdfString::new(vec![b'X'; 100]));
1219 assert_eq!(estimate_object_size(&hundred_bytes), 24 + 100);
1220 }
1221
1222 #[test]
1223 fn test_estimate_array_size_formula() {
1224 let empty = PdfObject::Array(PdfArray(vec![]));
1228 assert_eq!(estimate_object_size(&empty), 24);
1229
1230 let three_ints = PdfObject::Array(PdfArray(vec![
1232 PdfObject::Integer(1),
1233 PdfObject::Integer(2),
1234 PdfObject::Integer(3),
1235 ]));
1236 assert_eq!(estimate_object_size(&three_ints), 24 + 24 + 48);
1237 }
1238
1239 #[test]
1240 fn test_estimate_dictionary_size_formula() {
1241 let empty = PdfObject::Dictionary(PdfDictionary::new());
1245 assert_eq!(estimate_object_size(&empty), 24);
1246
1247 let mut dict = PdfDictionary::new();
1249 dict.insert(
1250 "Type".to_string(),
1251 PdfObject::Name(PdfName::new("Page".to_string())),
1252 );
1253 let obj = PdfObject::Dictionary(dict);
1254 let expected = 24 + 16 + 4 + (24 + 4); assert_eq!(estimate_object_size(&obj), expected);
1256 }
1257
1258 #[test]
1259 fn test_cache_isolation_between_instances() {
1260 let data = create_minimal_pdf();
1261
1262 let cursor1 = Cursor::new(data.clone());
1264 let cursor2 = Cursor::new(data);
1265
1266 let mut reader1 =
1267 OptimizedPdfReader::new(cursor1).expect("Minimal PDF must parse successfully");
1268 let mut reader2 =
1269 OptimizedPdfReader::new(cursor2).expect("Minimal PDF must parse successfully");
1270
1271 let _ = reader1.get_object(1, 0);
1273 assert_eq!(reader1.memory_stats().cached_objects, 1);
1274
1275 assert_eq!(
1277 reader2.memory_stats().cached_objects,
1278 0,
1279 "Readers must have independent caches"
1280 );
1281
1282 let _ = reader2.get_object(1, 0);
1284 assert_eq!(
1285 reader2.memory_stats().cached_objects,
1286 1,
1287 "reader2 cache should now have 1 object"
1288 );
1289 assert_eq!(
1290 reader1.memory_stats().cached_objects,
1291 1,
1292 "reader1 cache unchanged"
1293 );
1294 }
1295
1296 #[test]
1297 fn test_reader_with_strict_options() {
1298 let data = create_minimal_pdf();
1299 let cursor = Cursor::new(data);
1300
1301 let parse_options = ParseOptions::strict();
1302 let memory_options = MemoryOptions::default();
1303
1304 let reader =
1305 OptimizedPdfReader::new_with_options(cursor, parse_options, memory_options)
1306 .expect("Minimal PDF must parse successfully");
1307
1308 let opts = reader.options();
1309 assert_eq!(
1310 opts.strict_mode, true,
1311 "Strict options must have strict_mode=true"
1312 );
1313 }
1314
1315 #[test]
1316 fn test_reader_with_lenient_options() {
1317 let data = create_minimal_pdf();
1318 let cursor = Cursor::new(data);
1319
1320 let parse_options = ParseOptions::lenient();
1321 let memory_options = MemoryOptions::default();
1322
1323 let reader =
1324 OptimizedPdfReader::new_with_options(cursor, parse_options, memory_options)
1325 .expect("Minimal PDF must parse successfully");
1326
1327 let opts = reader.options();
1328 assert_eq!(
1329 opts.strict_mode, false,
1330 "Lenient options must have strict_mode=false"
1331 );
1332 }
1333
1334 #[test]
1339 fn test_open_from_file_path() {
1340 use std::io::Write;
1341 use tempfile::NamedTempFile;
1342
1343 let mut temp_file = NamedTempFile::new().expect("Failed to create temp file");
1345 temp_file
1346 .write_all(&create_minimal_pdf())
1347 .expect("Failed to write PDF data");
1348
1349 let path = temp_file.path();
1350
1351 let result = OptimizedPdfReader::open(path);
1353
1354 assert!(result.is_ok(), "open() must succeed with valid PDF file");
1355
1356 let reader = result.unwrap();
1357
1358 assert_eq!(
1360 reader.options().strict_mode,
1361 false,
1362 "open() must use lenient parsing"
1363 );
1364
1365 assert_eq!(reader.version().major, 1);
1367 assert_eq!(reader.version().minor, 4);
1368 }
1369
1370 #[test]
1371 fn test_open_with_memory_options() {
1372 use std::io::Write;
1373 use tempfile::NamedTempFile;
1374
1375 let mut temp_file = NamedTempFile::new().expect("Failed to create temp file");
1376 temp_file
1377 .write_all(&create_minimal_pdf())
1378 .expect("Failed to write PDF data");
1379
1380 let path = temp_file.path();
1381
1382 let memory_options = MemoryOptions::default().with_cache_size(10);
1384
1385 let result = OptimizedPdfReader::open_with_memory(path, memory_options);
1387
1388 assert!(result.is_ok(), "open_with_memory() must succeed");
1389
1390 let mut reader = result.unwrap();
1391
1392 assert_eq!(reader.options().strict_mode, false);
1394
1395 let _ = reader.get_object(1, 0);
1397 assert_eq!(
1398 reader.memory_stats().cached_objects,
1399 1,
1400 "Cache should respect custom memory options"
1401 );
1402 }
1403
1404 #[test]
1405 fn test_open_strict_mode() {
1406 use std::io::Write;
1407 use tempfile::NamedTempFile;
1408
1409 let mut temp_file = NamedTempFile::new().expect("Failed to create temp file");
1410 temp_file
1411 .write_all(&create_minimal_pdf())
1412 .expect("Failed to write PDF data");
1413
1414 let path = temp_file.path();
1415
1416 let result = OptimizedPdfReader::open_strict(path);
1418
1419 assert!(result.is_ok(), "open_strict() must succeed with valid PDF");
1420
1421 let reader = result.unwrap();
1422
1423 assert_eq!(
1425 reader.options().strict_mode,
1426 true,
1427 "open_strict() must use strict parsing"
1428 );
1429
1430 assert_eq!(reader.version().major, 1);
1432 assert_eq!(reader.version().minor, 4);
1433 }
1434
1435 #[test]
1436 fn test_open_nonexistent_file() {
1437 use std::path::PathBuf;
1438
1439 let path = PathBuf::from("/tmp/this_file_does_not_exist_xyz_123.pdf");
1441
1442 let result = OptimizedPdfReader::open(&path);
1443
1444 assert!(result.is_err(), "open() must fail with nonexistent file");
1445
1446 match result {
1448 Err(ParseError::Io(_)) => {
1449 }
1451 Err(other) => panic!("Expected IO error, got: {:?}", other),
1452 Ok(_) => panic!("Should not succeed with nonexistent file"),
1453 }
1454 }
1455
1456 #[test]
1457 fn test_load_object_from_disk_free_object() {
1458 let pdf_with_free = b"%PDF-1.4\n\
14631 0 obj\n\
1464<< /Type /Catalog /Pages 2 0 R >>\n\
1465endobj\n\
14662 0 obj\n\
1467<< /Type /Pages /Kids [3 0 R] /Count 1 >>\n\
1468endobj\n\
14693 0 obj\n\
1470<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >>\n\
1471endobj\n\
1472xref\n\
14730 4\n\
14740000000000 65535 f \n\
14750000000009 00000 n \n\
14760000000058 00000 n \n\
14770000000115 00000 n \n\
1478trailer\n\
1479<< /Size 4 /Root 1 0 R >>\n\
1480startxref\n\
1481186\n\
1482%%EOF\n"
1483 .to_vec();
1484
1485 let cursor = Cursor::new(pdf_with_free);
1486 let mut reader =
1487 OptimizedPdfReader::new(cursor).expect("PDF with free object must parse");
1488
1489 let result = reader.get_object(0, 65535);
1491
1492 if let Ok(obj) = result {
1494 assert!(
1495 matches!(obj, PdfObject::Null),
1496 "Free object should return Null"
1497 );
1498 }
1499 }
1500
1501 #[test]
1502 fn test_find_catalog_when_trailer_missing_root() {
1503 let data = create_minimal_pdf();
1507 let cursor = Cursor::new(data);
1508
1509 let mut reader = OptimizedPdfReader::new(cursor).expect("Minimal PDF must parse");
1510
1511 let result = reader.catalog();
1513
1514 if let Ok(catalog) = result {
1516 assert_eq!(
1517 catalog.get("Type"),
1518 Some(&PdfObject::Name(PdfName("Catalog".to_string()))),
1519 "Catalog must have /Type /Catalog"
1520 );
1521 }
1522 }
1523
1524 #[test]
1525 fn test_load_object_generation_mismatch_strict() {
1526 let data = create_minimal_pdf();
1531 let cursor = Cursor::new(data);
1532
1533 let parse_options = ParseOptions::strict();
1535 let memory_options = MemoryOptions::default();
1536
1537 let mut reader =
1538 OptimizedPdfReader::new_with_options(cursor, parse_options, memory_options)
1539 .expect("Minimal PDF must parse in strict mode");
1540
1541 let result = reader.get_object(1, 5);
1544
1545 assert!(
1547 result.is_err(),
1548 "Strict mode must reject generation number mismatch"
1549 );
1550
1551 if let Err(e) = result {
1552 assert!(
1553 matches!(e, ParseError::InvalidReference(_, _)),
1554 "Expected InvalidReference error, got: {:?}",
1555 e
1556 );
1557 }
1558 }
1559 }
1560}