1use std::collections::HashMap;
12use std::sync::OnceLock;
13
14use dashmap::DashMap;
15use rpdfium_core::error::PdfError;
16use rpdfium_core::fx_system::MAX_RECURSION_DEPTH;
17use rpdfium_core::{Name, ParsingMode, PdfSource};
18use smallvec::SmallVec;
19
20use crate::header::{PdfVersion, parse_header};
21use crate::object::{Object, ObjectId, StreamData};
22use crate::object_parser::parse_indirect_object;
23use crate::security::SecurityHandler;
24use crate::trailer::{TrailerInfo, parse_all_xrefs};
25use crate::xref::{XrefEntryType, XrefSection, XrefTable};
26
27pub(crate) enum ObjectSlot {
32 Direct {
34 offset: u64,
35 parsed: OnceLock<Result<Object, PdfError>>,
36 },
37 InStream {
39 stream_id: ObjectId,
40 index: u32,
41 parsed: OnceLock<Result<Object, PdfError>>,
42 },
43 Free,
45}
46
/// Maximum number of object streams whose parsed contents are retained in
/// `ObjectStore::objstm_cache`; once reached, further streams are parsed
/// without being cached.
const MAX_OBJSTM_CACHE_ENTRIES: usize = 256;
51
52pub struct ObjectStore<S: PdfSource = std::sync::Arc<[u8]>> {
56 source: S,
57 xref: XrefTable,
58 mode: ParsingMode,
59 trailer: TrailerInfo,
60 version: PdfVersion,
61 slots: HashMap<ObjectId, ObjectSlot>,
62 security_handler: Option<SecurityHandler>,
63 objstm_cache: DashMap<ObjectId, Vec<(u32, Object)>>,
66}
67
68impl<S: PdfSource> ObjectStore<S> {
69 pub fn open(source: S, mode: ParsingMode) -> Result<Self, PdfError> {
74 Self::open_with_password(source, mode, None)
75 }
76
77 pub fn open_with_password(
84 source: S,
85 mode: ParsingMode,
86 password: Option<&str>,
87 ) -> Result<Self, PdfError> {
88 let data = source.as_ref();
89
90 let (version, _header_end) = parse_header(data, mode)?;
92
93 let (xref, trailer) = match parse_all_xrefs(data, mode) {
95 Ok(result) => result,
96 Err(_) if mode == ParsingMode::Lenient => crate::xref::rebuild_xref(data)?,
97 Err(e) => return Err(e),
98 };
99
100 let slots = Self::build_slots(&xref.sections);
102
103 let mut store = Self {
104 source,
105 xref,
106 mode,
107 trailer,
108 version,
109 slots,
110 security_handler: None,
111 objstm_cache: DashMap::new(),
112 };
113
114 if let Some(encrypt_id) = store.trailer.encrypt {
116 let encrypt_dict = store
117 .resolve(encrypt_id)?
118 .as_dict()
119 .ok_or(PdfError::InvalidObject(
120 "encryption dictionary is not a dict".into(),
121 ))?
122 .clone();
123
124 let file_id = store
125 .trailer
126 .id
127 .as_ref()
128 .map(|ids| ids[0].as_slice())
129 .unwrap_or(&[]);
130
131 let pwd = password.unwrap_or("");
132
133 let handler = SecurityHandler::from_encrypt_dict(&encrypt_dict, &store, pwd, file_id)
134 .map_err(|e| match e {
135 crate::security::SecurityError::InvalidPassword => PdfError::InvalidPassword,
136 crate::security::SecurityError::UnsupportedVersion(_, _) => {
137 PdfError::UnsupportedEncryption
138 }
139 _ => PdfError::InvalidObject(e.to_string()),
140 })?;
141
142 store.security_handler = Some(handler);
143 }
144
145 Ok(store)
146 }
147
148 fn build_slots(xref_sections: &[XrefSection]) -> HashMap<ObjectId, ObjectSlot> {
154 let mut slots = HashMap::new();
155
156 for section in xref_sections {
158 for entry in §ion.entries {
159 match &entry.entry_type {
160 XrefEntryType::InUse { offset } => {
161 slots.entry(entry.id).or_insert(ObjectSlot::Direct {
162 offset: *offset,
163 parsed: OnceLock::new(),
164 });
165 }
166 XrefEntryType::InStream { stream_id, index } => {
167 slots.entry(entry.id).or_insert(ObjectSlot::InStream {
168 stream_id: *stream_id,
169 index: *index,
170 parsed: OnceLock::new(),
171 });
172 }
173 XrefEntryType::Free => {
174 slots.entry(entry.id).or_insert(ObjectSlot::Free);
175 }
176 }
177 }
178 }
179
180 slots.retain(|_, slot| !matches!(slot, ObjectSlot::Free));
182 slots
183 }
184
185 pub fn resolve(&self, id: ObjectId) -> Result<&Object, PdfError> {
188 let slot = self.slots.get(&id).ok_or(PdfError::UnknownObject(id))?;
189
190 let result = match slot {
191 ObjectSlot::Direct { offset, parsed } => parsed.get_or_init(|| {
192 let data = self.source.as_ref();
193 match parse_indirect_object(data, *offset, self.mode) {
194 Ok((_id, obj)) => Ok(obj),
195 Err(e) => Err(e),
196 }
197 }),
198 ObjectSlot::InStream {
199 stream_id,
200 index,
201 parsed,
202 } => parsed.get_or_init(|| self.parse_from_objstm(*stream_id, *index)),
203 ObjectSlot::Free => {
204 return Err(PdfError::UnknownObject(id));
206 }
207 };
208
209 match result {
210 Ok(obj) => Ok(obj),
211 Err(e) if self.mode == ParsingMode::Lenient => {
212 tracing::warn!(?id, error = ?e, "parse failed, returning Null");
213 const NULL_OBJ: Object = Object::Null;
217 Ok(&NULL_OBJ)
218 }
219 Err(_e) => Err(PdfError::UnknownObject(id)),
220 }
221 }
222
223 pub fn deep_resolve<'a>(&'a self, obj: &'a Object) -> Result<&'a Object, PdfError> {
228 let mut current = obj;
229 let mut seen = SmallVec::<[ObjectId; 8]>::new();
230
231 while let Object::Reference(id) = current {
232 if seen.len() >= MAX_RECURSION_DEPTH as usize {
233 return Err(PdfError::RecursionLimitExceeded);
234 }
235 if seen.contains(id) {
236 return Err(PdfError::CircularReference(*id));
237 }
238 seen.push(*id);
239 current = self.resolve(*id)?;
240 }
241
242 Ok(current)
243 }
244
245 pub fn dict_resolve<'a>(
247 &'a self,
248 dict: &'a HashMap<Name, Object>,
249 key: &Name,
250 ) -> Result<Option<&'a Object>, PdfError> {
251 match dict.get(key) {
252 Some(obj) => Ok(Some(self.deep_resolve(obj)?)),
253 None => Ok(None),
254 }
255 }
256
257 pub fn decode_stream(&self, stream: &Object) -> Result<Vec<u8>, PdfError> {
262 let (dict, data) = match stream {
263 Object::Stream { dict, data } => (dict, data),
264 _ => return Err(PdfError::NotAStream),
265 };
266
267 match data {
268 StreamData::Raw { offset, length } => {
269 let source = self.source.as_ref();
270 let start = *offset as usize;
271 let end = start
272 .checked_add(*length as usize)
273 .ok_or(PdfError::StreamTooLarge)?;
274 if end > source.len() {
275 return Err(PdfError::StreamTooLarge);
276 }
277 let raw = &source[start..end];
278
279 let mut filters = crate::filter::resolve_filter_chain(dict);
280 if filters.is_empty() {
281 return Ok(raw.to_vec());
282 }
283
284 self.resolve_jbig2_globals(dict, &mut filters);
285
286 rpdfium_codec::apply_filter_chain(raw, &filters)
287 .map_err(|e| PdfError::StreamDecodeError(e.to_string()))
288 }
289 StreamData::Decoded { data: bytes } => Ok(bytes.clone()),
290 }
291 }
292
293 fn parse_from_objstm(&self, stream_id: ObjectId, index: u32) -> Result<Object, PdfError> {
299 if let Some(cached) = self.objstm_cache.get(&stream_id) {
301 return match cached.get(index as usize) {
302 Some((_obj_num, obj)) => Ok(obj.clone()),
303 None => Err(PdfError::InvalidObjectStream),
304 };
305 }
306
307 let stream_obj = self.resolve(stream_id)?;
309
310 let decoded = self.decode_stream_for_object(stream_obj, stream_id)?;
313
314 let dict = match stream_obj {
315 Object::Stream { dict, .. } => dict,
316 _ => return Err(PdfError::NotAStream),
317 };
318
319 let contents = crate::object_stream::parse_object_stream(&decoded, dict, self.mode)?;
321
322 if self.objstm_cache.len() < MAX_OBJSTM_CACHE_ENTRIES {
324 let result = contents
325 .objects
326 .get(index as usize)
327 .map(|(_obj_num, obj)| obj.clone())
328 .ok_or(PdfError::InvalidObjectStream);
329
330 self.objstm_cache.insert(stream_id, contents.objects);
331
332 result
333 } else {
334 match crate::object_stream::get_object_from_stream(&contents, index) {
336 Some(obj) => Ok(obj.clone()),
337 None => Err(PdfError::InvalidObjectStream),
338 }
339 }
340 }
341
342 pub fn parsing_mode(&self) -> ParsingMode {
344 self.mode
345 }
346
347 pub fn trailer(&self) -> &TrailerInfo {
349 &self.trailer
350 }
351
352 pub fn version(&self) -> PdfVersion {
354 self.version
355 }
356
357 pub fn xref(&self) -> &XrefTable {
359 &self.xref
360 }
361
362 pub fn object_count(&self) -> usize {
364 self.slots.len()
365 }
366
367 pub fn contains(&self, id: ObjectId) -> bool {
369 self.slots.contains_key(&id)
370 }
371
372 pub fn object_ids(&self) -> impl Iterator<Item = &ObjectId> {
374 self.slots.keys()
375 }
376
377 pub fn security_handler(&self) -> Option<&SecurityHandler> {
379 self.security_handler.as_ref()
380 }
381
382 pub fn source_data(&self) -> &S {
384 &self.source
385 }
386
387 pub fn xref_start_offset(&self) -> u64 {
391 self.xref.start_offset
392 }
393
394 pub fn max_object_number(&self) -> u32 {
396 self.slots.keys().map(|id| id.number).max().unwrap_or(0)
397 }
398
399 pub fn decode_stream_for_object(
404 &self,
405 stream: &Object,
406 obj_id: ObjectId,
407 ) -> Result<Vec<u8>, PdfError> {
408 let (dict, data) = match stream {
409 Object::Stream { dict, data } => (dict, data),
410 _ => return Err(PdfError::NotAStream),
411 };
412
413 match data {
414 StreamData::Raw { offset, length } => {
415 let source = self.source.as_ref();
416 let start = *offset as usize;
417 let end = start
418 .checked_add(*length as usize)
419 .ok_or(PdfError::StreamTooLarge)?;
420 if end > source.len() {
421 return Err(PdfError::StreamTooLarge);
422 }
423 let raw = &source[start..end];
424
425 let decrypted;
427 let raw_data = if let Some(handler) = &self.security_handler {
428 decrypted = handler.decrypt_stream(raw, obj_id);
429 &decrypted
430 } else {
431 raw
432 };
433
434 let mut filters = crate::filter::resolve_filter_chain(dict);
435 if filters.is_empty() {
436 return Ok(raw_data.to_vec());
437 }
438
439 self.resolve_jbig2_globals(dict, &mut filters);
440
441 rpdfium_codec::apply_filter_chain(raw_data, &filters)
442 .map_err(|e| PdfError::StreamDecodeError(e.to_string()))
443 }
444 StreamData::Decoded { data: bytes } => Ok(bytes.clone()),
445 }
446 }
447
448 pub fn raw_stream_bytes_for_object(
452 &self,
453 stream: &Object,
454 obj_id: ObjectId,
455 ) -> Result<Vec<u8>, PdfError> {
456 let (_dict, data) = match stream {
457 Object::Stream { dict, data } => (dict, data),
458 _ => return Err(PdfError::NotAStream),
459 };
460
461 match data {
462 StreamData::Raw { offset, length } => {
463 let source = self.source.as_ref();
464 let start = *offset as usize;
465 let end = start
466 .checked_add(*length as usize)
467 .ok_or(PdfError::StreamTooLarge)?;
468 if end > source.len() {
469 return Err(PdfError::StreamTooLarge);
470 }
471 let raw = &source[start..end];
472
473 if let Some(handler) = &self.security_handler {
475 Ok(handler.decrypt_stream(raw, obj_id))
476 } else {
477 Ok(raw.to_vec())
478 }
479 }
480 StreamData::Decoded { data: bytes } => Ok(bytes.clone()),
481 }
482 }
483
484 fn resolve_jbig2_globals(
488 &self,
489 dict: &HashMap<Name, Object>,
490 filters: &mut [(rpdfium_codec::DecodeFilter, rpdfium_codec::FilterParams)],
491 ) {
492 use rpdfium_codec::DecodeFilter;
493
494 let jbig2_entry = filters.iter_mut().find(|(f, _)| *f == DecodeFilter::JBIG2);
496 let jbig2_params = match jbig2_entry {
497 Some((_, params)) => params,
498 None => return,
499 };
500
501 let decode_parms = match dict.get(&Name::decode_parms()) {
503 Some(obj) => obj,
504 None => return,
505 };
506
507 let parms_dict = match decode_parms {
510 Object::Dictionary(d) => Some(d),
511 Object::Array(arr) => {
512 let filter_obj = dict.get(&Name::filter());
514 let jbig2_idx = match filter_obj {
515 Some(Object::Array(filter_arr)) => filter_arr.iter().position(|o| {
516 o.as_name().is_some_and(|n| {
517 n.as_bytes() == b"JBIG2Decode" || n.as_bytes() == b"JBIG2"
518 })
519 }),
520 _ => Some(0), };
522 jbig2_idx
523 .and_then(|idx| arr.get(idx))
524 .and_then(|o| o.as_dict())
525 }
526 _ => None,
527 };
528
529 let parms_dict = match parms_dict {
530 Some(d) => d,
531 None => return,
532 };
533
534 let globals_obj = match parms_dict.get(&Name::jbig2_globals()) {
536 Some(obj) => obj,
537 None => return,
538 };
539
540 let globals_stream = match globals_obj {
542 Object::Reference(id) => match self.resolve(*id) {
543 Ok(obj) => obj,
544 Err(_) => return,
545 },
546 _ => return,
547 };
548
549 if let Ok(decoded) = self.decode_stream(globals_stream) {
550 jbig2_params.jbig2_globals = Some(decoded);
551 }
552 }
553
554 #[cfg(test)]
556 pub(crate) fn objstm_cache_len(&self) -> usize {
557 self.objstm_cache.len()
558 }
559
560 pub fn decrypt_string(&self, data: &[u8], obj_id: ObjectId) -> Vec<u8> {
564 match &self.security_handler {
565 Some(handler) => handler.decrypt_string(data, obj_id),
566 None => data.to_vec(),
567 }
568 }
569}
570
571#[cfg(test)]
572mod tests {
573 use super::*;
574
575 #[test]
576 fn build_slots_newest_wins() {
577 let newer = XrefSection {
578 entries: vec![crate::xref::XrefEntry {
579 id: ObjectId::new(1, 0),
580 entry_type: XrefEntryType::InUse { offset: 100 },
581 }],
582 };
583 let older = XrefSection {
584 entries: vec![crate::xref::XrefEntry {
585 id: ObjectId::new(1, 0),
586 entry_type: XrefEntryType::InUse { offset: 50 },
587 }],
588 };
589
590 let slots = ObjectStore::<Vec<u8>>::build_slots(&[newer, older]);
591 assert_eq!(slots.len(), 1);
592
593 match &slots[&ObjectId::new(1, 0)] {
594 ObjectSlot::Direct { offset, .. } => assert_eq!(*offset, 100),
595 _ => panic!("expected Direct slot"),
596 }
597 }
598
599 #[test]
600 fn build_slots_free_entries_removed() {
601 let section = XrefSection {
602 entries: vec![
603 crate::xref::XrefEntry {
604 id: ObjectId::new(0, 65535),
605 entry_type: XrefEntryType::Free,
606 },
607 crate::xref::XrefEntry {
608 id: ObjectId::new(1, 0),
609 entry_type: XrefEntryType::InUse { offset: 100 },
610 },
611 ],
612 };
613
614 let slots = ObjectStore::<Vec<u8>>::build_slots(&[section]);
615 assert_eq!(slots.len(), 1);
616 assert!(slots.contains_key(&ObjectId::new(1, 0)));
617 assert!(!slots.contains_key(&ObjectId::new(0, 65535)));
618 }
619
620 #[test]
621 fn build_slots_free_hides_older() {
622 let newer = XrefSection {
624 entries: vec![crate::xref::XrefEntry {
625 id: ObjectId::new(5, 0),
626 entry_type: XrefEntryType::Free,
627 }],
628 };
629 let older = XrefSection {
631 entries: vec![crate::xref::XrefEntry {
632 id: ObjectId::new(5, 0),
633 entry_type: XrefEntryType::InUse { offset: 200 },
634 }],
635 };
636
637 let slots = ObjectStore::<Vec<u8>>::build_slots(&[newer, older]);
638 assert!(!slots.contains_key(&ObjectId::new(5, 0)));
641 }
642
643 #[test]
644 fn build_slots_in_stream() {
645 let section = XrefSection {
646 entries: vec![crate::xref::XrefEntry {
647 id: ObjectId::new(10, 0),
648 entry_type: XrefEntryType::InStream {
649 stream_id: ObjectId::new(3, 0),
650 index: 2,
651 },
652 }],
653 };
654
655 let slots = ObjectStore::<Vec<u8>>::build_slots(&[section]);
656 assert_eq!(slots.len(), 1);
657 match &slots[&ObjectId::new(10, 0)] {
658 ObjectSlot::InStream {
659 stream_id, index, ..
660 } => {
661 assert_eq!(*stream_id, ObjectId::new(3, 0));
662 assert_eq!(*index, 2);
663 }
664 _ => panic!("expected InStream slot"),
665 }
666 }
667
668 #[test]
669 fn deep_resolve_non_reference() {
670 let pdf = build_minimal_pdf();
672 let store = ObjectStore::open(pdf, ParsingMode::Lenient);
673 if let Ok(store) = store {
675 let obj = Object::Integer(42);
676 let resolved = store.deep_resolve(&obj).unwrap();
677 assert_eq!(resolved.as_i64(), Some(42));
678 }
679 }
680
681 fn build_minimal_pdf() -> Vec<u8> {
683 let mut pdf = Vec::new();
684 pdf.extend_from_slice(b"%PDF-1.4\n");
685
686 let obj1_offset = pdf.len();
687 pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
688
689 let obj2_offset = pdf.len();
690 pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");
691
692 let xref_offset = pdf.len();
693 pdf.extend_from_slice(b"xref\n");
694 pdf.extend_from_slice(b"0 3\n");
695 pdf.extend_from_slice(b"0000000000 65535 f \r\n");
696 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj1_offset).as_bytes());
697 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj2_offset).as_bytes());
698 pdf.extend_from_slice(b"trailer\n");
699 pdf.extend_from_slice(b"<< /Size 3 /Root 1 0 R >>\n");
700 pdf.extend_from_slice(format!("startxref\n{}\n%%EOF", xref_offset).as_bytes());
701
702 pdf
703 }
704
705 #[test]
706 fn open_minimal_pdf() {
707 let pdf = build_minimal_pdf();
708 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
709
710 assert_eq!(store.version(), PdfVersion::new(1, 4));
711 assert_eq!(store.trailer().root, ObjectId::new(1, 0));
712 assert_eq!(store.trailer().size, 3);
713 assert!(store.contains(ObjectId::new(1, 0)));
714 assert!(store.contains(ObjectId::new(2, 0)));
715 assert!(!store.contains(ObjectId::new(0, 65535)));
716 }
717
718 #[test]
719 fn resolve_object_from_minimal_pdf() {
720 let pdf = build_minimal_pdf();
721 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
722
723 let obj = store.resolve(ObjectId::new(1, 0)).unwrap();
724 let dict = obj.as_dict().unwrap();
725 assert!(dict.contains_key(&Name::r#type()));
726 }
727
728 #[test]
729 fn resolve_unknown_object() {
730 let pdf = build_minimal_pdf();
731 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
732
733 let result = store.resolve(ObjectId::new(999, 0));
734 assert!(result.is_err());
735 }
736
737 #[test]
738 fn deep_resolve_follows_reference() {
739 let pdf = build_minimal_pdf();
740 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
741
742 let reference = Object::Reference(ObjectId::new(1, 0));
743 let resolved = store.deep_resolve(&reference).unwrap();
744 assert!(resolved.as_dict().is_some());
745 }
746
747 #[test]
748 fn dict_resolve_present() {
749 let pdf = build_minimal_pdf();
750 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
751
752 let catalog = store.resolve(ObjectId::new(1, 0)).unwrap();
753 let dict = catalog.as_dict().unwrap();
754
755 let pages = store.dict_resolve(dict, &Name::pages()).unwrap();
757 assert!(pages.is_some());
758 let pages_obj = pages.unwrap();
759 assert!(pages_obj.as_dict().is_some());
760 }
761
762 #[test]
763 fn dict_resolve_absent() {
764 let pdf = build_minimal_pdf();
765 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
766
767 let catalog = store.resolve(ObjectId::new(1, 0)).unwrap();
768 let dict = catalog.as_dict().unwrap();
769
770 let nonexistent = store.dict_resolve(dict, &Name::encrypt()).unwrap();
771 assert!(nonexistent.is_none());
772 }
773
774 #[test]
780 fn deep_resolve_self_reference() {
781 let pdf = build_self_referencing_pdf();
783 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
784
785 let reference = Object::Reference(ObjectId::new(3, 0));
786 let result = store.deep_resolve(&reference);
787 assert!(result.is_err());
788 }
789
790 #[test]
792 fn deep_resolve_concrete_types() {
793 let pdf = build_minimal_pdf();
794 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
795
796 let obj = Object::Boolean(true);
798 assert_eq!(store.deep_resolve(&obj).unwrap().as_bool(), Some(true));
799
800 let obj = Object::Integer(99);
802 assert_eq!(store.deep_resolve(&obj).unwrap().as_i64(), Some(99));
803
804 let obj = Object::Null;
806 assert!(store.deep_resolve(&obj).unwrap().is_null());
807
808 let obj = Object::String(rpdfium_core::PdfString::from_bytes(b"test".to_vec()));
810 assert!(store.deep_resolve(&obj).unwrap().as_string().is_some());
811 }
812
813 #[test]
815 fn deep_resolve_unknown_reference() {
816 let pdf = build_minimal_pdf();
817 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
818
819 let reference = Object::Reference(ObjectId::new(999, 0));
820 let result = store.deep_resolve(&reference);
821 assert!(result.is_err());
822 }
823
824 #[test]
830 fn open_empty_source() {
831 let result = ObjectStore::open(Vec::new(), ParsingMode::Strict);
832 assert!(result.is_err());
833 }
834
835 #[test]
837 fn open_garbage_data() {
838 let result = ObjectStore::open(b"this is not a pdf".to_vec(), ParsingMode::Strict);
839 assert!(result.is_err());
840 }
841
842 #[test]
844 fn open_header_only() {
845 let result = ObjectStore::open(b"%PDF-1.4\n".to_vec(), ParsingMode::Strict);
846 assert!(result.is_err());
847 }
848
849 #[test]
855 fn build_slots_empty() {
856 let slots = ObjectStore::<Vec<u8>>::build_slots(&[]);
857 assert!(slots.is_empty());
858 }
859
860 #[test]
862 fn build_slots_disjoint_sections() {
863 let section1 = XrefSection {
864 entries: vec![crate::xref::XrefEntry {
865 id: ObjectId::new(1, 0),
866 entry_type: XrefEntryType::InUse { offset: 100 },
867 }],
868 };
869 let section2 = XrefSection {
870 entries: vec![crate::xref::XrefEntry {
871 id: ObjectId::new(2, 0),
872 entry_type: XrefEntryType::InUse { offset: 200 },
873 }],
874 };
875
876 let slots = ObjectStore::<Vec<u8>>::build_slots(&[section1, section2]);
877 assert_eq!(slots.len(), 2);
878 assert!(slots.contains_key(&ObjectId::new(1, 0)));
879 assert!(slots.contains_key(&ObjectId::new(2, 0)));
880 }
881
882 #[test]
884 fn build_slots_in_stream_fields() {
885 let section = XrefSection {
886 entries: vec![crate::xref::XrefEntry {
887 id: ObjectId::new(42, 0),
888 entry_type: XrefEntryType::InStream {
889 stream_id: ObjectId::new(10, 0),
890 index: 7,
891 },
892 }],
893 };
894
895 let slots = ObjectStore::<Vec<u8>>::build_slots(&[section]);
896 match &slots[&ObjectId::new(42, 0)] {
897 ObjectSlot::InStream {
898 stream_id, index, ..
899 } => {
900 assert_eq!(*stream_id, ObjectId::new(10, 0));
901 assert_eq!(*index, 7);
902 }
903 _ => panic!("expected InStream"),
904 }
905 }
906
907 #[test]
913 fn object_count_and_ids() {
914 let pdf = build_minimal_pdf();
915 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
916
917 assert_eq!(store.object_count(), 2);
918
919 let ids: Vec<ObjectId> = store.object_ids().copied().collect();
920 assert!(ids.contains(&ObjectId::new(1, 0)));
921 assert!(ids.contains(&ObjectId::new(2, 0)));
922 }
923
924 #[test]
926 fn parsing_mode_accessor() {
927 let pdf = build_minimal_pdf();
928 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
929 assert_eq!(store.parsing_mode(), ParsingMode::Strict);
930 }
931
932 #[test]
934 fn lenient_mode_returns_null_for_bad_object() {
935 let pdf = build_pdf_with_bad_object();
937 let store = ObjectStore::open(pdf, ParsingMode::Lenient).unwrap();
938
939 let result = store.resolve(ObjectId::new(3, 0));
941 match result {
942 Ok(obj) => assert!(obj.is_null()),
943 Err(_) => {
944 }
946 }
947 }
948
949 #[test]
955 fn decode_stream_not_a_stream() {
956 let pdf = build_minimal_pdf();
957 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
958
959 let obj = Object::Integer(42);
960 let result = store.decode_stream(&obj);
961 assert!(result.is_err());
962 }
963
964 #[test]
970 fn decode_stream_flate() {
971 let pdf = build_pdf_with_flate_stream();
972 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
973
974 let stream_obj = store.resolve(ObjectId::new(3, 0)).unwrap();
975 let decoded = store.decode_stream(stream_obj).unwrap();
976 assert_eq!(decoded, b"Hello, PDF stream!");
977 }
978
979 #[test]
981 fn decode_stream_ascii_hex() {
982 let pdf = build_pdf_with_ascii_hex_stream();
983 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
984
985 let stream_obj = store.resolve(ObjectId::new(3, 0)).unwrap();
986 let decoded = store.decode_stream(stream_obj).unwrap();
987 assert_eq!(decoded, b"Hello");
988 }
989
990 #[test]
992 fn decode_stream_no_filter() {
993 let pdf = build_pdf_with_raw_stream();
994 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
995
996 let stream_obj = store.resolve(ObjectId::new(3, 0)).unwrap();
997 let decoded = store.decode_stream(stream_obj).unwrap();
998 assert_eq!(decoded, b"raw data");
999 }
1000
1001 fn build_pdf_with_flate_stream() -> Vec<u8> {
1007 use flate2::Compression;
1008 use flate2::write::ZlibEncoder;
1009 use std::io::Write;
1010
1011 let original = b"Hello, PDF stream!";
1012 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
1013 encoder.write_all(original).unwrap();
1014 let compressed = encoder.finish().unwrap();
1015
1016 let mut pdf = Vec::new();
1017 pdf.extend_from_slice(b"%PDF-1.4\n");
1018
1019 let obj1_offset = pdf.len();
1020 pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
1021
1022 let obj2_offset = pdf.len();
1023 pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");
1024
1025 let obj3_offset = pdf.len();
1026 let stream_dict = format!(
1027 "3 0 obj\n<< /Length {} /Filter /FlateDecode >>\nstream\n",
1028 compressed.len()
1029 );
1030 pdf.extend_from_slice(stream_dict.as_bytes());
1031 pdf.extend_from_slice(&compressed);
1032 pdf.extend_from_slice(b"\nendstream\nendobj\n");
1033
1034 let xref_offset = pdf.len();
1035 pdf.extend_from_slice(b"xref\n");
1036 pdf.extend_from_slice(b"0 4\n");
1037 pdf.extend_from_slice(b"0000000000 65535 f \r\n");
1038 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj1_offset).as_bytes());
1039 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj2_offset).as_bytes());
1040 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj3_offset).as_bytes());
1041 pdf.extend_from_slice(b"trailer\n");
1042 pdf.extend_from_slice(b"<< /Size 4 /Root 1 0 R >>\n");
1043 pdf.extend_from_slice(format!("startxref\n{}\n%%EOF", xref_offset).as_bytes());
1044
1045 pdf
1046 }
1047
1048 fn build_pdf_with_ascii_hex_stream() -> Vec<u8> {
1050 let hex_data = b"48656C6C6F>";
1051
1052 let mut pdf = Vec::new();
1053 pdf.extend_from_slice(b"%PDF-1.4\n");
1054
1055 let obj1_offset = pdf.len();
1056 pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
1057
1058 let obj2_offset = pdf.len();
1059 pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");
1060
1061 let obj3_offset = pdf.len();
1062 let stream_dict = format!(
1063 "3 0 obj\n<< /Length {} /Filter /ASCIIHexDecode >>\nstream\n",
1064 hex_data.len()
1065 );
1066 pdf.extend_from_slice(stream_dict.as_bytes());
1067 pdf.extend_from_slice(hex_data);
1068 pdf.extend_from_slice(b"\nendstream\nendobj\n");
1069
1070 let xref_offset = pdf.len();
1071 pdf.extend_from_slice(b"xref\n");
1072 pdf.extend_from_slice(b"0 4\n");
1073 pdf.extend_from_slice(b"0000000000 65535 f \r\n");
1074 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj1_offset).as_bytes());
1075 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj2_offset).as_bytes());
1076 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj3_offset).as_bytes());
1077 pdf.extend_from_slice(b"trailer\n");
1078 pdf.extend_from_slice(b"<< /Size 4 /Root 1 0 R >>\n");
1079 pdf.extend_from_slice(format!("startxref\n{}\n%%EOF", xref_offset).as_bytes());
1080
1081 pdf
1082 }
1083
1084 fn build_pdf_with_raw_stream() -> Vec<u8> {
1086 let raw = b"raw data";
1087
1088 let mut pdf = Vec::new();
1089 pdf.extend_from_slice(b"%PDF-1.4\n");
1090
1091 let obj1_offset = pdf.len();
1092 pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
1093
1094 let obj2_offset = pdf.len();
1095 pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");
1096
1097 let obj3_offset = pdf.len();
1098 let stream_dict = format!("3 0 obj\n<< /Length {} >>\nstream\n", raw.len());
1099 pdf.extend_from_slice(stream_dict.as_bytes());
1100 pdf.extend_from_slice(raw);
1101 pdf.extend_from_slice(b"\nendstream\nendobj\n");
1102
1103 let xref_offset = pdf.len();
1104 pdf.extend_from_slice(b"xref\n");
1105 pdf.extend_from_slice(b"0 4\n");
1106 pdf.extend_from_slice(b"0000000000 65535 f \r\n");
1107 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj1_offset).as_bytes());
1108 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj2_offset).as_bytes());
1109 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj3_offset).as_bytes());
1110 pdf.extend_from_slice(b"trailer\n");
1111 pdf.extend_from_slice(b"<< /Size 4 /Root 1 0 R >>\n");
1112 pdf.extend_from_slice(format!("startxref\n{}\n%%EOF", xref_offset).as_bytes());
1113
1114 pdf
1115 }
1116
1117 fn build_self_referencing_pdf() -> Vec<u8> {
1119 let mut pdf = Vec::new();
1120 pdf.extend_from_slice(b"%PDF-1.4\n");
1121
1122 let obj1_offset = pdf.len();
1123 pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
1124
1125 let obj2_offset = pdf.len();
1126 pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");
1127
1128 let obj3_offset = pdf.len();
1129 pdf.extend_from_slice(b"3 0 obj\n3 0 R\nendobj\n");
1130
1131 let xref_offset = pdf.len();
1132 pdf.extend_from_slice(b"xref\n");
1133 pdf.extend_from_slice(b"0 4\n");
1134 pdf.extend_from_slice(b"0000000000 65535 f \r\n");
1135 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj1_offset).as_bytes());
1136 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj2_offset).as_bytes());
1137 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj3_offset).as_bytes());
1138 pdf.extend_from_slice(b"trailer\n");
1139 pdf.extend_from_slice(b"<< /Size 4 /Root 1 0 R >>\n");
1140 pdf.extend_from_slice(format!("startxref\n{}\n%%EOF", xref_offset).as_bytes());
1141
1142 pdf
1143 }
1144
1145 fn build_pdf_with_bad_object() -> Vec<u8> {
1147 let mut pdf = Vec::new();
1148 pdf.extend_from_slice(b"%PDF-1.4\n");
1149
1150 let obj1_offset = pdf.len();
1151 pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
1152
1153 let obj2_offset = pdf.len();
1154 pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");
1155
1156 let obj3_offset = 5; let xref_offset = pdf.len();
1160 pdf.extend_from_slice(b"xref\n");
1161 pdf.extend_from_slice(b"0 4\n");
1162 pdf.extend_from_slice(b"0000000000 65535 f \r\n");
1163 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj1_offset).as_bytes());
1164 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj2_offset).as_bytes());
1165 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj3_offset).as_bytes());
1166 pdf.extend_from_slice(b"trailer\n");
1167 pdf.extend_from_slice(b"<< /Size 4 /Root 1 0 R >>\n");
1168 pdf.extend_from_slice(format!("startxref\n{}\n%%EOF", xref_offset).as_bytes());
1169
1170 pdf
1171 }
1172
1173 fn build_pdf_with_corrupt_xref() -> Vec<u8> {
1175 let mut pdf = Vec::new();
1176 pdf.extend_from_slice(b"%PDF-1.4\n");
1177 pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
1178 pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");
1179 pdf.extend_from_slice(b"trailer\n<< /Size 3 /Root 1 0 R >>\n");
1181 pdf.extend_from_slice(b"startxref\n99999\n%%EOF");
1182 pdf
1183 }
1184
1185 #[test]
1191 fn open_corrupt_xref_lenient_rebuilds() {
1192 let pdf = build_pdf_with_corrupt_xref();
1193 let store = ObjectStore::open(pdf, ParsingMode::Lenient).unwrap();
1194
1195 assert!(store.contains(ObjectId::new(1, 0)));
1197 assert!(store.contains(ObjectId::new(2, 0)));
1198 assert_eq!(store.trailer().root, ObjectId::new(1, 0));
1199 }
1200
1201 #[test]
1203 fn open_corrupt_xref_strict_fails() {
1204 let pdf = build_pdf_with_corrupt_xref();
1205 let result = ObjectStore::open(pdf, ParsingMode::Strict);
1206 assert!(result.is_err());
1207 }
1208
1209 #[test]
1211 fn open_valid_xref_uses_normal_path() {
1212 let pdf = build_minimal_pdf();
1213 let store = ObjectStore::open(pdf, ParsingMode::Lenient).unwrap();
1214 assert!(store.contains(ObjectId::new(1, 0)));
1215 assert!(store.contains(ObjectId::new(2, 0)));
1216 assert_eq!(store.trailer().root, ObjectId::new(1, 0));
1217 assert_eq!(store.trailer().size, 3);
1218 }
1219
#[test]
/// The xref subsection lists five entries (objects 0-4) while the trailer
/// declares `/Size 4`. Lenient opening must still expose objects 1-4 and
/// report the trailer's own size value.
fn open_wrong_xref_size_still_resolves() {
    // Object 4 is a content stream whose /Length matches its payload.
    let content = b"q 0 0 0 rg 0 290 10 10 re B* Q";
    let stream_header = format!("4 0 obj\n<< /Length {} >>\nstream\n", content.len());
    let stream_parts: [&[u8]; 3] = [stream_header.as_bytes(), content, b"\nendstream\nendobj\n"];
    let stream_object = stream_parts.concat();

    let objects: [&[u8]; 4] = [
        b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n",
        b"2 0 obj\n<< /Type /Pages /MediaBox [0 0 200 300] /Count 1 /Kids [3 0 R] >>\nendobj\n",
        b"3 0 obj\n<< /Type /Page /Parent 2 0 R >>\nendobj\n",
        &stream_object,
    ];

    // Append each object and record its xref row at the same time, so the
    // recorded offset is exactly where the object starts.
    let mut pdf = b"%PDF-1.4\n".to_vec();
    let mut xref = String::from("xref\n0 5\n0000000000 65535 f \r\n");
    for object in objects {
        xref.push_str(&format!("{:010} 00000 n \r\n", pdf.len()));
        pdf.extend_from_slice(object);
    }

    let xref_offset = pdf.len();
    pdf.extend_from_slice(xref.as_bytes());
    pdf.extend_from_slice(b"trailer\n<< /Root 1 0 R /Size 4 >>\n");
    pdf.extend_from_slice(format!("startxref\n{xref_offset}\n%%EOF").as_bytes());

    let store = ObjectStore::open(pdf, ParsingMode::Lenient).unwrap();
    for number in 1..=4 {
        assert!(store.contains(ObjectId::new(number, 0)));
    }
    let trailer = store.trailer();
    assert_eq!(trailer.root, ObjectId::new(1, 0));
    assert_eq!(trailer.size, 4);
}
1262
1263 fn build_pdf_with_objstm() -> Vec<u8> {
1270 use flate2::Compression;
1271 use flate2::write::ZlibEncoder;
1272 use std::io::Write;
1273
1274 let mut pdf = Vec::new();
1275 pdf.extend_from_slice(b"%PDF-1.5\n");
1276
1277 let obj1_offset = pdf.len();
1279 pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
1280
1281 let obj2_offset = pdf.len();
1283 pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");
1284
1285 let objstm_raw = b"10 0 11 3 42 true";
1289 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
1290 encoder.write_all(objstm_raw).unwrap();
1291 let objstm_compressed = encoder.finish().unwrap();
1292
1293 let obj3_offset = pdf.len();
1294 let objstm_dict = format!(
1295 "3 0 obj\n<< /Type /ObjStm /N 2 /First 10 /Length {} /Filter /FlateDecode >>\nstream\n",
1296 objstm_compressed.len()
1297 );
1298 pdf.extend_from_slice(objstm_dict.as_bytes());
1299 pdf.extend_from_slice(&objstm_compressed);
1300 pdf.extend_from_slice(b"\nendstream\nendobj\n");
1301
1302 let mut xref_data = Vec::new();
1308 xref_data.push(0);
1310 xref_data.extend_from_slice(&(0u16).to_be_bytes());
1311 xref_data.push(255);
1312 xref_data.push(1);
1314 xref_data.extend_from_slice(&(obj1_offset as u16).to_be_bytes());
1315 xref_data.push(0);
1316 xref_data.push(1);
1318 xref_data.extend_from_slice(&(obj2_offset as u16).to_be_bytes());
1319 xref_data.push(0);
1320 xref_data.push(1);
1322 xref_data.extend_from_slice(&(obj3_offset as u16).to_be_bytes());
1323 xref_data.push(0);
1324 xref_data.push(2);
1326 xref_data.extend_from_slice(&(3u16).to_be_bytes()); xref_data.push(0); xref_data.push(2);
1330 xref_data.extend_from_slice(&(3u16).to_be_bytes()); xref_data.push(1); let xref_offset = pdf.len();
1334 let xref_dict = format!(
1335 "4 0 obj\n<< /Type /XRef /Size 12 /W [1 2 1] /Index [0 4 10 2] /Root 1 0 R /Length {} >>\nstream\n",
1336 xref_data.len()
1337 );
1338 pdf.extend_from_slice(xref_dict.as_bytes());
1339 pdf.extend_from_slice(&xref_data);
1340 pdf.extend_from_slice(b"\nendstream\nendobj\n");
1341
1342 pdf.extend_from_slice(format!("startxref\n{}\n%%EOF", xref_offset).as_bytes());
1343
1344 pdf
1345 }
1346
#[test]
/// Objects 10 and 11 of the object-stream fixture resolve to the integer 42
/// and the boolean `true` respectively, even in strict mode.
fn resolve_object_from_objstm() {
    let store = ObjectStore::open(build_pdf_with_objstm(), ParsingMode::Strict).unwrap();

    let integer = store.resolve(ObjectId::new(10, 0)).unwrap();
    let boolean = store.resolve(ObjectId::new(11, 0)).unwrap();

    assert_eq!(integer.as_i64(), Some(42));
    assert_eq!(boolean.as_bool(), Some(true));
}
1361
#[test]
/// Resolving the same object-stream member twice yields the same value both
/// times.
fn objstm_same_object_twice() {
    let store = ObjectStore::open(build_pdf_with_objstm(), ParsingMode::Strict).unwrap();

    let initial = store.resolve(ObjectId::new(10, 0)).unwrap();
    let repeated = store.resolve(ObjectId::new(10, 0)).unwrap();

    let (initial_value, repeated_value) = (initial.as_i64(), repeated.as_i64());
    assert_eq!(initial_value, repeated_value);
    assert_eq!(initial_value, Some(42));
}
1373
#[test]
/// The object-stream cache starts empty, gains exactly one entry when the
/// first member of the stream is resolved, and does not grow when a second
/// member of the same stream is resolved afterwards.
fn objstm_cache_populated_after_first_access() {
    let store = ObjectStore::open(build_pdf_with_objstm(), ParsingMode::Strict).unwrap();
    assert_eq!(store.objstm_cache_len(), 0);

    // First access to a member of the object stream.
    let _ = store.resolve(ObjectId::new(10, 0)).unwrap();
    assert_eq!(store.objstm_cache_len(), 1);

    // Second member of the same stream: still correct, still one cache entry.
    let sibling = store.resolve(ObjectId::new(11, 0)).unwrap();
    assert_eq!(sibling.as_bool(), Some(true));
    assert_eq!(store.objstm_cache_len(), 1);
}
1396
#[test]
/// Pins the object-stream cache limit so that changing it is a deliberate,
/// test-visible decision.
fn objstm_cache_size_limit() {
    const EXPECTED_LIMIT: usize = 256;
    assert_eq!(MAX_OBJSTM_CACHE_ENTRIES, EXPECTED_LIMIT);
}
1403
#[test]
/// Object 1's xref row points at offset 99999, far beyond the end of the
/// file. Opening leniently only has to complete without panicking; the
/// outcome itself is deliberately discarded.
fn malformed_wrong_xref_offsets_lenient() {
    let mut pdf = b"%PDF-1.4\n".to_vec();
    pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
    let pages_offset = pdf.len();
    pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");
    let xref_offset = pdf.len();

    let xref_rows = [
        String::from("0000000000 65535 f \r\n"),
        // Deliberately bogus offset for object 1.
        String::from("0000099999 00000 n \r\n"),
        format!("{pages_offset:010} 00000 n \r\n"),
    ];
    pdf.extend_from_slice(b"xref\n0 3\n");
    pdf.extend_from_slice(xref_rows.concat().as_bytes());
    pdf.extend_from_slice(b"trailer\n<< /Size 3 /Root 1 0 R >>\n");
    pdf.extend_from_slice(format!("startxref\n{xref_offset}\n%%EOF").as_bytes());

    let _ = ObjectStore::open(pdf, ParsingMode::Lenient);
}
1429
#[test]
/// With a bogus xref offset for object 1, strict opening still succeeds, but
/// resolving object 1 must report an error.
fn malformed_wrong_xref_offsets_strict() {
    let mut pdf = b"%PDF-1.4\n".to_vec();
    pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
    let pages_offset = pdf.len();
    pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");
    let xref_offset = pdf.len();

    let pages_row = format!("{pages_offset:010} 00000 n \r\n");
    let startxref = format!("startxref\n{xref_offset}\n%%EOF");
    let tail: [&[u8]; 6] = [
        b"xref\n0 3\n",
        b"0000000000 65535 f \r\n",
        // Object 1 claims to live at offset 99999, which is past the end of the file.
        b"0000099999 00000 n \r\n",
        pages_row.as_bytes(),
        b"trailer\n<< /Size 3 /Root 1 0 R >>\n",
        startxref.as_bytes(),
    ];
    pdf.extend_from_slice(&tail.concat());

    let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
    assert!(store.resolve(ObjectId::new(1, 0)).is_err());
}
1452
#[test]
/// A file cut off in the middle of its first dictionary cannot be opened,
/// even leniently.
fn malformed_truncated_pdf() {
    let truncated: &[u8] = b"%PDF-1.4\n1 0 obj\n<< /Type /Cat";
    let outcome = ObjectStore::open(truncated.to_vec(), ParsingMode::Lenient);
    assert!(outcome.is_err());
}
1460
#[test]
/// An otherwise valid file that ends right after the `startxref` value, with
/// no `%%EOF` marker. Lenient opening only has to complete without panicking;
/// the outcome is discarded.
fn malformed_missing_eof_marker() {
    let objects: [&[u8]; 2] = [
        b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n",
        b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n",
    ];

    // Append each object while recording its xref row at its start offset.
    let mut pdf = b"%PDF-1.4\n".to_vec();
    let mut xref = String::from("xref\n0 3\n0000000000 65535 f \r\n");
    for object in objects {
        xref.push_str(&format!("{:010} 00000 n \r\n", pdf.len()));
        pdf.extend_from_slice(object);
    }

    let xref_offset = pdf.len();
    pdf.extend_from_slice(xref.as_bytes());
    pdf.extend_from_slice(b"trailer\n<< /Size 3 /Root 1 0 R >>\n");
    // Intentionally no "%%EOF" after the offset.
    pdf.extend_from_slice(format!("startxref\n{xref_offset}\n").as_bytes());

    let _ = ObjectStore::open(pdf, ParsingMode::Lenient);
}
1482
#[test]
/// Zero bytes of input is not a PDF; lenient opening must return an error.
fn malformed_empty_pdf() {
    let nothing = Vec::<u8>::new();
    assert!(ObjectStore::open(nothing, ParsingMode::Lenient).is_err());
}
1489
#[test]
/// A file consisting solely of the version header must be rejected, even
/// leniently.
fn malformed_header_only() {
    let header_only = b"%PDF-1.4\n".to_vec();
    assert!(ObjectStore::open(header_only, ParsingMode::Lenient).is_err());
}
1496
#[test]
/// The file has no xref table and a `startxref` value of 99999999, far beyond
/// its length. Lenient opening only has to complete without panicking; the
/// outcome is discarded.
fn malformed_invalid_startxref() {
    let pieces: [&[u8]; 5] = [
        b"%PDF-1.4\n",
        b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n",
        b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n",
        b"trailer\n<< /Size 3 /Root 1 0 R >>\n",
        b"startxref\n99999999\n%%EOF",
    ];
    let pdf = pieces.concat();

    let _ = ObjectStore::open(pdf, ParsingMode::Lenient);
}
1510
#[test]
/// The catalog dictionary repeats its `/Type` key; lenient opening must
/// still succeed.
fn malformed_duplicate_dict_keys() {
    let objects: [&[u8]; 2] = [
        // `/Type /Catalog` appears twice in this dictionary.
        b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R /Type /Catalog >>\nendobj\n",
        b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n",
    ];

    // Append each object while recording its xref row at its start offset.
    let mut pdf = b"%PDF-1.4\n".to_vec();
    let mut xref = String::from("xref\n0 3\n0000000000 65535 f \r\n");
    for object in objects {
        xref.push_str(&format!("{:010} 00000 n \r\n", pdf.len()));
        pdf.extend_from_slice(object);
    }

    let xref_offset = pdf.len();
    pdf.extend_from_slice(xref.as_bytes());
    pdf.extend_from_slice(b"trailer\n<< /Size 3 /Root 1 0 R >>\n");
    pdf.extend_from_slice(format!("startxref\n{xref_offset}\n%%EOF").as_bytes());

    assert!(ObjectStore::open(pdf, ParsingMode::Lenient).is_ok());
}
1533
#[test]
/// The trailer declares `/Size 999999999` although the file holds only two
/// objects. Lenient opening only has to complete without panicking; the
/// outcome is discarded.
fn malformed_oversized_trailer_size() {
    let objects: [&[u8]; 2] = [
        b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n",
        b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n",
    ];

    // Append each object while recording its xref row at its start offset.
    let mut pdf = b"%PDF-1.4\n".to_vec();
    let mut xref = String::from("xref\n0 3\n0000000000 65535 f \r\n");
    for object in objects {
        xref.push_str(&format!("{:010} 00000 n \r\n", pdf.len()));
        pdf.extend_from_slice(object);
    }

    let xref_offset = pdf.len();
    pdf.extend_from_slice(xref.as_bytes());
    // Wildly exaggerated object count.
    pdf.extend_from_slice(b"trailer\n<< /Size 999999999 /Root 1 0 R >>\n");
    pdf.extend_from_slice(format!("startxref\n{xref_offset}\n%%EOF").as_bytes());

    let _ = ObjectStore::open(pdf, ParsingMode::Lenient);
}
1555
#[test]
/// Resolving an object number absent from the minimal fixture is an error.
fn malformed_resolve_nonexistent_object() {
    let store = ObjectStore::open(build_minimal_pdf(), ParsingMode::Lenient).unwrap();
    let missing = ObjectId::new(999, 0);
    assert!(store.resolve(missing).is_err());
}
1564
#[test]
/// Asks for object 1 with generation 1 instead of 0. Only the absence of a
/// panic is checked; the outcome is discarded.
fn malformed_wrong_generation() {
    let store = ObjectStore::open(build_minimal_pdf(), ParsingMode::Lenient).unwrap();
    let _ = store.resolve(ObjectId::new(1, 1));
}
1575
#[test]
/// Object 1 is `true` wrapped in 100 levels of nested arrays. Lenient opening
/// only has to complete without panicking; the outcome is discarded.
fn malformed_deeply_nested_arrays() {
    let nested = format!(
        "1 0 obj\n{}true{}\nendobj\n",
        "[".repeat(100),
        "]".repeat(100)
    );
    let objects: [&[u8]; 3] = [
        nested.as_bytes(),
        b"2 0 obj\n<< /Type /Catalog /Pages 3 0 R >>\nendobj\n",
        b"3 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n",
    ];

    // Append each object while recording its xref row at its start offset.
    let mut pdf = b"%PDF-1.4\n".to_vec();
    let mut xref = String::from("xref\n0 4\n0000000000 65535 f \r\n");
    for object in objects {
        xref.push_str(&format!("{:010} 00000 n \r\n", pdf.len()));
        pdf.extend_from_slice(object);
    }

    let xref_offset = pdf.len();
    pdf.extend_from_slice(xref.as_bytes());
    pdf.extend_from_slice(b"trailer\n<< /Size 4 /Root 2 0 R >>\n");
    pdf.extend_from_slice(format!("startxref\n{xref_offset}\n%%EOF").as_bytes());

    let _ = ObjectStore::open(pdf, ParsingMode::Lenient);
}
1608
#[test]
/// Asks for object 0 (listed as the free entry in the fixtures' xref tables).
/// Only the absence of a panic is checked; the outcome is discarded.
fn malformed_resolve_object_zero() {
    let store = ObjectStore::open(build_minimal_pdf(), ParsingMode::Lenient).unwrap();
    let _ = store.resolve(ObjectId::new(0, 0));
}
1618
#[test]
/// Object 3 is a stream declaring `/Length 99999` while its payload is only a
/// few dozen bytes. Lenient opening only has to complete without panicking;
/// the outcome is discarded.
fn malformed_stream_wrong_length() {
    let objects: [&[u8]; 3] = [
        b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n",
        b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n",
        // Declared length is far larger than the actual stream payload.
        b"3 0 obj\n<< /Length 99999 >>\nstream\nBT /F1 12 Tf 10 10 Td (Hello) Tj ET\nendstream\nendobj\n",
    ];

    // Append each object while recording its xref row at its start offset.
    let mut pdf = b"%PDF-1.4\n".to_vec();
    let mut xref = String::from("xref\n0 4\n0000000000 65535 f \r\n");
    for object in objects {
        xref.push_str(&format!("{:010} 00000 n \r\n", pdf.len()));
        pdf.extend_from_slice(object);
    }

    let xref_offset = pdf.len();
    pdf.extend_from_slice(xref.as_bytes());
    pdf.extend_from_slice(b"trailer\n<< /Size 4 /Root 1 0 R >>\n");
    pdf.extend_from_slice(format!("startxref\n{xref_offset}\n%%EOF").as_bytes());

    let _ = ObjectStore::open(pdf, ParsingMode::Lenient);
}
1646
#[test]
/// The trailer has no `/Root` entry, but the file does contain a
/// `/Type /Catalog` object. Lenient opening is expected to succeed.
fn malformed_missing_root_with_catalog() {
    let objects: [&[u8]; 2] = [
        b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n",
        b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n",
    ];

    // Append each object while recording its xref row at its start offset.
    let mut pdf = b"%PDF-1.4\n".to_vec();
    let mut xref = String::from("xref\n0 3\n0000000000 65535 f \r\n");
    for object in objects {
        xref.push_str(&format!("{:010} 00000 n \r\n", pdf.len()));
        pdf.extend_from_slice(object);
    }

    let xref_offset = pdf.len();
    pdf.extend_from_slice(xref.as_bytes());
    // Trailer deliberately omits /Root.
    pdf.extend_from_slice(b"trailer\n<< /Size 3 >>\n");
    pdf.extend_from_slice(format!("startxref\n{xref_offset}\n%%EOF").as_bytes());

    assert!(ObjectStore::open(pdf, ParsingMode::Lenient).is_ok());
}
1668
#[test]
/// The trailer has no `/Root` entry and no object in the file is a
/// `/Type /Catalog` dictionary. Lenient opening must fail.
fn malformed_missing_root_no_catalog() {
    let objects: [&[u8]; 2] = [
        // Not a catalog: there is no /Type /Catalog anywhere in the file.
        b"1 0 obj\n<< /Foo /Bar >>\nendobj\n",
        b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n",
    ];

    // Append each object while recording its xref row at its start offset.
    let mut pdf = b"%PDF-1.4\n".to_vec();
    let mut xref = String::from("xref\n0 3\n0000000000 65535 f \r\n");
    for object in objects {
        xref.push_str(&format!("{:010} 00000 n \r\n", pdf.len()));
        pdf.extend_from_slice(object);
    }

    let xref_offset = pdf.len();
    pdf.extend_from_slice(xref.as_bytes());
    // Trailer deliberately omits /Root.
    pdf.extend_from_slice(b"trailer\n<< /Size 3 >>\n");
    pdf.extend_from_slice(format!("startxref\n{xref_offset}\n%%EOF").as_bytes());

    assert!(ObjectStore::open(pdf, ParsingMode::Lenient).is_err());
}
1690
#[test]
/// Objects 1 and 2 reference each other (`/Pages 2 0 R` and `/Parent 1 0 R`).
/// Opening leniently and resolving object 1 must still yield a dictionary.
fn malformed_circular_reference() {
    let objects: [&[u8]; 2] = [
        b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R /Extra 2 0 R >>\nendobj\n",
        b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 /Parent 1 0 R >>\nendobj\n",
    ];

    // Append each object while recording its xref row at its start offset.
    let mut pdf = b"%PDF-1.4\n".to_vec();
    let mut xref = String::from("xref\n0 3\n0000000000 65535 f \r\n");
    for object in objects {
        xref.push_str(&format!("{:010} 00000 n \r\n", pdf.len()));
        pdf.extend_from_slice(object);
    }

    let xref_offset = pdf.len();
    pdf.extend_from_slice(xref.as_bytes());
    pdf.extend_from_slice(b"trailer\n<< /Size 3 /Root 1 0 R >>\n");
    pdf.extend_from_slice(format!("startxref\n{xref_offset}\n%%EOF").as_bytes());

    let store = ObjectStore::open(pdf, ParsingMode::Lenient).unwrap();
    let catalog = store.resolve(ObjectId::new(1, 0)).unwrap();
    assert!(catalog.as_dict().is_some());
}
1714
#[test]
/// A header followed by nothing but spaces and newlines must be rejected,
/// even leniently.
fn malformed_whitespace_only_after_header() {
    let blank_body: &[u8] = b"%PDF-1.4\n   \n   \n";
    let outcome = ObjectStore::open(blank_body.to_vec(), ParsingMode::Lenient);
    assert!(outcome.is_err());
}
1722}