1use std::collections::HashMap;
12use std::sync::OnceLock;
13
14use dashmap::DashMap;
15use rpdfium_core::error::PdfError;
16use rpdfium_core::fx_system::MAX_RECURSION_DEPTH;
17use rpdfium_core::{Name, ParsingMode, PdfSource};
18use smallvec::SmallVec;
19
20use crate::header::{PdfVersion, parse_header};
21use crate::object::{Object, ObjectId, StreamData};
22use crate::object_parser::parse_indirect_object;
23use crate::security::SecurityHandler;
24use crate::trailer::{TrailerInfo, parse_all_xrefs};
25use crate::xref::{XrefEntryType, XrefSection, XrefTable};
26
/// Where one indirect object lives, plus a lazily filled parse cache.
///
/// Slots are created from cross-reference entries by `ObjectStore::build_slots`
/// and are populated on first access by `ObjectStore::resolve`.
pub(crate) enum ObjectSlot {
    /// The object is stored directly in the source data.
    Direct {
        /// Offset handed to `parse_indirect_object` when the object is first resolved.
        offset: u64,
        /// Outcome of the first parse attempt (success or failure), computed at most once.
        parsed: OnceLock<Result<Object, PdfError>>,
    },
    /// The object is stored inside an object stream.
    InStream {
        /// Id of the object stream that contains this object.
        stream_id: ObjectId,
        /// Position of this object within the object stream's object list.
        index: u32,
        /// Outcome of the first extraction attempt, computed at most once.
        parsed: OnceLock<Result<Object, PdfError>>,
    },
    /// The cross-reference entry marks the object as free.
    ///
    /// `build_slots` drops these before returning, and `resolve` reports them
    /// as `PdfError::UnknownObject`.
    Free,
}
46
/// Maximum number of parsed object streams kept in `ObjectStore::objstm_cache`.
/// Once the cache holds this many entries, further object streams are parsed
/// on demand without being cached.
const MAX_OBJSTM_CACHE_ENTRIES: usize = 256;
51
/// Lazily resolving store for the indirect objects of one PDF document.
///
/// Objects are parsed on first access and the result is cached per slot.
pub struct ObjectStore<S: PdfSource = std::sync::Arc<[u8]>> {
    /// Backing document bytes.
    source: S,
    /// Cross-reference data as parsed (or rebuilt) when the store was opened.
    xref: XrefTable,
    /// Parsing mode passed to `open`; selects strict vs. lenient error handling.
    mode: ParsingMode,
    /// Trailer information obtained together with the cross-reference data.
    trailer: TrailerInfo,
    /// Version parsed from the file header.
    version: PdfVersion,
    /// One slot per non-free object listed in the cross-reference sections.
    slots: HashMap<ObjectId, ObjectSlot>,
    /// Present only when the trailer references an encryption dictionary.
    security_handler: Option<SecurityHandler>,
    /// Parsed contents of object streams, keyed by the stream's object id.
    objstm_cache: DashMap<ObjectId, Vec<(u32, Object)>>,
    /// `true` when normal xref parsing failed and the table was rebuilt.
    xref_rebuilt: bool,
    /// `true` when the data at the last xref offset does not start with `xref`.
    xref_is_stream: bool,
}
73
74impl<S: PdfSource> ObjectStore<S> {
    /// Opens a document without supplying a password.
    ///
    /// Equivalent to [`open_with_password`](Self::open_with_password) with
    /// `password` set to `None`.
    ///
    /// # Errors
    /// Propagates every error returned by `open_with_password`.
    pub fn open(source: S, mode: ParsingMode) -> Result<Self, PdfError> {
        Self::open_with_password(source, mode, None)
    }
82
83 pub fn open_with_password(
90 source: S,
91 mode: ParsingMode,
92 password: Option<&str>,
93 ) -> Result<Self, PdfError> {
94 let data = source.as_ref();
95
96 let (version, _header_end) = parse_header(data, mode)?;
98
99 let mut xref_rebuilt = false;
101 let (xref, trailer) = match parse_all_xrefs(data, mode) {
102 Ok(result) => result,
103 Err(_) if mode == ParsingMode::Lenient => {
104 xref_rebuilt = true;
105 crate::xref::rebuild_xref(data)?
106 }
107 Err(e) => return Err(e),
108 };
109
110 let slots = Self::build_slots(&xref.sections);
112
113 let xref_is_stream = if xref_rebuilt {
116 false
117 } else {
118 let start = xref.start_offset as usize;
119 if start < data.len() {
120 let mut pos = start;
121 while pos < data.len() && matches!(data[pos], b' ' | b'\t' | b'\n' | b'\r' | 0x0C) {
122 pos += 1;
123 }
124 !(pos + 4 <= data.len() && &data[pos..pos + 4] == b"xref")
125 } else {
126 false
127 }
128 };
129
130 let mut store = Self {
131 source,
132 xref,
133 mode,
134 trailer,
135 version,
136 slots,
137 security_handler: None,
138 objstm_cache: DashMap::new(),
139 xref_rebuilt,
140 xref_is_stream,
141 };
142
143 if let Some(encrypt_id) = store.trailer.encrypt {
145 let encrypt_dict = store
146 .resolve(encrypt_id)?
147 .as_dict()
148 .ok_or(PdfError::InvalidObject(
149 "encryption dictionary is not a dict".into(),
150 ))?
151 .clone();
152
153 let file_id = store
154 .trailer
155 .id
156 .as_ref()
157 .map(|ids| ids[0].as_slice())
158 .unwrap_or(&[]);
159
160 let pwd = password.unwrap_or("");
161
162 let handler = SecurityHandler::from_encrypt_dict(&encrypt_dict, &store, pwd, file_id)
163 .map_err(|e| match e {
164 crate::security::SecurityError::InvalidPassword => PdfError::InvalidPassword,
165 crate::security::SecurityError::UnsupportedVersion(_, _) => {
166 PdfError::UnsupportedEncryption
167 }
168 _ => PdfError::InvalidObject(e.to_string()),
169 })?;
170
171 store.security_handler = Some(handler);
172 }
173
174 Ok(store)
175 }
176
177 fn build_slots(xref_sections: &[XrefSection]) -> HashMap<ObjectId, ObjectSlot> {
183 let mut slots = HashMap::new();
184
185 for section in xref_sections {
187 for entry in §ion.entries {
188 match &entry.entry_type {
189 XrefEntryType::InUse { offset } => {
190 slots.entry(entry.id).or_insert(ObjectSlot::Direct {
191 offset: *offset,
192 parsed: OnceLock::new(),
193 });
194 }
195 XrefEntryType::InStream { stream_id, index } => {
196 slots.entry(entry.id).or_insert(ObjectSlot::InStream {
197 stream_id: *stream_id,
198 index: *index,
199 parsed: OnceLock::new(),
200 });
201 }
202 XrefEntryType::Free => {
203 slots.entry(entry.id).or_insert(ObjectSlot::Free);
204 }
205 }
206 }
207 }
208
209 slots.retain(|_, slot| !matches!(slot, ObjectSlot::Free));
211 slots
212 }
213
214 pub fn resolve(&self, id: ObjectId) -> Result<&Object, PdfError> {
217 let slot = self.slots.get(&id).ok_or(PdfError::UnknownObject(id))?;
218
219 let result = match slot {
220 ObjectSlot::Direct { offset, parsed } => parsed.get_or_init(|| {
221 let data = self.source.as_ref();
222 match parse_indirect_object(data, *offset, self.mode) {
223 Ok((_id, obj)) => Ok(obj),
224 Err(e) => Err(e),
225 }
226 }),
227 ObjectSlot::InStream {
228 stream_id,
229 index,
230 parsed,
231 } => parsed.get_or_init(|| self.parse_from_objstm(*stream_id, *index)),
232 ObjectSlot::Free => {
233 return Err(PdfError::UnknownObject(id));
235 }
236 };
237
238 match result {
239 Ok(obj) => Ok(obj),
240 Err(e) if self.mode == ParsingMode::Lenient => {
241 tracing::warn!(?id, error = ?e, "parse failed, returning Null");
242 const NULL_OBJ: Object = Object::Null;
246 Ok(&NULL_OBJ)
247 }
248 Err(_e) => Err(PdfError::UnknownObject(id)),
249 }
250 }
251
252 pub fn deep_resolve<'a>(&'a self, obj: &'a Object) -> Result<&'a Object, PdfError> {
257 let mut current = obj;
258 let mut seen = SmallVec::<[ObjectId; 8]>::new();
259
260 while let Object::Reference(id) = current {
261 if seen.len() >= MAX_RECURSION_DEPTH as usize {
262 return Err(PdfError::RecursionLimitExceeded);
263 }
264 if seen.contains(id) {
265 return Err(PdfError::CircularReference(*id));
266 }
267 seen.push(*id);
268 current = self.resolve(*id)?;
269 }
270
271 Ok(current)
272 }
273
274 pub fn dict_resolve<'a>(
276 &'a self,
277 dict: &'a HashMap<Name, Object>,
278 key: &Name,
279 ) -> Result<Option<&'a Object>, PdfError> {
280 match dict.get(key) {
281 Some(obj) => Ok(Some(self.deep_resolve(obj)?)),
282 None => Ok(None),
283 }
284 }
285
286 pub fn decode_stream(&self, stream: &Object) -> Result<Vec<u8>, PdfError> {
291 let (dict, data) = match stream {
292 Object::Stream { dict, data } => (dict, data),
293 _ => return Err(PdfError::NotAStream),
294 };
295
296 match data {
297 StreamData::Raw { offset, length } => {
298 let source = self.source.as_ref();
299 let start = *offset as usize;
300 let end = start
301 .checked_add(*length as usize)
302 .ok_or(PdfError::StreamTooLarge)?;
303 if end > source.len() {
304 return Err(PdfError::StreamTooLarge);
305 }
306 let raw = &source[start..end];
307
308 let mut filters = crate::filter::resolve_filter_chain(dict);
309 if filters.is_empty() {
310 return Ok(raw.to_vec());
311 }
312
313 self.resolve_jbig2_globals(dict, &mut filters);
314
315 rpdfium_codec::apply_filter_chain(raw, &filters)
316 .map_err(|e| PdfError::StreamDecodeError(e.to_string()))
317 }
318 StreamData::Decoded { data: bytes } => Ok(bytes.clone()),
319 }
320 }
321
322 fn parse_from_objstm(&self, stream_id: ObjectId, index: u32) -> Result<Object, PdfError> {
328 if let Some(cached) = self.objstm_cache.get(&stream_id) {
330 return match cached.get(index as usize) {
331 Some((_obj_num, obj)) => Ok(obj.clone()),
332 None => Err(PdfError::InvalidObjectStream),
333 };
334 }
335
336 let stream_obj = self.resolve(stream_id)?;
338
339 let decoded = self.decode_stream_for_object(stream_obj, stream_id)?;
342
343 let dict = match stream_obj {
344 Object::Stream { dict, .. } => dict,
345 _ => return Err(PdfError::NotAStream),
346 };
347
348 let contents = crate::object_stream::parse_object_stream(&decoded, dict, self.mode)?;
350
351 if self.objstm_cache.len() < MAX_OBJSTM_CACHE_ENTRIES {
353 let result = contents
354 .objects
355 .get(index as usize)
356 .map(|(_obj_num, obj)| obj.clone())
357 .ok_or(PdfError::InvalidObjectStream);
358
359 self.objstm_cache.insert(stream_id, contents.objects);
360
361 result
362 } else {
363 match crate::object_stream::get_object_from_stream(&contents, index) {
365 Some(obj) => Ok(obj.clone()),
366 None => Err(PdfError::InvalidObjectStream),
367 }
368 }
369 }
370
    /// Returns the parsing mode this store was opened with.
    pub fn parsing_mode(&self) -> ParsingMode {
        self.mode
    }
375
    /// Returns the trailer information obtained while the xref data was parsed.
    pub fn trailer(&self) -> &TrailerInfo {
        &self.trailer
    }
380
381 #[inline]
385 pub fn get_trailer(&self) -> &TrailerInfo {
386 self.trailer()
387 }
388
    /// Returns the PDF version parsed from the file header.
    pub fn file_version(&self) -> PdfVersion {
        self.version
    }
395
396 #[inline]
400 pub fn get_file_version(&self) -> PdfVersion {
401 self.file_version()
402 }
403
404 #[inline]
406 #[deprecated(since = "0.0.0", note = "use `file_version()` or `get_file_version()`")]
407 pub fn version(&self) -> PdfVersion {
408 self.file_version()
409 }
410
    /// Returns the cross-reference table (parsed or rebuilt) of the document.
    pub fn xref(&self) -> &XrefTable {
        &self.xref
    }
415
    /// Returns the number of object slots, i.e. non-free objects known to the store.
    pub fn object_count(&self) -> usize {
        self.slots.len()
    }
420
    /// Returns `true` when a slot exists for exactly this id (number and generation).
    pub fn contains(&self, id: ObjectId) -> bool {
        self.slots.contains_key(&id)
    }
425
    /// Iterates over the ids of all object slots, in the map's (unspecified) order.
    pub fn object_ids(&self) -> impl Iterator<Item = &ObjectId> {
        self.slots.keys()
    }
430
    /// Returns the security handler, or `None` when no handler was set up
    /// (the trailer had no encryption dictionary reference).
    pub fn security_handler(&self) -> Option<&SecurityHandler> {
        self.security_handler.as_ref()
    }
435
436 #[inline]
440 pub fn get_security_handler(&self) -> Option<&SecurityHandler> {
441 self.security_handler()
442 }
443
444 pub fn permissions(&self) -> Option<crate::security::Permissions> {
451 self.security_handler.as_ref().map(|h| h.permissions())
452 }
453
454 #[inline]
458 pub fn get_permissions(&self) -> Option<crate::security::Permissions> {
459 self.permissions()
460 }
461
462 pub fn encoded_password(&self) -> Option<&[u8]> {
470 self.security_handler.as_ref().map(|h| h.encoded_password())
471 }
472
473 #[inline]
477 pub fn get_encoded_password(&self) -> Option<&[u8]> {
478 self.encoded_password()
479 }
480
    /// Returns `true` when regular xref parsing failed on open and the
    /// cross-reference table was rebuilt instead (lenient mode only).
    pub fn xref_table_rebuilt(&self) -> bool {
        self.xref_rebuilt
    }
490
491 #[inline]
495 pub fn is_xref_table_rebuilt(&self) -> bool {
496 self.xref_table_rebuilt()
497 }
498
499 #[inline]
501 #[deprecated(
502 since = "0.0.0",
503 note = "use `xref_table_rebuilt()` or `is_xref_table_rebuilt()`"
504 )]
505 pub fn xref_rebuilt(&self) -> bool {
506 self.xref_table_rebuilt()
507 }
508
    /// Returns `true` when, at open time, the data at the last xref offset
    /// (after leading whitespace) did not start with the `xref` keyword.
    /// Always `false` when the table was rebuilt.
    pub fn is_xref_stream(&self) -> bool {
        self.xref_is_stream
    }
516
517 pub fn object_position_or_zero(&self, id: ObjectId) -> Option<u64> {
523 match self.slots.get(&id)? {
524 ObjectSlot::Direct { offset, .. } => Some(*offset),
525 ObjectSlot::InStream { .. } | ObjectSlot::Free => None,
526 }
527 }
528
529 #[inline]
533 pub fn get_object_position_or_zero(&self, id: ObjectId) -> Option<u64> {
534 self.object_position_or_zero(id)
535 }
536
537 #[inline]
539 #[deprecated(
540 since = "0.0.0",
541 note = "use `object_position_or_zero()` or `get_object_position_or_zero()`"
542 )]
543 pub fn object_offset(&self, id: ObjectId) -> Option<u64> {
544 self.object_position_or_zero(id)
545 }
546
    /// Returns the backing source the store was opened on.
    pub fn source_data(&self) -> &S {
        &self.source
    }
551
552 pub fn last_obj_num(&self) -> u32 {
556 self.slots.keys().map(|id| id.number).max().unwrap_or(0)
557 }
558
559 #[inline]
563 pub fn get_last_obj_num(&self) -> u32 {
564 self.last_obj_num()
565 }
566
567 #[inline]
569 #[deprecated(since = "0.0.0", note = "use `last_obj_num()` or `get_last_obj_num()`")]
570 pub fn max_object_number(&self) -> u32 {
571 self.last_obj_num()
572 }
573
    /// Returns the start offset recorded in the cross-reference table.
    pub fn last_xref_offset(&self) -> u64 {
        self.xref.start_offset
    }
582
583 #[inline]
585 #[deprecated(
586 since = "0.0.0",
587 note = "use `last_xref_offset()` or `get_last_xref_offset()`"
588 )]
589 pub fn xref_start_offset(&self) -> u64 {
590 self.last_xref_offset()
591 }
592
593 pub fn is_valid_object_number(&self, number: u32) -> bool {
598 self.contains(ObjectId::new(number, 0)) || self.slots.keys().any(|id| id.number == number)
599 }
600
601 #[inline]
605 pub fn get_last_xref_offset(&self) -> u64 {
606 self.last_xref_offset()
607 }
608
609 pub fn is_object_free(&self, number: u32) -> bool {
614 !self.slots.keys().any(|id| id.number == number)
615 }
616
617 #[inline]
621 pub fn is_object_free_or_null(&self, number: u32) -> bool {
622 self.is_object_free(number)
623 }
624
    /// Returns the length of the backing source in bytes.
    pub fn document_size(&self) -> usize {
        self.source.as_ref().len()
    }
631
632 #[inline]
636 pub fn get_document_size(&self) -> usize {
637 self.document_size()
638 }
639
640 pub fn decode_stream_for_object(
645 &self,
646 stream: &Object,
647 obj_id: ObjectId,
648 ) -> Result<Vec<u8>, PdfError> {
649 let (dict, data) = match stream {
650 Object::Stream { dict, data } => (dict, data),
651 _ => return Err(PdfError::NotAStream),
652 };
653
654 match data {
655 StreamData::Raw { offset, length } => {
656 let source = self.source.as_ref();
657 let start = *offset as usize;
658 let end = start
659 .checked_add(*length as usize)
660 .ok_or(PdfError::StreamTooLarge)?;
661 if end > source.len() {
662 return Err(PdfError::StreamTooLarge);
663 }
664 let raw = &source[start..end];
665
666 let decrypted;
668 let raw_data = if let Some(handler) = &self.security_handler {
669 decrypted = handler.decrypt_stream(raw, obj_id);
670 &decrypted
671 } else {
672 raw
673 };
674
675 let mut filters = crate::filter::resolve_filter_chain(dict);
676 if filters.is_empty() {
677 return Ok(raw_data.to_vec());
678 }
679
680 self.resolve_jbig2_globals(dict, &mut filters);
681
682 rpdfium_codec::apply_filter_chain(raw_data, &filters)
683 .map_err(|e| PdfError::StreamDecodeError(e.to_string()))
684 }
685 StreamData::Decoded { data: bytes } => Ok(bytes.clone()),
686 }
687 }
688
689 pub fn raw_stream_bytes_for_object(
693 &self,
694 stream: &Object,
695 obj_id: ObjectId,
696 ) -> Result<Vec<u8>, PdfError> {
697 let (_dict, data) = match stream {
698 Object::Stream { dict, data } => (dict, data),
699 _ => return Err(PdfError::NotAStream),
700 };
701
702 match data {
703 StreamData::Raw { offset, length } => {
704 let source = self.source.as_ref();
705 let start = *offset as usize;
706 let end = start
707 .checked_add(*length as usize)
708 .ok_or(PdfError::StreamTooLarge)?;
709 if end > source.len() {
710 return Err(PdfError::StreamTooLarge);
711 }
712 let raw = &source[start..end];
713
714 if let Some(handler) = &self.security_handler {
716 Ok(handler.decrypt_stream(raw, obj_id))
717 } else {
718 Ok(raw.to_vec())
719 }
720 }
721 StreamData::Decoded { data: bytes } => Ok(bytes.clone()),
722 }
723 }
724
725 fn resolve_jbig2_globals(
729 &self,
730 dict: &HashMap<Name, Object>,
731 filters: &mut [(rpdfium_codec::DecodeFilter, rpdfium_codec::FilterParams)],
732 ) {
733 use rpdfium_codec::DecodeFilter;
734
735 let jbig2_entry = filters.iter_mut().find(|(f, _)| *f == DecodeFilter::JBIG2);
737 let jbig2_params = match jbig2_entry {
738 Some((_, params)) => params,
739 None => return,
740 };
741
742 let decode_parms = match dict.get(&Name::decode_parms()) {
744 Some(obj) => obj,
745 None => return,
746 };
747
748 let parms_dict = match decode_parms {
751 Object::Dictionary(d) => Some(d),
752 Object::Array(arr) => {
753 let filter_obj = dict.get(&Name::filter());
755 let jbig2_idx = match filter_obj {
756 Some(Object::Array(filter_arr)) => filter_arr.iter().position(|o| {
757 o.as_name().is_some_and(|n| {
758 n.as_bytes() == b"JBIG2Decode" || n.as_bytes() == b"JBIG2"
759 })
760 }),
761 _ => Some(0), };
763 jbig2_idx
764 .and_then(|idx| arr.get(idx))
765 .and_then(|o| o.as_dict())
766 }
767 _ => None,
768 };
769
770 let parms_dict = match parms_dict {
771 Some(d) => d,
772 None => return,
773 };
774
775 let globals_obj = match parms_dict.get(&Name::jbig2_globals()) {
777 Some(obj) => obj,
778 None => return,
779 };
780
781 let globals_stream = match globals_obj {
783 Object::Reference(id) => match self.resolve(*id) {
784 Ok(obj) => obj,
785 Err(_) => return,
786 },
787 _ => return,
788 };
789
790 if let Ok(decoded) = self.decode_stream(globals_stream) {
791 jbig2_params.jbig2_globals = Some(decoded);
792 }
793 }
794
    /// Test-only: number of object streams currently held in the cache.
    #[cfg(test)]
    pub(crate) fn objstm_cache_len(&self) -> usize {
        self.objstm_cache.len()
    }
800
801 pub fn decrypt_string(&self, data: &[u8], obj_id: ObjectId) -> Vec<u8> {
805 match &self.security_handler {
806 Some(handler) => handler.decrypt_string(data, obj_id),
807 None => data.to_vec(),
808 }
809 }
810}
811
812#[cfg(test)]
813mod tests {
814 use super::*;
815
816 #[test]
817 fn test_build_slots_newest_wins() {
818 let newer = XrefSection {
819 entries: vec![crate::xref::XrefEntry {
820 id: ObjectId::new(1, 0),
821 entry_type: XrefEntryType::InUse { offset: 100 },
822 }],
823 };
824 let older = XrefSection {
825 entries: vec![crate::xref::XrefEntry {
826 id: ObjectId::new(1, 0),
827 entry_type: XrefEntryType::InUse { offset: 50 },
828 }],
829 };
830
831 let slots = ObjectStore::<Vec<u8>>::build_slots(&[newer, older]);
832 assert_eq!(slots.len(), 1);
833
834 match &slots[&ObjectId::new(1, 0)] {
835 ObjectSlot::Direct { offset, .. } => assert_eq!(*offset, 100),
836 _ => panic!("expected Direct slot"),
837 }
838 }
839
840 #[test]
841 fn test_build_slots_free_entries_removed() {
842 let section = XrefSection {
843 entries: vec![
844 crate::xref::XrefEntry {
845 id: ObjectId::new(0, 65535),
846 entry_type: XrefEntryType::Free,
847 },
848 crate::xref::XrefEntry {
849 id: ObjectId::new(1, 0),
850 entry_type: XrefEntryType::InUse { offset: 100 },
851 },
852 ],
853 };
854
855 let slots = ObjectStore::<Vec<u8>>::build_slots(&[section]);
856 assert_eq!(slots.len(), 1);
857 assert!(slots.contains_key(&ObjectId::new(1, 0)));
858 assert!(!slots.contains_key(&ObjectId::new(0, 65535)));
859 }
860
861 #[test]
862 fn test_build_slots_free_hides_older() {
863 let newer = XrefSection {
865 entries: vec![crate::xref::XrefEntry {
866 id: ObjectId::new(5, 0),
867 entry_type: XrefEntryType::Free,
868 }],
869 };
870 let older = XrefSection {
872 entries: vec![crate::xref::XrefEntry {
873 id: ObjectId::new(5, 0),
874 entry_type: XrefEntryType::InUse { offset: 200 },
875 }],
876 };
877
878 let slots = ObjectStore::<Vec<u8>>::build_slots(&[newer, older]);
879 assert!(!slots.contains_key(&ObjectId::new(5, 0)));
882 }
883
884 #[test]
885 fn test_build_slots_in_stream() {
886 let section = XrefSection {
887 entries: vec![crate::xref::XrefEntry {
888 id: ObjectId::new(10, 0),
889 entry_type: XrefEntryType::InStream {
890 stream_id: ObjectId::new(3, 0),
891 index: 2,
892 },
893 }],
894 };
895
896 let slots = ObjectStore::<Vec<u8>>::build_slots(&[section]);
897 assert_eq!(slots.len(), 1);
898 match &slots[&ObjectId::new(10, 0)] {
899 ObjectSlot::InStream {
900 stream_id, index, ..
901 } => {
902 assert_eq!(*stream_id, ObjectId::new(3, 0));
903 assert_eq!(*index, 2);
904 }
905 _ => panic!("expected InStream slot"),
906 }
907 }
908
909 #[test]
910 fn test_deep_resolve_non_reference() {
911 let pdf = build_minimal_pdf();
913 let store = ObjectStore::open(pdf, ParsingMode::Lenient);
914 if let Ok(store) = store {
916 let obj = Object::Integer(42);
917 let resolved = store.deep_resolve(&obj).unwrap();
918 assert_eq!(resolved.as_i64(), Some(42));
919 }
920 }
921
922 fn build_minimal_pdf() -> Vec<u8> {
924 let mut pdf = Vec::new();
925 pdf.extend_from_slice(b"%PDF-1.4\n");
926
927 let obj1_offset = pdf.len();
928 pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
929
930 let obj2_offset = pdf.len();
931 pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");
932
933 let xref_offset = pdf.len();
934 pdf.extend_from_slice(b"xref\n");
935 pdf.extend_from_slice(b"0 3\n");
936 pdf.extend_from_slice(b"0000000000 65535 f \r\n");
937 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj1_offset).as_bytes());
938 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj2_offset).as_bytes());
939 pdf.extend_from_slice(b"trailer\n");
940 pdf.extend_from_slice(b"<< /Size 3 /Root 1 0 R >>\n");
941 pdf.extend_from_slice(format!("startxref\n{}\n%%EOF", xref_offset).as_bytes());
942
943 pdf
944 }
945
946 #[test]
947 fn test_open_minimal_pdf() {
948 let pdf = build_minimal_pdf();
949 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
950
951 assert_eq!(store.file_version(), PdfVersion::new(1, 4));
952 assert_eq!(store.trailer().root, ObjectId::new(1, 0));
953 assert_eq!(store.trailer().size, 3);
954 assert!(store.contains(ObjectId::new(1, 0)));
955 assert!(store.contains(ObjectId::new(2, 0)));
956 assert!(!store.contains(ObjectId::new(0, 65535)));
957 }
958
959 #[test]
960 fn test_resolve_object_from_minimal_pdf() {
961 let pdf = build_minimal_pdf();
962 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
963
964 let obj = store.resolve(ObjectId::new(1, 0)).unwrap();
965 let dict = obj.as_dict().unwrap();
966 assert!(dict.contains_key(&Name::r#type()));
967 }
968
969 #[test]
970 fn test_resolve_unknown_object() {
971 let pdf = build_minimal_pdf();
972 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
973
974 let result = store.resolve(ObjectId::new(999, 0));
975 assert!(result.is_err());
976 }
977
978 #[test]
979 fn test_deep_resolve_follows_reference() {
980 let pdf = build_minimal_pdf();
981 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
982
983 let reference = Object::Reference(ObjectId::new(1, 0));
984 let resolved = store.deep_resolve(&reference).unwrap();
985 assert!(resolved.as_dict().is_some());
986 }
987
988 #[test]
989 fn test_dict_resolve_present() {
990 let pdf = build_minimal_pdf();
991 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
992
993 let catalog = store.resolve(ObjectId::new(1, 0)).unwrap();
994 let dict = catalog.as_dict().unwrap();
995
996 let pages = store.dict_resolve(dict, &Name::pages()).unwrap();
998 assert!(pages.is_some());
999 let pages_obj = pages.unwrap();
1000 assert!(pages_obj.as_dict().is_some());
1001 }
1002
1003 #[test]
1004 fn test_dict_resolve_absent() {
1005 let pdf = build_minimal_pdf();
1006 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
1007
1008 let catalog = store.resolve(ObjectId::new(1, 0)).unwrap();
1009 let dict = catalog.as_dict().unwrap();
1010
1011 let nonexistent = store.dict_resolve(dict, &Name::encrypt()).unwrap();
1012 assert!(nonexistent.is_none());
1013 }
1014
1015 #[test]
1021 fn test_deep_resolve_self_reference() {
1022 let pdf = build_self_referencing_pdf();
1024 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
1025
1026 let reference = Object::Reference(ObjectId::new(3, 0));
1027 let result = store.deep_resolve(&reference);
1028 assert!(result.is_err());
1029 }
1030
1031 #[test]
1033 fn test_deep_resolve_concrete_types() {
1034 let pdf = build_minimal_pdf();
1035 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
1036
1037 let obj = Object::Boolean(true);
1039 assert_eq!(store.deep_resolve(&obj).unwrap().as_bool(), Some(true));
1040
1041 let obj = Object::Integer(99);
1043 assert_eq!(store.deep_resolve(&obj).unwrap().as_i64(), Some(99));
1044
1045 let obj = Object::Null;
1047 assert!(store.deep_resolve(&obj).unwrap().is_null());
1048
1049 let obj = Object::String(rpdfium_core::PdfString::from_bytes(b"test".to_vec()));
1051 assert!(store.deep_resolve(&obj).unwrap().as_string().is_some());
1052 }
1053
1054 #[test]
1056 fn test_deep_resolve_unknown_reference() {
1057 let pdf = build_minimal_pdf();
1058 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
1059
1060 let reference = Object::Reference(ObjectId::new(999, 0));
1061 let result = store.deep_resolve(&reference);
1062 assert!(result.is_err());
1063 }
1064
1065 #[test]
1071 fn test_open_empty_source() {
1072 let result = ObjectStore::open(Vec::new(), ParsingMode::Strict);
1073 assert!(result.is_err());
1074 }
1075
1076 #[test]
1078 fn test_open_garbage_data() {
1079 let result = ObjectStore::open(b"this is not a pdf".to_vec(), ParsingMode::Strict);
1080 assert!(result.is_err());
1081 }
1082
1083 #[test]
1085 fn test_open_header_only() {
1086 let result = ObjectStore::open(b"%PDF-1.4\n".to_vec(), ParsingMode::Strict);
1087 assert!(result.is_err());
1088 }
1089
1090 #[test]
1096 fn test_build_slots_empty() {
1097 let slots = ObjectStore::<Vec<u8>>::build_slots(&[]);
1098 assert!(slots.is_empty());
1099 }
1100
1101 #[test]
1103 fn test_build_slots_disjoint_sections() {
1104 let section1 = XrefSection {
1105 entries: vec![crate::xref::XrefEntry {
1106 id: ObjectId::new(1, 0),
1107 entry_type: XrefEntryType::InUse { offset: 100 },
1108 }],
1109 };
1110 let section2 = XrefSection {
1111 entries: vec![crate::xref::XrefEntry {
1112 id: ObjectId::new(2, 0),
1113 entry_type: XrefEntryType::InUse { offset: 200 },
1114 }],
1115 };
1116
1117 let slots = ObjectStore::<Vec<u8>>::build_slots(&[section1, section2]);
1118 assert_eq!(slots.len(), 2);
1119 assert!(slots.contains_key(&ObjectId::new(1, 0)));
1120 assert!(slots.contains_key(&ObjectId::new(2, 0)));
1121 }
1122
1123 #[test]
1125 fn test_build_slots_in_stream_fields() {
1126 let section = XrefSection {
1127 entries: vec![crate::xref::XrefEntry {
1128 id: ObjectId::new(42, 0),
1129 entry_type: XrefEntryType::InStream {
1130 stream_id: ObjectId::new(10, 0),
1131 index: 7,
1132 },
1133 }],
1134 };
1135
1136 let slots = ObjectStore::<Vec<u8>>::build_slots(&[section]);
1137 match &slots[&ObjectId::new(42, 0)] {
1138 ObjectSlot::InStream {
1139 stream_id, index, ..
1140 } => {
1141 assert_eq!(*stream_id, ObjectId::new(10, 0));
1142 assert_eq!(*index, 7);
1143 }
1144 _ => panic!("expected InStream"),
1145 }
1146 }
1147
1148 #[test]
1154 fn test_object_count_and_ids() {
1155 let pdf = build_minimal_pdf();
1156 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
1157
1158 assert_eq!(store.object_count(), 2);
1159
1160 let ids: Vec<ObjectId> = store.object_ids().copied().collect();
1161 assert!(ids.contains(&ObjectId::new(1, 0)));
1162 assert!(ids.contains(&ObjectId::new(2, 0)));
1163 }
1164
1165 #[test]
1167 fn test_parsing_mode_accessor() {
1168 let pdf = build_minimal_pdf();
1169 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
1170 assert_eq!(store.parsing_mode(), ParsingMode::Strict);
1171 }
1172
1173 #[test]
1175 fn test_lenient_mode_returns_null_for_bad_object() {
1176 let pdf = build_pdf_with_bad_object();
1178 let store = ObjectStore::open(pdf, ParsingMode::Lenient).unwrap();
1179
1180 let result = store.resolve(ObjectId::new(3, 0));
1182 match result {
1183 Ok(obj) => assert!(obj.is_null()),
1184 Err(_) => {
1185 }
1187 }
1188 }
1189
1190 #[test]
1196 fn test_decode_stream_not_a_stream() {
1197 let pdf = build_minimal_pdf();
1198 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
1199
1200 let obj = Object::Integer(42);
1201 let result = store.decode_stream(&obj);
1202 assert!(result.is_err());
1203 }
1204
1205 #[test]
1211 fn test_decode_stream_flate() {
1212 let pdf = build_pdf_with_flate_stream();
1213 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
1214
1215 let stream_obj = store.resolve(ObjectId::new(3, 0)).unwrap();
1216 let decoded = store.decode_stream(stream_obj).unwrap();
1217 assert_eq!(decoded, b"Hello, PDF stream!");
1218 }
1219
1220 #[test]
1222 fn test_decode_stream_ascii_hex() {
1223 let pdf = build_pdf_with_ascii_hex_stream();
1224 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
1225
1226 let stream_obj = store.resolve(ObjectId::new(3, 0)).unwrap();
1227 let decoded = store.decode_stream(stream_obj).unwrap();
1228 assert_eq!(decoded, b"Hello");
1229 }
1230
1231 #[test]
1233 fn test_decode_stream_no_filter() {
1234 let pdf = build_pdf_with_raw_stream();
1235 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
1236
1237 let stream_obj = store.resolve(ObjectId::new(3, 0)).unwrap();
1238 let decoded = store.decode_stream(stream_obj).unwrap();
1239 assert_eq!(decoded, b"raw data");
1240 }
1241
1242 fn build_pdf_with_flate_stream() -> Vec<u8> {
1248 use flate2::Compression;
1249 use flate2::write::ZlibEncoder;
1250 use std::io::Write;
1251
1252 let original = b"Hello, PDF stream!";
1253 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
1254 encoder.write_all(original).unwrap();
1255 let compressed = encoder.finish().unwrap();
1256
1257 let mut pdf = Vec::new();
1258 pdf.extend_from_slice(b"%PDF-1.4\n");
1259
1260 let obj1_offset = pdf.len();
1261 pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
1262
1263 let obj2_offset = pdf.len();
1264 pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");
1265
1266 let obj3_offset = pdf.len();
1267 let stream_dict = format!(
1268 "3 0 obj\n<< /Length {} /Filter /FlateDecode >>\nstream\n",
1269 compressed.len()
1270 );
1271 pdf.extend_from_slice(stream_dict.as_bytes());
1272 pdf.extend_from_slice(&compressed);
1273 pdf.extend_from_slice(b"\nendstream\nendobj\n");
1274
1275 let xref_offset = pdf.len();
1276 pdf.extend_from_slice(b"xref\n");
1277 pdf.extend_from_slice(b"0 4\n");
1278 pdf.extend_from_slice(b"0000000000 65535 f \r\n");
1279 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj1_offset).as_bytes());
1280 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj2_offset).as_bytes());
1281 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj3_offset).as_bytes());
1282 pdf.extend_from_slice(b"trailer\n");
1283 pdf.extend_from_slice(b"<< /Size 4 /Root 1 0 R >>\n");
1284 pdf.extend_from_slice(format!("startxref\n{}\n%%EOF", xref_offset).as_bytes());
1285
1286 pdf
1287 }
1288
1289 fn build_pdf_with_ascii_hex_stream() -> Vec<u8> {
1291 let hex_data = b"48656C6C6F>";
1292
1293 let mut pdf = Vec::new();
1294 pdf.extend_from_slice(b"%PDF-1.4\n");
1295
1296 let obj1_offset = pdf.len();
1297 pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
1298
1299 let obj2_offset = pdf.len();
1300 pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");
1301
1302 let obj3_offset = pdf.len();
1303 let stream_dict = format!(
1304 "3 0 obj\n<< /Length {} /Filter /ASCIIHexDecode >>\nstream\n",
1305 hex_data.len()
1306 );
1307 pdf.extend_from_slice(stream_dict.as_bytes());
1308 pdf.extend_from_slice(hex_data);
1309 pdf.extend_from_slice(b"\nendstream\nendobj\n");
1310
1311 let xref_offset = pdf.len();
1312 pdf.extend_from_slice(b"xref\n");
1313 pdf.extend_from_slice(b"0 4\n");
1314 pdf.extend_from_slice(b"0000000000 65535 f \r\n");
1315 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj1_offset).as_bytes());
1316 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj2_offset).as_bytes());
1317 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj3_offset).as_bytes());
1318 pdf.extend_from_slice(b"trailer\n");
1319 pdf.extend_from_slice(b"<< /Size 4 /Root 1 0 R >>\n");
1320 pdf.extend_from_slice(format!("startxref\n{}\n%%EOF", xref_offset).as_bytes());
1321
1322 pdf
1323 }
1324
1325 fn build_pdf_with_raw_stream() -> Vec<u8> {
1327 let raw = b"raw data";
1328
1329 let mut pdf = Vec::new();
1330 pdf.extend_from_slice(b"%PDF-1.4\n");
1331
1332 let obj1_offset = pdf.len();
1333 pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
1334
1335 let obj2_offset = pdf.len();
1336 pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");
1337
1338 let obj3_offset = pdf.len();
1339 let stream_dict = format!("3 0 obj\n<< /Length {} >>\nstream\n", raw.len());
1340 pdf.extend_from_slice(stream_dict.as_bytes());
1341 pdf.extend_from_slice(raw);
1342 pdf.extend_from_slice(b"\nendstream\nendobj\n");
1343
1344 let xref_offset = pdf.len();
1345 pdf.extend_from_slice(b"xref\n");
1346 pdf.extend_from_slice(b"0 4\n");
1347 pdf.extend_from_slice(b"0000000000 65535 f \r\n");
1348 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj1_offset).as_bytes());
1349 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj2_offset).as_bytes());
1350 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj3_offset).as_bytes());
1351 pdf.extend_from_slice(b"trailer\n");
1352 pdf.extend_from_slice(b"<< /Size 4 /Root 1 0 R >>\n");
1353 pdf.extend_from_slice(format!("startxref\n{}\n%%EOF", xref_offset).as_bytes());
1354
1355 pdf
1356 }
1357
/// Builds a PDF 1.4 file in which object 3 consists solely of a reference to
/// itself (`3 0 R`).
///
/// Objects 1 and 2 are a catalog and an empty page tree; the classic xref
/// table lists all three objects at their real byte offsets.
fn build_self_referencing_pdf() -> Vec<u8> {
    let mut bytes: Vec<u8> = Vec::new();
    bytes.extend_from_slice(b"%PDF-1.4\n");

    let bodies: [&[u8]; 3] = [
        b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n",
        b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n",
        b"3 0 obj\n3 0 R\nendobj\n",
    ];

    // Append the objects in order, remembering where each one starts.
    let mut starts = Vec::with_capacity(bodies.len());
    for body in bodies {
        starts.push(bytes.len());
        bytes.extend_from_slice(body);
    }

    let table_at = bytes.len();
    bytes.extend_from_slice(b"xref\n0 4\n0000000000 65535 f \r\n");
    for start in starts {
        bytes.extend_from_slice(format!("{start:010} 00000 n \r\n").as_bytes());
    }
    bytes.extend_from_slice(b"trailer\n<< /Size 4 /Root 1 0 R >>\n");
    bytes.extend_from_slice(format!("startxref\n{table_at}\n%%EOF").as_bytes());

    bytes
}
1385
/// Builds a PDF 1.4 file whose xref table advertises an object 3 that does
/// not exist.
///
/// Only objects 1 and 2 are written. The xref entry for object 3 uses the
/// fixed byte offset 5, which falls inside the `%PDF-1.4` header line rather
/// than on any `3 0 obj` keyword.
fn build_pdf_with_bad_object() -> Vec<u8> {
    let mut out: Vec<u8> = Vec::new();
    out.extend_from_slice(b"%PDF-1.4\n");

    let catalog_at = out.len();
    out.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");

    let pages_at = out.len();
    out.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");

    // Deliberately bogus: no object body is located at this offset.
    let bogus_at = 5;

    let table_at = out.len();
    out.extend_from_slice(b"xref\n0 4\n0000000000 65535 f \r\n");
    for offset in [catalog_at, pages_at, bogus_at] {
        out.extend_from_slice(format!("{offset:010} 00000 n \r\n").as_bytes());
    }
    out.extend_from_slice(b"trailer\n<< /Size 4 /Root 1 0 R >>\n");
    out.extend_from_slice(format!("startxref\n{table_at}\n%%EOF").as_bytes());

    out
}
1413
/// Builds a PDF 1.4 file that has no xref table at all and whose `startxref`
/// value (99999) lies far beyond the end of the data.
///
/// The file still contains well-formed objects 1 (catalog) and 2 (page tree)
/// and a `trailer` dictionary naming object 1 as `/Root`.
fn build_pdf_with_corrupt_xref() -> Vec<u8> {
    let pieces: [&[u8]; 5] = [
        b"%PDF-1.4\n",
        b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n",
        b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n",
        b"trailer\n<< /Size 3 /Root 1 0 R >>\n",
        b"startxref\n99999\n%%EOF",
    ];
    pieces.concat()
}
1425
/// Lenient mode opens a file with an unusable `startxref`: objects 1 and 2
/// are still known to the store and the trailer root is object 1.
#[test]
fn test_open_corrupt_xref_lenient_rebuilds() {
    let store = ObjectStore::open(build_pdf_with_corrupt_xref(), ParsingMode::Lenient).unwrap();

    for number in [1, 2] {
        assert!(store.contains(ObjectId::new(number, 0)));
    }
    assert_eq!(store.trailer().root, ObjectId::new(1, 0));
}
1441
/// Strict mode refuses to open the file whose `startxref` is unusable.
#[test]
fn test_open_corrupt_xref_strict_fails() {
    let outcome = ObjectStore::open(build_pdf_with_corrupt_xref(), ParsingMode::Strict);
    assert!(outcome.is_err());
}
1449
/// A well-formed minimal PDF opens in lenient mode with objects 1 and 2
/// present, object 1 as the trailer root and a trailer size of 3.
#[test]
fn test_open_valid_xref_uses_normal_path() {
    let store = ObjectStore::open(build_minimal_pdf(), ParsingMode::Lenient).unwrap();

    for number in [1, 2] {
        assert!(store.contains(ObjectId::new(number, 0)));
    }

    let trailer = store.trailer();
    assert_eq!(trailer.root, ObjectId::new(1, 0));
    assert_eq!(trailer.size, 3);
}
1460
/// After a lenient open of the corrupt-xref fixture the store reports that
/// its xref table was rebuilt.
#[test]
fn test_xref_rebuilt_true_for_corrupt_xref() {
    let store = ObjectStore::open(build_pdf_with_corrupt_xref(), ParsingMode::Lenient).unwrap();
    assert!(store.xref_table_rebuilt());
}
1468
/// A strict open of the minimal fixture does not flag the xref table as
/// rebuilt.
#[test]
fn test_xref_rebuilt_false_for_valid_pdf() {
    let store = ObjectStore::open(build_minimal_pdf(), ParsingMode::Strict).unwrap();
    assert!(!store.xref_table_rebuilt());
}
1476
/// The minimal fixture, opened strictly, is not reported as using an xref
/// stream.
#[test]
fn test_is_xref_stream_false_for_traditional_xref() {
    let store = ObjectStore::open(build_minimal_pdf(), ParsingMode::Strict).unwrap();
    assert!(!store.is_xref_stream());
}
1484
/// When the xref table had to be rebuilt (lenient open of the corrupt-xref
/// fixture), the store never claims the xref was a stream.
#[test]
fn test_is_xref_stream_false_when_rebuilt() {
    let store = ObjectStore::open(build_pdf_with_corrupt_xref(), ParsingMode::Lenient).unwrap();
    assert!(!store.is_xref_stream());
}
1492
/// The position reported for object 1 of the minimal fixture is present and
/// strictly greater than zero.
#[test]
fn test_object_offset_returns_offset_for_direct_object() {
    let store = ObjectStore::open(build_minimal_pdf(), ParsingMode::Strict).unwrap();
    let position = store.object_position_or_zero(ObjectId::new(1, 0));
    assert!(matches!(position, Some(value) if value > 0));
}
1503
/// Asking for the position of object 999, which the minimal fixture is not
/// expected to contain, yields `None`.
#[test]
fn test_object_offset_returns_none_for_unknown_object() {
    let store = ObjectStore::open(build_minimal_pdf(), ParsingMode::Strict).unwrap();
    let position = store.object_position_or_zero(ObjectId::new(999, 0));
    assert!(position.is_none());
}
1511
/// The trailer `/Size` (4) disagrees with the xref subsection, which lists
/// five entries (objects 0 through 4).
///
/// A lenient open must still succeed, all four real objects must be known to
/// the store, and the trailer must report root object 1 and size 4 as written.
#[test]
fn test_open_wrong_xref_size_still_resolves() {
    let page_content = b"q 0 0 0 rg 0 290 10 10 re B* Q";

    let mut bytes = Vec::new();
    bytes.extend_from_slice(b"%PDF-1.4\n");

    // Offsets of objects 1 to 4, pushed in object-number order.
    let mut starts = Vec::new();

    starts.push(bytes.len());
    bytes.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");

    starts.push(bytes.len());
    bytes.extend_from_slice(
        b"2 0 obj\n<< /Type /Pages /MediaBox [0 0 200 300] /Count 1 /Kids [3 0 R] >>\nendobj\n",
    );

    starts.push(bytes.len());
    bytes.extend_from_slice(b"3 0 obj\n<< /Type /Page /Parent 2 0 R >>\nendobj\n");

    starts.push(bytes.len());
    bytes.extend_from_slice(
        format!("4 0 obj\n<< /Length {} >>\nstream\n", page_content.len()).as_bytes(),
    );
    bytes.extend_from_slice(page_content);
    bytes.extend_from_slice(b"\nendstream\nendobj\n");

    let table_at = bytes.len();
    bytes.extend_from_slice(b"xref\n0 5\n");
    bytes.extend_from_slice(b"0000000000 65535 f \r\n");
    for start in starts {
        bytes.extend_from_slice(format!("{start:010} 00000 n \r\n").as_bytes());
    }
    // /Size is intentionally one less than the number of xref entries.
    bytes.extend_from_slice(b"trailer\n<< /Root 1 0 R /Size 4 >>\n");
    bytes.extend_from_slice(format!("startxref\n{table_at}\n%%EOF").as_bytes());

    let store = ObjectStore::open(bytes, ParsingMode::Lenient).unwrap();
    for number in [1, 2, 3, 4] {
        assert!(store.contains(ObjectId::new(number, 0)));
    }
    let trailer = store.trailer();
    assert_eq!(trailer.root, ObjectId::new(1, 0));
    assert_eq!(trailer.size, 4);
}
1554
1555 fn build_pdf_with_objstm() -> Vec<u8> {
1562 use flate2::Compression;
1563 use flate2::write::ZlibEncoder;
1564 use std::io::Write;
1565
1566 let mut pdf = Vec::new();
1567 pdf.extend_from_slice(b"%PDF-1.5\n");
1568
1569 let obj1_offset = pdf.len();
1571 pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
1572
1573 let obj2_offset = pdf.len();
1575 pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");
1576
1577 let objstm_raw = b"10 0 11 3 42 true";
1581 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
1582 encoder.write_all(objstm_raw).unwrap();
1583 let objstm_compressed = encoder.finish().unwrap();
1584
1585 let obj3_offset = pdf.len();
1586 let objstm_dict = format!(
1587 "3 0 obj\n<< /Type /ObjStm /N 2 /First 10 /Length {} /Filter /FlateDecode >>\nstream\n",
1588 objstm_compressed.len()
1589 );
1590 pdf.extend_from_slice(objstm_dict.as_bytes());
1591 pdf.extend_from_slice(&objstm_compressed);
1592 pdf.extend_from_slice(b"\nendstream\nendobj\n");
1593
1594 let mut xref_data = Vec::new();
1600 xref_data.push(0);
1602 xref_data.extend_from_slice(&(0u16).to_be_bytes());
1603 xref_data.push(255);
1604 xref_data.push(1);
1606 xref_data.extend_from_slice(&(obj1_offset as u16).to_be_bytes());
1607 xref_data.push(0);
1608 xref_data.push(1);
1610 xref_data.extend_from_slice(&(obj2_offset as u16).to_be_bytes());
1611 xref_data.push(0);
1612 xref_data.push(1);
1614 xref_data.extend_from_slice(&(obj3_offset as u16).to_be_bytes());
1615 xref_data.push(0);
1616 xref_data.push(2);
1618 xref_data.extend_from_slice(&(3u16).to_be_bytes()); xref_data.push(0); xref_data.push(2);
1622 xref_data.extend_from_slice(&(3u16).to_be_bytes()); xref_data.push(1); let xref_offset = pdf.len();
1626 let xref_dict = format!(
1627 "4 0 obj\n<< /Type /XRef /Size 12 /W [1 2 1] /Index [0 4 10 2] /Root 1 0 R /Length {} >>\nstream\n",
1628 xref_data.len()
1629 );
1630 pdf.extend_from_slice(xref_dict.as_bytes());
1631 pdf.extend_from_slice(&xref_data);
1632 pdf.extend_from_slice(b"\nendstream\nendobj\n");
1633
1634 pdf.extend_from_slice(format!("startxref\n{}\n%%EOF", xref_offset).as_bytes());
1635
1636 pdf
1637 }
1638
/// Objects stored inside the object stream resolve to their values: object
/// 10 is the integer 42 and object 11 is the boolean `true`.
#[test]
fn test_resolve_object_from_objstm() {
    let store = ObjectStore::open(build_pdf_with_objstm(), ParsingMode::Strict).unwrap();

    let integer = store.resolve(ObjectId::new(10, 0)).unwrap();
    assert_eq!(integer.as_i64(), Some(42));

    let boolean = store.resolve(ObjectId::new(11, 0)).unwrap();
    assert_eq!(boolean.as_bool(), Some(true));
}
1653
/// Resolving the same object-stream member twice yields the same value
/// (42) both times.
#[test]
fn test_objstm_same_object_twice() {
    let store = ObjectStore::open(build_pdf_with_objstm(), ParsingMode::Strict).unwrap();

    let once = store.resolve(ObjectId::new(10, 0)).unwrap();
    let again = store.resolve(ObjectId::new(10, 0)).unwrap();

    let (first_value, second_value) = (once.as_i64(), again.as_i64());
    assert_eq!(first_value, second_value);
    assert_eq!(first_value, Some(42));
}
1665
/// The object-stream cache starts empty, holds one entry after the first
/// member is resolved, and stays at one entry when a second member of the
/// same stream is resolved.
#[test]
fn test_objstm_cache_populated_after_first_access() {
    let store = ObjectStore::open(build_pdf_with_objstm(), ParsingMode::Strict).unwrap();

    // Nothing has been resolved yet.
    assert_eq!(store.objstm_cache_len(), 0);

    // First access to a member of object stream 3.
    let _ = store.resolve(ObjectId::new(10, 0)).unwrap();
    assert_eq!(store.objstm_cache_len(), 1);

    // A sibling from the same stream must not add another cache entry.
    let sibling = store.resolve(ObjectId::new(11, 0)).unwrap();
    assert_eq!(sibling.as_bool(), Some(true));
    assert_eq!(store.objstm_cache_len(), 1);
}
1688
/// Pins the object-stream cache capacity constant at 256 so that a change to
/// it is a deliberate, reviewed decision.
#[test]
fn test_objstm_cache_size_limit() {
    let expected_limit: usize = 256;
    assert_eq!(MAX_OBJSTM_CACHE_ENTRIES, expected_limit);
}
1695
/// The xref entry for object 1 points at offset 99999, far beyond the end of
/// the file, while object 2's entry is correct.
///
/// This is a smoke test: the result of the lenient open is discarded, so the
/// only requirement is that opening returns instead of panicking.
#[test]
fn test_malformed_wrong_xref_offsets_lenient() {
    let mut bytes = Vec::new();
    bytes.extend_from_slice(b"%PDF-1.4\n");
    bytes.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
    let pages_at = bytes.len();
    bytes.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");

    let table_at = bytes.len();
    bytes.extend_from_slice(b"xref\n0 3\n");
    bytes.extend_from_slice(b"0000000000 65535 f \r\n");
    // Wrong on purpose: object 1 does not live at offset 99999.
    bytes.extend_from_slice(b"0000099999 00000 n \r\n");
    bytes.extend_from_slice(format!("{pages_at:010} 00000 n \r\n").as_bytes());
    bytes.extend_from_slice(b"trailer\n<< /Size 3 /Root 1 0 R >>\n");
    bytes.extend_from_slice(format!("startxref\n{table_at}\n%%EOF").as_bytes());

    let _ = ObjectStore::open(bytes, ParsingMode::Lenient);
}
1721
/// Same fixture as the lenient variant: object 1's xref entry points at
/// offset 99999, beyond the end of the file.
///
/// In strict mode the open itself must succeed, but resolving object 1 must
/// return an error.
#[test]
fn test_malformed_wrong_xref_offsets_strict() {
    let mut bytes = Vec::new();
    bytes.extend_from_slice(b"%PDF-1.4\n");
    bytes.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
    let pages_at = bytes.len();
    bytes.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");

    let table_at = bytes.len();
    bytes.extend_from_slice(b"xref\n0 3\n");
    bytes.extend_from_slice(b"0000000000 65535 f \r\n");
    // Wrong on purpose: object 1 does not live at offset 99999.
    bytes.extend_from_slice(b"0000099999 00000 n \r\n");
    bytes.extend_from_slice(format!("{pages_at:010} 00000 n \r\n").as_bytes());
    bytes.extend_from_slice(b"trailer\n<< /Size 3 /Root 1 0 R >>\n");
    bytes.extend_from_slice(format!("startxref\n{table_at}\n%%EOF").as_bytes());

    let store = ObjectStore::open(bytes, ParsingMode::Strict).unwrap();
    assert!(store.resolve(ObjectId::new(1, 0)).is_err());
}
1744
/// A file cut off in the middle of its first object dictionary cannot be
/// opened, even in lenient mode.
#[test]
fn test_malformed_truncated_pdf() {
    let truncated = b"%PDF-1.4\n1 0 obj\n<< /Type /Cat".to_vec();
    assert!(ObjectStore::open(truncated, ParsingMode::Lenient).is_err());
}
1752
/// An otherwise valid file that ends right after the `startxref` value, with
/// no `%%EOF` marker.
///
/// This is a smoke test: the open result is discarded, so either outcome is
/// accepted as long as nothing panics.
#[test]
fn test_malformed_missing_eof_marker() {
    let mut bytes = Vec::new();
    bytes.extend_from_slice(b"%PDF-1.4\n");

    let catalog_at = bytes.len();
    bytes.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
    let pages_at = bytes.len();
    bytes.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");

    let table_at = bytes.len();
    bytes.extend_from_slice(b"xref\n0 3\n");
    bytes.extend_from_slice(b"0000000000 65535 f \r\n");
    for offset in [catalog_at, pages_at] {
        bytes.extend_from_slice(format!("{offset:010} 00000 n \r\n").as_bytes());
    }
    bytes.extend_from_slice(b"trailer\n<< /Size 3 /Root 1 0 R >>\n");
    // No trailing "%%EOF" here.
    bytes.extend_from_slice(format!("startxref\n{table_at}\n").as_bytes());

    let _ = ObjectStore::open(bytes, ParsingMode::Lenient);
}
1774
/// Zero bytes of input is rejected, even in lenient mode.
#[test]
fn test_malformed_empty_pdf() {
    let nothing: Vec<u8> = Vec::new();
    assert!(ObjectStore::open(nothing, ParsingMode::Lenient).is_err());
}
1781
/// A file consisting of nothing but the `%PDF-1.4` header line is rejected,
/// even in lenient mode.
#[test]
fn test_malformed_header_only() {
    let header_only = b"%PDF-1.4\n".to_vec();
    assert!(ObjectStore::open(header_only, ParsingMode::Lenient).is_err());
}
1788
/// A file with no xref table whose `startxref` value (99999999) is far past
/// the end of the data.
///
/// This is a smoke test: the lenient open result is discarded, so the only
/// requirement is that the call returns instead of panicking.
#[test]
fn test_malformed_invalid_startxref() {
    let pieces: [&[u8]; 5] = [
        b"%PDF-1.4\n",
        b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n",
        b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n",
        b"trailer\n<< /Size 3 /Root 1 0 R >>\n",
        b"startxref\n99999999\n%%EOF",
    ];
    let bytes: Vec<u8> = pieces.concat();

    let _ = ObjectStore::open(bytes, ParsingMode::Lenient);
}
1802
/// The catalog dictionary repeats the `/Type` key (`/Type /Catalog` appears
/// twice). A lenient open must still succeed.
#[test]
fn test_malformed_duplicate_dict_keys() {
    let mut bytes = Vec::new();
    bytes.extend_from_slice(b"%PDF-1.4\n");

    let catalog_at = bytes.len();
    bytes.extend_from_slice(
        b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R /Type /Catalog >>\nendobj\n",
    );
    let pages_at = bytes.len();
    bytes.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");

    let table_at = bytes.len();
    bytes.extend_from_slice(b"xref\n0 3\n");
    bytes.extend_from_slice(b"0000000000 65535 f \r\n");
    for offset in [catalog_at, pages_at] {
        bytes.extend_from_slice(format!("{offset:010} 00000 n \r\n").as_bytes());
    }
    bytes.extend_from_slice(b"trailer\n<< /Size 3 /Root 1 0 R >>\n");
    bytes.extend_from_slice(format!("startxref\n{table_at}\n%%EOF").as_bytes());

    assert!(ObjectStore::open(bytes, ParsingMode::Lenient).is_ok());
}
1825
/// The trailer claims `/Size 999999999` although the xref table lists only
/// three entries.
///
/// This is a smoke test: the lenient open result is discarded, so the only
/// requirement is that the call returns instead of panicking.
#[test]
fn test_malformed_oversized_trailer_size() {
    let mut bytes = Vec::new();
    bytes.extend_from_slice(b"%PDF-1.4\n");

    let catalog_at = bytes.len();
    bytes.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
    let pages_at = bytes.len();
    bytes.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");

    let table_at = bytes.len();
    bytes.extend_from_slice(b"xref\n0 3\n");
    bytes.extend_from_slice(b"0000000000 65535 f \r\n");
    for offset in [catalog_at, pages_at] {
        bytes.extend_from_slice(format!("{offset:010} 00000 n \r\n").as_bytes());
    }
    // Absurdly large /Size relative to the three entries above.
    bytes.extend_from_slice(b"trailer\n<< /Size 999999999 /Root 1 0 R >>\n");
    bytes.extend_from_slice(format!("startxref\n{table_at}\n%%EOF").as_bytes());

    let _ = ObjectStore::open(bytes, ParsingMode::Lenient);
}
1847
/// Resolving object 999 against the minimal fixture returns an error.
#[test]
fn test_malformed_resolve_nonexistent_object() {
    let store = ObjectStore::open(build_minimal_pdf(), ParsingMode::Lenient).unwrap();
    assert!(store.resolve(ObjectId::new(999, 0)).is_err());
}
1856
/// Resolves object 1 with generation 1 against the minimal fixture.
///
/// This is a smoke test: the result is discarded, so either outcome is
/// accepted as long as the call does not panic.
/// NOTE(review): presumably the fixture stores object 1 under generation 0;
/// `build_minimal_pdf` is not visible here to confirm.
#[test]
fn test_malformed_wrong_generation() {
    let store = ObjectStore::open(build_minimal_pdf(), ParsingMode::Lenient).unwrap();
    let _ = store.resolve(ObjectId::new(1, 1));
}
1867
/// Object 1 is `true` wrapped in 100 levels of nested arrays; the catalog is
/// object 2 and the page tree is object 3.
///
/// This is a smoke test: the lenient open result is discarded, so the only
/// requirement is that opening returns instead of panicking.
#[test]
fn test_malformed_deeply_nested_arrays() {
    let depth = 100;

    let mut bytes = Vec::new();
    bytes.extend_from_slice(b"%PDF-1.4\n");

    let nested_at = bytes.len();
    let nested_object = format!(
        "1 0 obj\n{}true{}\nendobj\n",
        "[".repeat(depth),
        "]".repeat(depth)
    );
    bytes.extend_from_slice(nested_object.as_bytes());

    let catalog_at = bytes.len();
    bytes.extend_from_slice(b"2 0 obj\n<< /Type /Catalog /Pages 3 0 R >>\nendobj\n");
    let pages_at = bytes.len();
    bytes.extend_from_slice(b"3 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");

    let table_at = bytes.len();
    bytes.extend_from_slice(b"xref\n0 4\n");
    bytes.extend_from_slice(b"0000000000 65535 f \r\n");
    for offset in [nested_at, catalog_at, pages_at] {
        bytes.extend_from_slice(format!("{offset:010} 00000 n \r\n").as_bytes());
    }
    bytes.extend_from_slice(b"trailer\n<< /Size 4 /Root 2 0 R >>\n");
    bytes.extend_from_slice(format!("startxref\n{table_at}\n%%EOF").as_bytes());

    let _ = ObjectStore::open(bytes, ParsingMode::Lenient);
}
1900
/// Resolves object 0 generation 0 against the minimal fixture.
///
/// This is a smoke test: the result is discarded, so either outcome is
/// accepted as long as the call does not panic.
#[test]
fn test_malformed_resolve_object_zero() {
    let store = ObjectStore::open(build_minimal_pdf(), ParsingMode::Lenient).unwrap();
    let _ = store.resolve(ObjectId::new(0, 0));
}
1910
/// Object 3 is a stream that declares `/Length 99999` although its actual
/// payload is only a few dozen bytes.
///
/// This is a smoke test: the lenient open result is discarded, so the only
/// requirement is that opening returns instead of panicking.
#[test]
fn test_malformed_stream_wrong_length() {
    let payload = b"BT /F1 12 Tf 10 10 Td (Hello) Tj ET";

    let mut bytes = Vec::new();
    bytes.extend_from_slice(b"%PDF-1.4\n");

    let catalog_at = bytes.len();
    bytes.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
    let pages_at = bytes.len();
    bytes.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");

    let stream_at = bytes.len();
    // The declared length is far larger than the payload that follows.
    bytes.extend_from_slice(b"3 0 obj\n<< /Length 99999 >>\nstream\n");
    bytes.extend_from_slice(payload);
    bytes.extend_from_slice(b"\nendstream\nendobj\n");

    let table_at = bytes.len();
    bytes.extend_from_slice(b"xref\n0 4\n");
    bytes.extend_from_slice(b"0000000000 65535 f \r\n");
    for offset in [catalog_at, pages_at, stream_at] {
        bytes.extend_from_slice(format!("{offset:010} 00000 n \r\n").as_bytes());
    }
    bytes.extend_from_slice(b"trailer\n<< /Size 4 /Root 1 0 R >>\n");
    bytes.extend_from_slice(format!("startxref\n{table_at}\n%%EOF").as_bytes());

    let _ = ObjectStore::open(bytes, ParsingMode::Lenient);
}
1938
/// The trailer has no `/Root` entry, but object 1 is a dictionary with
/// `/Type /Catalog`. A lenient open must succeed.
#[test]
fn test_malformed_missing_root_with_catalog() {
    let mut bytes = Vec::new();
    bytes.extend_from_slice(b"%PDF-1.4\n");

    let catalog_at = bytes.len();
    bytes.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
    let pages_at = bytes.len();
    bytes.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");

    let table_at = bytes.len();
    bytes.extend_from_slice(b"xref\n0 3\n");
    bytes.extend_from_slice(b"0000000000 65535 f \r\n");
    for offset in [catalog_at, pages_at] {
        bytes.extend_from_slice(format!("{offset:010} 00000 n \r\n").as_bytes());
    }
    // The trailer deliberately omits /Root.
    bytes.extend_from_slice(b"trailer\n<< /Size 3 >>\n");
    bytes.extend_from_slice(format!("startxref\n{table_at}\n%%EOF").as_bytes());

    assert!(ObjectStore::open(bytes, ParsingMode::Lenient).is_ok());
}
1960
/// The trailer has no `/Root` entry and no object in the file has
/// `/Type /Catalog` (object 1 is just `<< /Foo /Bar >>`). Even a lenient
/// open must fail.
#[test]
fn test_malformed_missing_root_no_catalog() {
    let mut bytes = Vec::new();
    bytes.extend_from_slice(b"%PDF-1.4\n");

    let first_at = bytes.len();
    bytes.extend_from_slice(b"1 0 obj\n<< /Foo /Bar >>\nendobj\n");
    let pages_at = bytes.len();
    bytes.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");

    let table_at = bytes.len();
    bytes.extend_from_slice(b"xref\n0 3\n");
    bytes.extend_from_slice(b"0000000000 65535 f \r\n");
    for offset in [first_at, pages_at] {
        bytes.extend_from_slice(format!("{offset:010} 00000 n \r\n").as_bytes());
    }
    // The trailer deliberately omits /Root.
    bytes.extend_from_slice(b"trailer\n<< /Size 3 >>\n");
    bytes.extend_from_slice(format!("startxref\n{table_at}\n%%EOF").as_bytes());

    assert!(ObjectStore::open(bytes, ParsingMode::Lenient).is_err());
}
1982
/// Objects 1 and 2 reference each other (`/Pages 2 0 R` and `/Extra 2 0 R`
/// in object 1, `/Parent 1 0 R` in object 2).
///
/// A lenient open must succeed and resolving object 1 must yield a
/// dictionary.
#[test]
fn test_malformed_circular_reference() {
    let mut bytes = Vec::new();
    bytes.extend_from_slice(b"%PDF-1.4\n");

    let catalog_at = bytes.len();
    bytes.extend_from_slice(
        b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R /Extra 2 0 R >>\nendobj\n",
    );
    let pages_at = bytes.len();
    bytes.extend_from_slice(
        b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 /Parent 1 0 R >>\nendobj\n",
    );

    let table_at = bytes.len();
    bytes.extend_from_slice(b"xref\n0 3\n");
    bytes.extend_from_slice(b"0000000000 65535 f \r\n");
    for offset in [catalog_at, pages_at] {
        bytes.extend_from_slice(format!("{offset:010} 00000 n \r\n").as_bytes());
    }
    bytes.extend_from_slice(b"trailer\n<< /Size 3 /Root 1 0 R >>\n");
    bytes.extend_from_slice(format!("startxref\n{table_at}\n%%EOF").as_bytes());

    let store = ObjectStore::open(bytes, ParsingMode::Lenient).unwrap();
    let catalog = store.resolve(ObjectId::new(1, 0)).unwrap();
    assert!(catalog.as_dict().is_some());
}
2006
/// A header followed only by spaces and newlines is rejected, even in
/// lenient mode.
#[test]
fn test_malformed_whitespace_only_after_header() {
    let blank_body = b"%PDF-1.4\n   \n   \n".to_vec();
    assert!(ObjectStore::open(blank_body, ParsingMode::Lenient).is_err());
}
2014}