1use crate::crypto::{DecryptionError, DecryptionTarget, Decryptor, get};
4use crate::data::Data;
5use crate::metadata::Metadata;
6use crate::object::Name;
7use crate::object::ObjectIdentifier;
8use crate::object::Stream;
9use crate::object::dict::keys::{
10 AUTHOR, CREATION_DATE, CREATOR, ENCRYPT, FIRST, ID, INDEX, INFO, KEYWORDS, MOD_DATE, N,
11 OCPROPERTIES, PAGES, PREV, PRODUCER, ROOT, SIZE, SUBJECT, TITLE, TYPE, VERSION, W, XREF_STM,
12};
13use crate::object::indirect::IndirectObject;
14use crate::object::{Array, MaybeRef};
15use crate::object::{DateTime, Dict};
16use crate::object::{Object, ObjectLike};
17use crate::pdf::{PdfLoadLimits, PdfVersion};
18use crate::reader::Reader;
19use crate::reader::{Readable, ReaderContext, ReaderExt};
20use crate::sync::{Arc, FxHashMap, RwLock, RwLockExt};
21use crate::{PdfData, object};
22use alloc::vec;
23use alloc::vec::Vec;
24use core::cmp::max;
25use core::iter;
26use core::ops::Deref;
27use log::{error, warn};
28
/// Number of bytes consumed per entry when reading a classic cross-reference table.
pub(crate) const XREF_ENTRY_LEN: usize = 20;
30
/// Errors reported while building an [`XRef`].
#[derive(Debug, Copy, Clone)]
pub(crate) enum XRefError {
    /// The xref position, trailer dictionary, root or pages reference could not be read.
    Unknown,
    /// Creating the decryptor from the encryption dictionary failed.
    Encryption(DecryptionError),
}
36
37pub(crate) fn root_xref(
39 data: PdfData,
40 password: &[u8],
41 limits: PdfLoadLimits,
42) -> Result<XRef, XRefError> {
43 let mut xref_map = FxHashMap::default();
44 let xref_pos = find_last_xref_pos(data.as_ref()).ok_or(XRefError::Unknown)?;
45 let trailer =
46 populate_xref_impl(data.as_ref(), xref_pos, &mut xref_map).ok_or(XRefError::Unknown)?;
47
48 XRef::new(
49 data.clone(),
50 xref_map,
51 XRefInput::TrailerDictData(trailer),
52 false,
53 password,
54 limits,
55 )
56}
57
58pub(crate) fn fallback(data: PdfData, password: &[u8], limits: PdfLoadLimits) -> Option<XRef> {
60 warn!("xref table was invalid, trying to manually build xref table");
61 let (xref_map, xref_input) = fallback_xref_map(&data, password);
62
63 if let Some(xref_input) = xref_input {
64 warn!("rebuild xref table with {} entries", xref_map.len());
65
66 XRef::new(data.clone(), xref_map, xref_input, true, password, limits).ok()
67 } else {
68 warn!("couldn't find trailer dictionary, failed to rebuild xref table");
69
70 None
71 }
72}
73
/// Rebuild the xref map by scanning `data`, starting from a dummy reader context and allowing
/// the one-time recursive pass performed by [`fallback_xref_map_inner`].
fn fallback_xref_map<'a>(data: &'a PdfData, password: &[u8]) -> (XrefMap, Option<XRefInput<'a>>) {
    fallback_xref_map_inner(data, ReaderContext::dummy(), true, password)
}
77
/// Scan the whole file for object headers and trailer-like dictionaries and build an xref map.
///
/// * `data` - the raw bytes of the document.
/// * `dummy_ctx` - context used for every read; its object number is updated as objects are found.
/// * `recurse` - whether a second scan may be run when the chosen trailer has an `ENCRYPT` entry.
/// * `password` - forwarded to `XRef::new` when that second scan is prepared.
///
/// Returns the map together with the input for constructing an `XRef`: the bytes of the selected
/// trailer dictionary if one was found, otherwise the identifier of the object that preceded a
/// `Catalog` dictionary, otherwise `None`.
fn fallback_xref_map_inner<'a>(
    data: &'a PdfData,
    mut dummy_ctx: ReaderContext<'a>,
    recurse: bool,
    password: &[u8],
) -> (XrefMap, Option<XRefInput<'a>>) {
    let mut xref_map = FxHashMap::default();
    // Every dictionary with a `ROOT` key is a trailer candidate.
    let mut trailer_dicts = vec![];
    // Identifier of the object seen right before a dictionary typed `Catalog`.
    let mut root_ref = None;

    let mut r = Reader::new(data.as_ref());

    let mut last_obj_num = None;

    loop {
        let cur_pos = r.offset();

        // Snapshot of the reader at the current position, used below to re-read the same
        // bytes as a stream instead of a plain dictionary.
        let mut old_r = r.clone();

        if let Some(obj_id) = r.read::<ObjectIdentifier>(&dummy_ctx) {
            // Only record the identifier if something that can be skipped as an object follows
            // it; the main reader is not advanced past that object.
            let mut cloned = r.clone();
            cloned.skip_white_spaces_and_comments();
            if cloned.skip::<Object<'_>>(false).is_some() {
                xref_map.insert(obj_id, EntryType::Normal(cur_pos));
                last_obj_num = Some(obj_id);
                dummy_ctx.set_obj_number(obj_id);
            }
        } else if let Some(dict) = r.read::<Dict<'_>>(&dummy_ctx) {
            if dict.contains_key(ROOT) {
                trailer_dicts.push(dict.clone());
            }

            if dict
                .get::<Name>(TYPE)
                .is_some_and(|n| n.as_str() == "Catalog")
            {
                root_ref = last_obj_num;
            }

            // If the dictionary belongs to an object stream, register the objects it contains,
            // pointing them at the most recently seen object identifier.
            if let Some(stream) = old_r.read::<Stream<'_>>(&dummy_ctx)
                && stream.dict().get::<Name>(TYPE).as_deref() == Some(b"ObjStm")
                && let Some(data) = stream.decoded().ok()
                && let Some(last_obj_num) = last_obj_num
                && let Some(obj_stream) = ObjectStream::new(stream, &data, &dummy_ctx)
            {
                for (idx, (obj_num, _)) in obj_stream.offsets.iter().enumerate() {
                    let id = ObjectIdentifier::new(*obj_num as i32, 0);
                    // Entries found directly in the file (`Normal`) are never overwritten by
                    // object-stream entries.
                    if xref_map
                        .get(&id)
                        .is_none_or(|e| !matches!(e, &EntryType::Normal(_)))
                    {
                        xref_map.insert(
                            id,
                            EntryType::ObjStream(last_obj_num.obj_number as u32, idx as u32),
                        );
                    }
                }
            }
        } else {
            // Nothing recognisable here; advance by one byte and try again.
            r.read_byte();
        }

        if r.at_end() {
            break;
        }
    }

    // Pick a trailer candidate whose root dictionary contains `PAGES`. Later candidates
    // overwrite earlier ones, so the last matching candidate wins.
    let mut trailer_dict = None;

    for dict in trailer_dicts {
        if let Some(root_id) = dict.get_raw::<Dict<'_>>(ROOT) {
            let check = |dict: &Dict<'_>| -> bool { dict.contains_key(PAGES) };

            match root_id {
                MaybeRef::Ref(r) => match xref_map.get(&r.into()) {
                    // Root stored as a regular indirect object.
                    Some(EntryType::Normal(offset)) => {
                        let mut reader = Reader::new(&data.as_ref()[*offset..]);

                        if let Some(obj) =
                            reader.read_with_context::<IndirectObject<Dict<'_>>>(&dummy_ctx)
                            && check(&obj.clone().get())
                        {
                            trailer_dict = Some(dict);
                        }
                    }
                    // Root stored inside an object stream: load the stream, then the object.
                    Some(EntryType::ObjStream(obj_num, idx)) => {
                        if let Some(EntryType::Normal(offset)) =
                            xref_map.get(&ObjectIdentifier::new(*obj_num as i32, 0))
                        {
                            let mut reader = Reader::new(&data.as_ref()[*offset..]);

                            if let Some(stream) =
                                reader.read_with_context::<IndirectObject<Stream<'_>>>(&dummy_ctx)
                                && let Some(data) = stream.clone().get().decoded().ok()
                                && let Some(object_stream) =
                                    ObjectStream::new(stream.get(), &data, &dummy_ctx)
                                && let Some(obj) = object_stream.get::<Dict<'_>>(*idx)
                                && check(&obj)
                            {
                                trailer_dict = Some(dict);
                            }
                        }
                    }
                    _ => {}
                },
                // Root given inline as a direct dictionary.
                MaybeRef::NotRef(d) => {
                    if check(&d) {
                        trailer_dict = Some(dict);
                    }
                }
            }
        }
    }

    let has_encryption = trailer_dict
        .as_ref()
        .is_some_and(|t| t.contains_key(ENCRYPT));

    // For encrypted documents, build a temporary xref from the first pass and scan once more
    // with a context bound to it; `recurse = false` keeps this to a single extra pass.
    if has_encryption && recurse {
        if let Some(Ok(xref)) = trailer_dict.as_ref().map(|d| {
            XRef::new(
                data.clone(),
                xref_map.clone(),
                XRefInput::TrailerDictData(d.data()),
                true,
                password,
                PdfLoadLimits::default(),
            )
        }) {
            let ctx = ReaderContext::new(&xref, false);
            let (patched_map, _) = fallback_xref_map_inner(data, ctx, false, password);
            xref_map = patched_map;
        }
    }

    // Prefer a validated trailer dictionary; fall back to the catalog reference if available.
    if let Some(trailer_dict_data) = trailer_dict.map(|d| d.data()) {
        (
            xref_map,
            Some(XRefInput::TrailerDictData(trailer_dict_data)),
        )
    } else if let Some(root_ref) = root_ref {
        (xref_map, Some(XRefInput::RootRef(root_ref)))
    } else {
        (xref_map, None)
    }
}
234
/// Placeholder xref without any backing data; handed out by `XRef::dummy`.
const DUMMY_XREF: XRef = XRef(Inner::Dummy);
236
/// The cross-reference structure of a document: maps object identifiers to where the objects
/// are stored and resolves them on demand.
#[derive(Debug, Clone)]
pub struct XRef(Inner);
240
241impl XRef {
/// Create an xref from an already populated map and either the trailer dictionary bytes or a
/// direct reference to the root object.
///
/// The xref is first created with placeholder trailer data, no decryptor and default metadata,
/// because reading the trailer dictionary needs a `ReaderContext` that refers to the xref.
/// The decryptor is installed next, and finally the trailer data, optional-content flag and
/// metadata are read and stored.
///
/// # Errors
///
/// Returns [`XRefError::Unknown`] if the trailer dictionary, the root or the pages reference
/// cannot be read, and propagates the error of `get_decryptor`.
fn new(
    data: PdfData,
    xref_map: XrefMap,
    input: XRefInput<'_>,
    repaired: bool,
    password: &[u8],
    load_limits: PdfLoadLimits,
) -> Result<Self, XRefError> {
    // Placeholder values; replaced at the end of this function.
    let trailer_data = TrailerData::dummy();

    let mut xref = Self(Inner::Some(Arc::new(SomeRepr {
        data: Arc::new(Data::new(data)),
        map: Arc::new(RwLock::new(MapRepr { xref_map, repaired })),
        decryptor: Arc::new(Decryptor::None),
        has_ocgs: false,
        metadata: Arc::new(Metadata::default()),
        trailer_data,
        password: password.to_vec(),
        load_limits,
    })));

    // Phase 1: determine the decryptor from the trailer dictionary (if there is one).
    let decryptor = {
        match input {
            XRefInput::TrailerDictData(trailer_dict_data) => {
                let mut r = Reader::new(trailer_dict_data);

                let trailer_dict = r
                    .read_with_context::<Dict<'_>>(&ReaderContext::new(&xref, false))
                    .ok_or(XRefError::Unknown)?;

                get_decryptor(&trailer_dict, password)?
            }
            // Without a trailer dictionary there is nothing to derive a decryptor from.
            XRefInput::RootRef(_) => Decryptor::None,
        }
    };

    // Install the decryptor before the second read below, which goes through the xref.
    match &mut xref.0 {
        Inner::Dummy => unreachable!(),
        Inner::Some(r) => {
            let mutable = Arc::make_mut(r);
            mutable.decryptor = Arc::new(decryptor.clone());
        }
    }

    // Phase 2: read trailer data, optional-content flag and metadata.
    let (trailer_data, has_ocgs, metadata) = match input {
        XRefInput::TrailerDictData(trailer_dict_data) => {
            let mut r = Reader::new(trailer_dict_data);

            let trailer_dict = r
                .read_with_context::<Dict<'_>>(&ReaderContext::new(&xref, false))
                .ok_or(XRefError::Unknown)?;

            let root_ref = trailer_dict.get_ref(ROOT).ok_or(XRefError::Unknown)?;
            let root = trailer_dict
                .get::<Dict<'_>>(ROOT)
                .ok_or(XRefError::Unknown)?;
            // A missing info dictionary yields default metadata.
            let metadata = trailer_dict
                .get::<Dict<'_>>(INFO)
                .map(|d| parse_metadata(&d))
                .unwrap_or_default();
            let pages_ref = root.get_ref(PAGES).ok_or(XRefError::Unknown)?;
            let has_ocgs = root.get::<Dict<'_>>(OCPROPERTIES).is_some();
            let version = root
                .get::<Name>(VERSION)
                .and_then(|v| PdfVersion::from_bytes(v.deref()));

            let td = TrailerData {
                pages_ref: pages_ref.into(),
                root_ref: root_ref.into(),
                version,
            };

            (td, has_ocgs, metadata)
        }
        XRefInput::RootRef(root_ref) => {
            let root = xref.get::<Dict<'_>>(root_ref).ok_or(XRefError::Unknown)?;
            let pages_ref = root.get_ref(PAGES).ok_or(XRefError::Unknown)?;

            let td = TrailerData {
                pages_ref: pages_ref.into(),
                root_ref,
                version: None,
            };

            (td, false, Metadata::default())
        }
    };

    // Replace the placeholders with the real values.
    match &mut xref.0 {
        Inner::Dummy => unreachable!(),
        Inner::Some(r) => {
            let mutable = Arc::make_mut(r);
            mutable.trailer_data = trailer_data;
            mutable.decryptor = Arc::new(decryptor);
            mutable.has_ocgs = has_ocgs;
            mutable.metadata = Arc::new(metadata);
        }
    }

    Ok(xref)
}
350
351 fn is_repaired(&self) -> bool {
352 match &self.0 {
353 Inner::Dummy => false,
354 Inner::Some(r) => {
355 let locked = r.map.get();
356 locked.repaired
357 }
358 }
359 }
360
/// Return a reference to the shared placeholder xref, which has no backing data.
pub(crate) fn dummy() -> &'static Self {
    &DUMMY_XREF
}
364
365 pub(crate) fn load_limits(&self) -> PdfLoadLimits {
366 match &self.0 {
367 Inner::Dummy => PdfLoadLimits::default(),
368 Inner::Some(r) => r.load_limits,
369 }
370 }
371
372 pub(crate) fn len(&self) -> usize {
373 match &self.0 {
374 Inner::Dummy => 0,
375 Inner::Some(r) => r.map.get().xref_map.len(),
376 }
377 }
378
379 pub(crate) fn trailer_data(&self) -> &TrailerData {
380 match &self.0 {
381 Inner::Dummy => unreachable!(),
382 Inner::Some(r) => &r.trailer_data,
383 }
384 }
385
386 pub(crate) fn metadata(&self) -> &Metadata {
387 match &self.0 {
388 Inner::Dummy => unreachable!(),
389 Inner::Some(r) => &r.metadata,
390 }
391 }
392
/// Return the object identifier of the root object, as recorded in the trailer data.
///
/// # Panics
///
/// Panics when called on the dummy xref, because `trailer_data` does.
pub fn root_id(&self) -> ObjectIdentifier {
    self.trailer_data().root_ref
}
397
398 pub fn has_optional_content_groups(&self) -> bool {
400 match &self.0 {
401 Inner::Dummy => false,
402 Inner::Some(r) => r.has_ocgs,
403 }
404 }
405
406 pub(crate) fn objects(&self) -> impl IntoIterator<Item = Object<'_>> + '_ {
407 match &self.0 {
408 Inner::Dummy => unimplemented!(),
409 Inner::Some(r) => {
410 let locked = r.map.get();
411 let mut elements = locked
412 .xref_map
413 .iter()
414 .map(|(id, e)| {
415 let offset = match e {
416 EntryType::Normal(o) => (*o, 0),
417 EntryType::ObjStream(id, index) => {
418 if let Some(EntryType::Normal(offset)) =
419 locked.xref_map.get(&ObjectIdentifier::new(*id as i32, 0))
420 {
421 (*offset, *index)
422 } else {
423 (usize::MAX, 0)
424 }
425 }
426 };
427
428 (*id, offset)
429 })
430 .collect::<Vec<_>>();
431
432 elements.sort_by_key(|e1| e1.1);
435
436 let mut iter = elements.into_iter();
437
438 iter::from_fn(move || {
439 for next in iter.by_ref() {
440 if let Some(obj) = self.get_with(next.0, &ReaderContext::new(self, false)) {
441 return Some(obj);
442 } else {
443 continue;
445 }
446 }
447
448 None
449 })
450 }
451 }
452 }
453
/// Replace the xref map with one rebuilt by scanning the whole file and mark it as repaired.
///
/// # Panics
///
/// Panics when called on the dummy xref, when write access to the map cannot be obtained
/// immediately, or when the map has already been repaired.
pub(crate) fn repair(&self) {
    let Inner::Some(r) = &self.0 else {
        unreachable!();
    };

    let mut locked = r
        .map
        .try_put()
        .expect("xref repair: map lock not contended");
    assert!(!locked.repaired);

    // Only the rebuilt map is used; the trailer/root input found by the scan is discarded.
    let (xref_map, _) = fallback_xref_map(r.data.get(), &r.password);
    locked.xref_map = xref_map;
    locked.repaired = true;
}
469
470 #[inline]
471 pub(crate) fn needs_decryption(&self, ctx: &ReaderContext<'_>) -> bool {
472 match &self.0 {
473 Inner::Dummy => false,
474 Inner::Some(r) => {
475 if matches!(r.decryptor.as_ref(), Decryptor::None) {
476 false
477 } else {
478 !ctx.in_content_stream() && !ctx.in_object_stream()
479 }
480 }
481 }
482 }
483
484 #[inline]
485 pub(crate) fn decrypt(
486 &self,
487 id: ObjectIdentifier,
488 data: &[u8],
489 target: DecryptionTarget,
490 ) -> Option<Vec<u8>> {
491 match &self.0 {
492 Inner::Dummy => Some(data.to_vec()),
493 Inner::Some(r) => r.decryptor.decrypt(id, data, target),
494 }
495 }
496
/// Return the object with the given identifier, read with a fresh reader context.
///
/// Returns `None` if `get_with` cannot resolve the object as a `T`.
#[allow(private_bounds)]
pub fn get<'a, T>(&'a self, id: ObjectIdentifier) -> Option<T>
where
    T: ObjectLike<'a>,
{
    let ctx = ReaderContext::new(self, false);
    self.get_with(id, &ctx)
}
506
507 #[allow(private_bounds)]
509 pub(crate) fn get_with<'a, T>(
510 &'a self,
511 id: ObjectIdentifier,
512 ctx: &ReaderContext<'a>,
513 ) -> Option<T>
514 where
515 T: ObjectLike<'a>,
516 {
517 let Inner::Some(repr) = &self.0 else {
518 return None;
519 };
520
521 let locked = repr.map.try_get()?;
522
523 let mut r = Reader::new(repr.data.get().as_ref());
524
525 let entry = *locked.xref_map.get(&id).or({
526 None
529 })?;
530 drop(locked);
531
532 let mut ctx = ctx.clone();
533 ctx.set_obj_number(id);
534 ctx.set_in_content_stream(false);
535
536 match entry {
537 EntryType::Normal(offset) => {
538 ctx.set_in_object_stream(false);
539 r.jump(offset);
540
541 if let Some(object) = r.read_with_context::<IndirectObject<T>>(&ctx) {
542 if object.id() == &id {
543 return Some(object.get());
544 }
545 } else {
546 if r.skip_not_in_content_stream::<IndirectObject<Object<'_>>>()
549 .is_some()
550 {
551 return None;
552 }
553 };
554
555 if self.is_repaired() {
557 error!(
558 "attempt was made at repairing xref, but object {id:?} still couldn't be read"
559 );
560
561 None
562 } else {
563 warn!("broken xref, attempting to repair");
564
565 self.repair();
566
567 self.get_with::<T>(id, &ctx)
569 }
570 }
571 EntryType::ObjStream(obj_stram_gen_num, index) => {
572 let obj_stream_id = ObjectIdentifier::new(obj_stram_gen_num as i32, 0);
574
575 if obj_stream_id == id {
576 warn!("cycle detected in object stream");
577
578 return None;
579 }
580
581 let stream = self.get_with::<Stream<'_>>(obj_stream_id, &ctx)?;
582 let data = repr.data.get_with(obj_stream_id, &ctx)?;
583 let object_stream = ObjectStream::new(stream, data, &ctx)?;
584 object_stream.get(index)
585 }
586 }
587 }
588}
589
/// The information an [`XRef`] is constructed from, in addition to the xref map.
#[derive(Debug, Copy, Clone)]
pub(crate) enum XRefInput<'a> {
    /// Raw bytes of the trailer dictionary.
    TrailerDictData(&'a [u8]),
    /// Identifier of the root object, used when no trailer dictionary is available.
    RootRef(ObjectIdentifier),
}
606
607pub(crate) fn find_last_xref_pos(data: &[u8]) -> Option<usize> {
608 let mut finder = Reader::new(data);
609 let mut pos = finder.len().checked_sub(1)?;
610 finder.jump(pos);
611
612 let needle = b"startxref";
613
614 loop {
615 if finder.forward_tag(needle).is_some() {
616 finder.skip_white_spaces_and_comments();
617
618 let offset = finder.read_without_context::<i32>()?.try_into().ok()?;
619
620 return Some(offset);
621 }
622
623 pos = pos.checked_sub(1)?;
624 finder.jump(pos);
625 }
626}
627
/// Where an object referenced by the xref map is stored.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
enum EntryType {
    /// Stored directly in the file, at the given byte offset.
    Normal(usize),
    /// Stored inside an object stream: (object number of the stream, index within the stream).
    ObjStream(u32, u32),
}
638
/// Maps object identifiers to the location of the corresponding object.
type XrefMap = FxHashMap<ObjectIdentifier, EntryType>;
640
/// The xref map together with the flag recording whether it was rebuilt by scanning the file.
#[derive(Debug)]
struct MapRepr {
    /// The current object-location map.
    xref_map: XrefMap,
    /// `true` once the map stems from the fallback scan; a repaired map is not repaired again.
    repaired: bool,
}
647
/// Values extracted from the trailer and root dictionaries when the xref is built.
#[derive(Debug, Copy, Clone)]
pub(crate) struct TrailerData {
    /// Reference stored under `PAGES` in the root dictionary.
    pub(crate) pages_ref: ObjectIdentifier,
    /// Reference to the root object.
    pub(crate) root_ref: ObjectIdentifier,
    /// Version parsed from the root dictionary's `VERSION` entry, if present and valid.
    pub(crate) version: Option<PdfVersion>,
}
654
655impl TrailerData {
656 pub(crate) fn dummy() -> Self {
657 Self {
658 pages_ref: ObjectIdentifier::new(0, 0),
659 root_ref: ObjectIdentifier::new(0, 0),
660 version: None,
661 }
662 }
663}
664
/// Backing state of a non-dummy [`XRef`].
#[derive(Debug, Clone)]
struct SomeRepr {
    /// The document data.
    data: Arc<Data>,
    /// The xref map; behind a lock because `repair` replaces it through a shared reference.
    map: Arc<RwLock<MapRepr>>,
    /// Document metadata parsed from the trailer's info dictionary.
    metadata: Arc<Metadata>,
    /// Decryptor derived from the trailer dictionary (`Decryptor::None` if not encrypted).
    decryptor: Arc<Decryptor>,
    /// Whether the root dictionary has an `OCPROPERTIES` dictionary.
    has_ocgs: bool,
    /// Password supplied at construction; reused when the xref map is rebuilt.
    password: Vec<u8>,
    /// Root/pages references and version.
    trailer_data: TrailerData,
    /// Load limits supplied at construction.
    load_limits: PdfLoadLimits,
}
676
/// Internal representation of an [`XRef`].
#[derive(Debug, Clone)]
enum Inner {
    /// Placeholder without any data; lookups on it return `None`.
    Dummy,
    /// An xref backed by real document data.
    Some(Arc<SomeRepr>),
}
684
/// A single parsed entry of a classic cross-reference table.
#[derive(Debug)]
struct XRefEntry {
    /// Value of the 10-digit first field.
    offset: usize,
    /// Value of the 5-digit second field.
    gen_number: i32,
    /// `true` if the entry's type byte is `n`.
    used: bool,
}

impl XRefEntry {
    /// Parse one table entry laid out as `oooooooooo ggggg t`: ten offset digits at bytes 0..10,
    /// five generation digits at bytes 11..16 and the type byte at index 17.
    ///
    /// Returns `None` if `data` is too short for that layout, if a numeric field contains a
    /// non-digit byte, or if a number does not fit into its target type.
    pub(crate) fn read(data: &[u8]) -> Option<Self> {
        /// Parse an all-digits ASCII field into a `u32`, rejecting any other byte and overflow.
        #[inline(always)]
        fn parse_u32(digits: &[u8]) -> Option<u32> {
            digits.iter().try_fold(0_u32, |accum, &byte| match byte {
                b'0'..=b'9' => accum.checked_mul(10)?.checked_add(u32::from(byte - b'0')),
                _ => None,
            })
        }

        // Checked slicing: a truncated entry yields `None` instead of a panic.
        let offset = usize::try_from(parse_u32(data.get(0..10)?)?).ok()?;
        let gen_number = i32::try_from(parse_u32(data.get(11..16)?)?).ok()?;

        let used = *data.get(17)? == b'n';

        Some(Self {
            offset,
            gen_number,
            used,
        })
    }
}
722
/// Maximum nesting depth when following chained xref sections; deeper chains are rejected.
const MAX_XREF_CHAIN_DEPTH: usize = 64;
726
/// Populate `xref_map` from the xref section at `pos`, starting the chain at depth 0.
///
/// Returns the raw bytes of the trailer dictionary of that section, or `None` on failure.
fn populate_xref_impl<'a>(data: &'a [u8], pos: usize, xref_map: &mut XrefMap) -> Option<&'a [u8]> {
    populate_xref_depth(data, pos, xref_map, 0)
}
730
731fn populate_xref_depth<'a>(
732 data: &'a [u8],
733 pos: usize,
734 xref_map: &mut XrefMap,
735 depth: usize,
736) -> Option<&'a [u8]> {
737 if depth > MAX_XREF_CHAIN_DEPTH {
738 log::warn!("Xref chain depth exceeds {MAX_XREF_CHAIN_DEPTH}, stopping traversal");
739 return None;
740 }
741 let mut reader = Reader::new(data);
742 reader.jump(pos);
743 reader.skip_white_spaces_and_comments();
745
746 let mut r2 = reader.clone();
747 if reader
748 .clone()
749 .read_without_context::<ObjectIdentifier>()
750 .is_some()
751 {
752 populate_from_xref_stream(data, &mut r2, xref_map, depth)
753 } else {
754 populate_from_xref_table(data, &mut r2, xref_map, depth)
755 }
756}
757
/// Header of a subsection in a classic xref table.
pub(super) struct SubsectionHeader {
    /// Object number of the first entry in the subsection.
    pub(super) start: u32,
    /// Number of entries in the subsection.
    pub(super) num_entries: u32,
}
762
impl Readable<'_> for SubsectionHeader {
    /// Read two whitespace-separated unsigned integers (start, entry count), skipping white
    /// space before, between and after them. Returns `None` if either number is missing.
    fn read(r: &mut Reader<'_>, _: &ReaderContext<'_>) -> Option<Self> {
        r.skip_white_spaces();
        let start = r.read_without_context::<u32>()?;
        r.skip_white_spaces();
        let num_entries = r.read_without_context::<u32>()?;
        r.skip_white_spaces();

        Some(Self { start, num_entries })
    }
}
774
775fn populate_from_xref_table<'a>(
777 data: &'a [u8],
778 reader: &mut Reader<'a>,
779 insert_map: &mut XrefMap,
780 depth: usize,
781) -> Option<&'a [u8]> {
782 let trailer = {
783 let mut reader = reader.clone();
784 read_xref_table_trailer(&mut reader, &ReaderContext::dummy())?
785 };
786
787 reader.skip_white_spaces();
788 reader.forward_tag(b"xref")?;
789 reader.skip_white_spaces();
790
791 let mut max_obj = 0;
792
793 if let Some(prev) = trailer.get::<i32>(PREV) {
794 populate_xref_depth(data, prev as usize, insert_map, depth + 1)?;
796 }
797
798 if let Some(xref_stm) = trailer.get::<i32>(XREF_STM) {
801 populate_xref_depth(data, xref_stm as usize, insert_map, depth + 1)?;
802 }
803
804 while let Some(header) = reader.read_without_context::<SubsectionHeader>() {
805 reader.skip_white_spaces();
806
807 let start = header.start;
808 let end = start + header.num_entries;
809
810 for obj_number in start..end {
811 max_obj = max(max_obj, obj_number);
812 let bytes = reader.read_bytes(XREF_ENTRY_LEN)?;
813 let entry = XRefEntry::read(bytes)?;
814
815 if entry.used {
818 insert_map.insert(
819 ObjectIdentifier::new(obj_number as i32, entry.gen_number),
820 EntryType::Normal(entry.offset),
821 );
822 }
823 }
824 }
825
826 Some(trailer.data())
827}
828
829fn populate_from_xref_stream<'a>(
830 data: &'a [u8],
831 reader: &mut Reader<'a>,
832 insert_map: &mut XrefMap,
833 depth: usize,
834) -> Option<&'a [u8]> {
835 let stream = reader
836 .read_with_context::<IndirectObject<Stream<'_>>>(&ReaderContext::dummy())?
837 .get();
838
839 if let Some(prev) = stream.dict().get::<i32>(PREV) {
840 let _ = populate_xref_depth(data, prev as usize, insert_map, depth + 1)?;
842 }
843
844 let size = stream.dict().get::<u32>(SIZE)?;
845
846 let [f1_len, f2_len, f3_len] = stream.dict().get::<[u8; 3]>(W)?;
847
848 if f2_len > size_of::<u64>() as u8 {
849 error!("xref offset length is larger than the allowed limit");
850
851 return None;
852 }
853
854 if f1_len != 1 {
856 warn!("first field in xref stream was longer than 1");
857 }
858
859 let xref_data = stream.decoded().ok()?;
860 let mut xref_reader = Reader::new(xref_data.as_ref());
861
862 if let Some(arr) = stream.dict().get::<Array<'_>>(INDEX) {
863 let iter = arr.iter::<(u32, u32)>();
864
865 for (start, num_elements) in iter {
866 xref_stream_subsection(
867 &mut xref_reader,
868 start,
869 num_elements,
870 f1_len,
871 f2_len,
872 f3_len,
873 insert_map,
874 )?;
875 }
876 } else {
877 xref_stream_subsection(
878 &mut xref_reader,
879 0,
880 size,
881 f1_len,
882 f2_len,
883 f3_len,
884 insert_map,
885 )?;
886 }
887
888 Some(stream.dict().data())
889}
890
891fn xref_stream_num(data: &[u8]) -> Option<u32> {
892 Some(match data.len() {
893 0 => return None,
894 1 => u8::from_be(data[0]) as u32,
895 2 => u16::from_be_bytes(data[0..2].try_into().ok()?) as u32,
896 3 => u32::from_be_bytes([0, data[0], data[1], data[2]]),
897 4 => u32::from_be_bytes(data[0..4].try_into().ok()?),
898 8 => {
899 if let Ok(num) = u32::try_from(u64::from_be_bytes(data[0..8].try_into().ok()?)) {
900 return Some(num);
901 } else {
902 warn!("xref stream number is too large");
903
904 return None;
905 }
906 }
907 n => {
908 warn!("invalid xref stream number {n}");
909
910 return None;
911 }
912 })
913}
914
915fn xref_stream_subsection<'a>(
916 xref_reader: &mut Reader<'a>,
917 start: u32,
918 num_elements: u32,
919 f1_len: u8,
920 f2_len: u8,
921 f3_len: u8,
922 insert_map: &mut XrefMap,
923) -> Option<()> {
924 for i in 0..num_elements {
925 let f_type = if f1_len == 0 {
926 1
927 } else {
928 xref_reader.read_bytes(1)?[0]
930 };
931
932 let obj_number = start + i;
933
934 match f_type {
935 0 => {
937 xref_reader.skip_bytes(f2_len as usize + f3_len as usize)?;
938 }
939 1 => {
940 let offset = if f2_len > 0 {
941 let data = xref_reader.read_bytes(f2_len as usize)?;
942 xref_stream_num(data)?
943 } else {
944 0
945 };
946
947 let gen_number = if f3_len > 0 {
948 let data = xref_reader.read_bytes(f3_len as usize)?;
949 xref_stream_num(data)?
950 } else {
951 0
952 };
953
954 insert_map.insert(
955 ObjectIdentifier::new(obj_number as i32, gen_number as i32),
956 EntryType::Normal(offset as usize),
957 );
958 }
959 2 => {
960 let obj_stream_number = {
961 let data = xref_reader.read_bytes(f2_len as usize)?;
962 xref_stream_num(data)?
963 };
964 let gen_number = 0;
965 let index = if f3_len > 0 {
966 let data = xref_reader.read_bytes(f3_len as usize)?;
967 xref_stream_num(data)?
968 } else {
969 0
970 };
971
972 insert_map.insert(
973 ObjectIdentifier::new(obj_number as i32, gen_number),
974 EntryType::ObjStream(obj_stream_number, index),
975 );
976 }
977 _ => {
978 warn!("xref has unknown field type {f_type}");
979
980 return None;
981 }
982 }
983 }
984
985 Some(())
986}
987
988fn read_xref_table_trailer<'a>(
989 reader: &mut Reader<'a>,
990 ctx: &ReaderContext<'a>,
991) -> Option<Dict<'a>> {
992 reader.skip_white_spaces();
993 reader.forward_tag(b"xref")?;
994 reader.skip_white_spaces();
995
996 while let Some(header) = reader.read_without_context::<SubsectionHeader>() {
997 reader.jump(reader.offset() + XREF_ENTRY_LEN * header.num_entries as usize);
998 }
999
1000 reader.skip_white_spaces();
1001 reader.forward_tag(b"trailer")?;
1002 reader.skip_white_spaces();
1003
1004 reader.read_with_context::<Dict<'_>>(ctx)
1005}
1006
1007fn get_decryptor(trailer_dict: &Dict<'_>, password: &[u8]) -> Result<Decryptor, XRefError> {
1008 if let Some(encryption_dict) = trailer_dict.get::<Dict<'_>>(ENCRYPT) {
1009 let id = if let Some(id) = trailer_dict
1010 .get::<Array<'_>>(ID)
1011 .and_then(|a| a.flex_iter().next::<object::String>())
1012 {
1013 id.to_vec()
1014 } else {
1015 vec![]
1017 };
1018
1019 get(&encryption_dict, &id, password).map_err(XRefError::Encryption)
1020 } else {
1021 Ok(Decryptor::None)
1022 }
1023}
1024
/// A decoded object stream together with the parsed table of the objects it contains.
struct ObjectStream<'a> {
    /// The decoded stream data.
    data: &'a [u8],
    /// Reader context used for objects in the stream; flagged as being inside an object stream.
    ctx: ReaderContext<'a>,
    /// One `(object number, offset into data)` pair per contained object.
    offsets: Vec<(u32, usize)>,
}
1030
1031impl<'a> ObjectStream<'a> {
1032 fn new(inner: Stream<'_>, data: &'a [u8], ctx: &ReaderContext<'a>) -> Option<Self> {
1033 let num_objects = inner.dict().get::<usize>(N)?;
1034 let first_offset = inner.dict().get::<usize>(FIRST)?;
1035
1036 let mut r = Reader::new(data);
1037
1038 let mut offsets = vec![];
1039
1040 for _ in 0..num_objects {
1041 r.skip_white_spaces_and_comments();
1042 let obj_num = r.read_without_context::<u32>()?;
1044 r.skip_white_spaces_and_comments();
1045 let relative_offset = r.read_without_context::<usize>()?;
1046 offsets.push((obj_num, first_offset + relative_offset));
1047 }
1048
1049 let mut ctx = ctx.clone();
1050 ctx.set_in_object_stream(true);
1051
1052 Some(Self { data, ctx, offsets })
1053 }
1054
1055 fn get<T>(&self, index: u32) -> Option<T>
1056 where
1057 T: ObjectLike<'a>,
1058 {
1059 let offset = self.offsets.get(index as usize)?.1;
1060 let mut r = Reader::new(self.data);
1061 r.jump(offset);
1062 r.skip_white_spaces_and_comments();
1063
1064 r.read_with_context::<T>(&self.ctx)
1065 }
1066}
1067
1068fn parse_metadata(info_dict: &Dict<'_>) -> Metadata {
1069 Metadata {
1070 creation_date: info_dict
1071 .get::<object::String>(CREATION_DATE)
1072 .and_then(|c| DateTime::from_bytes(&c)),
1073 modification_date: info_dict
1074 .get::<object::String>(MOD_DATE)
1075 .and_then(|c| DateTime::from_bytes(&c)),
1076 title: info_dict.get::<object::String>(TITLE).map(|t| t.to_vec()),
1077 author: info_dict.get::<object::String>(AUTHOR).map(|t| t.to_vec()),
1078 subject: info_dict.get::<object::String>(SUBJECT).map(|t| t.to_vec()),
1079 keywords: info_dict
1080 .get::<object::String>(KEYWORDS)
1081 .map(|t| t.to_vec()),
1082 creator: info_dict.get::<object::String>(CREATOR).map(|t| t.to_vec()),
1083 producer: info_dict
1084 .get::<object::String>(PRODUCER)
1085 .map(|t| t.to_vec()),
1086 }
1087}