1use crate::crypto::{DecryptionError, DecryptionTarget, Decryptor, get};
4use crate::data::Data;
5use crate::metadata::Metadata;
6use crate::object::Name;
7use crate::object::ObjectIdentifier;
8use crate::object::Stream;
9use crate::object::dict::keys::{
10 AUTHOR, CREATION_DATE, CREATOR, ENCRYPT, FIRST, ID, INDEX, INFO, KEYWORDS, MOD_DATE, N,
11 OCPROPERTIES, PAGES, PREV, PRODUCER, ROOT, SIZE, SUBJECT, TITLE, TYPE, VERSION, W, XREF_STM,
12};
13use crate::object::indirect::IndirectObject;
14use crate::object::{Array, MaybeRef};
15use crate::object::{DateTime, Dict};
16use crate::object::{Object, ObjectLike};
17use crate::pdf::PdfVersion;
18use crate::reader::Reader;
19use crate::reader::{Readable, ReaderContext, ReaderExt};
20use crate::sync::{Arc, FxHashMap, RwLock, RwLockExt};
21use crate::{PdfData, object};
22use alloc::vec;
23use alloc::vec::Vec;
24use core::cmp::max;
25use core::iter;
26use core::ops::Deref;
27use log::{error, warn};
28
/// Number of bytes occupied by one entry of a classic (non-stream) xref table.
///
/// Used both to read a single entry and to skip over whole subsections.
pub(crate) const XREF_ENTRY_LEN: usize = 20;
30
/// Errors that can occur while building an [`XRef`].
#[derive(Debug, Copy, Clone)]
pub(crate) enum XRefError {
    /// The xref data or the trailer could not be located or parsed.
    Unknown,
    /// Creating the decryptor from the trailer's encryption dictionary failed.
    Encryption(DecryptionError),
}
36
37pub(crate) fn root_xref(data: PdfData, password: &[u8]) -> Result<XRef, XRefError> {
39 let mut xref_map = FxHashMap::default();
40 let xref_pos = find_last_xref_pos(data.as_ref()).ok_or(XRefError::Unknown)?;
41 let trailer =
42 populate_xref_impl(data.as_ref(), xref_pos, &mut xref_map).ok_or(XRefError::Unknown)?;
43
44 XRef::new(
45 data.clone(),
46 xref_map,
47 XRefInput::TrailerDictData(trailer),
48 false,
49 password,
50 )
51}
52
53pub(crate) fn fallback(data: PdfData, password: &[u8]) -> Option<XRef> {
55 warn!("xref table was invalid, trying to manually build xref table");
56 let (xref_map, xref_input) = fallback_xref_map(&data, password);
57
58 if let Some(xref_input) = xref_input {
59 warn!("rebuild xref table with {} entries", xref_map.len());
60
61 XRef::new(data.clone(), xref_map, xref_input, true, password).ok()
62 } else {
63 warn!("couldn't find trailer dictionary, failed to rebuild xref table");
64
65 None
66 }
67}
68
/// Rebuilds the xref map by scanning `data`, starting with a dummy reader context.
///
/// Returns the reconstructed map together with the input needed to finish building an
/// [`XRef`], or `None` in the second position if no trailer or catalog was found.
fn fallback_xref_map<'a>(data: &'a PdfData, password: &[u8]) -> (XrefMap, Option<XRefInput<'a>>) {
    fallback_xref_map_inner(data, ReaderContext::dummy(), true, password)
}
72
/// Scans the whole file and reconstructs an xref map from the objects it finds.
///
/// * `dummy_ctx` - reader context used while scanning; its object number is updated as
///   object headers are encountered.
/// * `recurse` - when `true` and the chosen trailer has an `Encrypt` entry, the scan is
///   repeated once with a context backed by a temporary [`XRef`].
/// * `password` - forwarded to [`XRef::new`] for that temporary xref.
///
/// Returns the map plus, in order of preference, the raw bytes of a validated trailer
/// dictionary, the id of the last object preceding a `Catalog` dictionary, or `None`.
fn fallback_xref_map_inner<'a>(
    data: &'a PdfData,
    mut dummy_ctx: ReaderContext<'a>,
    recurse: bool,
    password: &[u8],
) -> (XrefMap, Option<XRefInput<'a>>) {
    let mut xref_map = FxHashMap::default();
    let mut trailer_dicts = vec![];
    let mut root_ref = None;

    let mut r = Reader::new(data.as_ref());

    // Identifier of the most recently accepted object header.
    let mut last_obj_num = None;

    loop {
        let cur_pos = r.offset();

        // Snapshot taken before anything is consumed, so the same position can be
        // re-read as a stream further down.
        let mut old_r = r.clone();

        if let Some(obj_id) = r.read::<ObjectIdentifier>(&dummy_ctx) {
            // Only accept the header if a complete object can be skipped after it;
            // the check runs on a clone so `r` stays right behind the identifier.
            let mut cloned = r.clone();
            cloned.skip_white_spaces_and_comments();
            if cloned.skip::<Object<'_>>(false).is_some() {
                xref_map.insert(obj_id, EntryType::Normal(cur_pos));
                last_obj_num = Some(obj_id);
                dummy_ctx.set_obj_number(obj_id);
            }
        } else if let Some(dict) = r.read::<Dict<'_>>(&dummy_ctx) {
            // Any dictionary with a `Root` entry is a trailer candidate.
            if dict.contains_key(ROOT) {
                trailer_dicts.push(dict.clone());
            }

            // Remember the object that owns a catalog dictionary as a fallback root.
            if dict
                .get::<Name>(TYPE)
                .is_some_and(|n| n.as_str() == "Catalog")
            {
                root_ref = last_obj_num;
            }

            // If this dictionary starts an object stream, register the objects it
            // contains, attributing them to the last seen object header.
            if let Some(stream) = old_r.read::<Stream<'_>>(&dummy_ctx)
                && stream.dict().get::<Name>(TYPE).as_deref() == Some(b"ObjStm")
                && let Some(data) = stream.decoded().ok()
                && let Some(last_obj_num) = last_obj_num
                && let Some(obj_stream) = ObjectStream::new(stream, &data, &dummy_ctx)
            {
                for (idx, (obj_num, _)) in obj_stream.offsets.iter().enumerate() {
                    let id = ObjectIdentifier::new(*obj_num as i32, 0);
                    // An object found directly in the file takes precedence over a
                    // copy inside an object stream.
                    if xref_map
                        .get(&id)
                        .is_none_or(|e| !matches!(e, &EntryType::Normal(_)))
                    {
                        xref_map.insert(
                            id,
                            EntryType::ObjStream(last_obj_num.obj_number as u32, idx as u32),
                        );
                    }
                }
            }
        } else {
            // Nothing recognisable here; advance by one byte and try again.
            r.read_byte();
        }

        if r.at_end() {
            break;
        }
    }

    let mut trailer_dict = None;

    // Keep only candidates whose `Root` resolves to a dictionary with a `Pages` entry.
    // Every passing candidate overwrites the previous one, so the last one wins.
    for dict in trailer_dicts {
        if let Some(root_id) = dict.get_raw::<Dict<'_>>(ROOT) {
            let check = |dict: &Dict<'_>| -> bool { dict.contains_key(PAGES) };

            match root_id {
                MaybeRef::Ref(r) => match xref_map.get(&r.into()) {
                    Some(EntryType::Normal(offset)) => {
                        let mut reader = Reader::new(&data.as_ref()[*offset..]);

                        if let Some(obj) =
                            reader.read_with_context::<IndirectObject<Dict<'_>>>(&dummy_ctx)
                            && check(&obj.clone().get())
                        {
                            trailer_dict = Some(dict);
                        }
                    }
                    Some(EntryType::ObjStream(obj_num, idx)) => {
                        // The root lives in an object stream: load that stream from
                        // its own file offset and look the root up by index.
                        if let Some(EntryType::Normal(offset)) =
                            xref_map.get(&ObjectIdentifier::new(*obj_num as i32, 0))
                        {
                            let mut reader = Reader::new(&data.as_ref()[*offset..]);

                            if let Some(stream) =
                                reader.read_with_context::<IndirectObject<Stream<'_>>>(&dummy_ctx)
                                && let Some(data) = stream.clone().get().decoded().ok()
                                && let Some(object_stream) =
                                    ObjectStream::new(stream.get(), &data, &dummy_ctx)
                                && let Some(obj) = object_stream.get::<Dict<'_>>(*idx)
                                && check(&obj)
                            {
                                trailer_dict = Some(dict);
                            }
                        }
                    }
                    _ => {}
                },
                MaybeRef::NotRef(d) => {
                    if check(&d) {
                        trailer_dict = Some(dict);
                    }
                }
            }
        }
    }

    let has_encryption = trailer_dict
        .as_ref()
        .is_some_and(|t| t.contains_key(ENCRYPT));

    // For encrypted files, scan a second time with a context backed by a temporary
    // xref built from the first pass (presumably so encrypted data can be decoded;
    // confirm against `ReaderContext`). `recurse` is `false` in the nested call, so
    // this happens at most once.
    if has_encryption && recurse {
        if let Some(Ok(xref)) = trailer_dict.as_ref().map(|d| {
            XRef::new(
                data.clone(),
                xref_map.clone(),
                XRefInput::TrailerDictData(d.data()),
                true,
                password,
            )
        }) {
            let ctx = ReaderContext::new(&xref, false);
            let (patched_map, _) = fallback_xref_map_inner(data, ctx, false, password);
            xref_map = patched_map;
        }
    }

    // Prefer a validated trailer dictionary, then a bare catalog reference.
    if let Some(trailer_dict_data) = trailer_dict.map(|d| d.data()) {
        (
            xref_map,
            Some(XRefInput::TrailerDictData(trailer_dict_data)),
        )
    } else if let Some(root_ref) = root_ref {
        (xref_map, Some(XRefInput::RootRef(root_ref)))
    } else {
        (xref_map, None)
    }
}
228
/// Shared placeholder xref that contains no objects; handed out by [`XRef::dummy`].
const DUMMY_XREF: XRef = XRef(Inner::Dummy);
230
/// The cross-reference table of a PDF file, used to look up objects by identifier.
///
/// Cloning copies the wrapped [`Inner`]; for a real table that is an `Arc` clone.
#[derive(Debug, Clone)]
pub struct XRef(Inner);
234
235impl XRef {
    /// Creates an xref from an already populated map and a description of the trailer.
    ///
    /// * `data` - the raw file data.
    /// * `xref_map` - the object-id → entry map.
    /// * `input` - either the raw bytes of the trailer dictionary or a direct reference
    ///   to the catalog.
    /// * `repaired` - whether `xref_map` was already produced by the fallback scan.
    /// * `password` - password used to set up decryption; also stored for later repairs.
    ///
    /// # Errors
    ///
    /// Returns [`XRefError::Unknown`] if the trailer dictionary, the catalog, or the
    /// references to the catalog or the page tree cannot be read, and
    /// [`XRefError::Encryption`] if the decryptor cannot be created.
    fn new(
        data: PdfData,
        xref_map: XrefMap,
        input: XRefInput<'_>,
        repaired: bool,
        password: &[u8],
    ) -> Result<Self, XRefError> {
        // Start with placeholder values; the real ones are filled in below, because
        // resolving them requires a (partially initialised) xref to read through.
        let trailer_data = TrailerData::dummy();

        let mut xref = Self(Inner::Some(Arc::new(SomeRepr {
            data: Arc::new(Data::new(data)),
            map: Arc::new(RwLock::new(MapRepr { xref_map, repaired })),
            decryptor: Arc::new(Decryptor::None),
            has_ocgs: false,
            metadata: Arc::new(Metadata::default()),
            trailer_data,
            password: password.to_vec(),
        })));

        // Phase 1: derive the decryptor from the trailer dictionary. With only a
        // catalog reference available, no decryption is set up.
        let decryptor = {
            match input {
                XRefInput::TrailerDictData(trailer_dict_data) => {
                    let mut r = Reader::new(trailer_dict_data);

                    let trailer_dict = r
                        .read_with_context::<Dict<'_>>(&ReaderContext::new(&xref, false))
                        .ok_or(XRefError::Unknown)?;

                    get_decryptor(&trailer_dict, password)?
                }
                XRefInput::RootRef(_) => Decryptor::None,
            }
        };

        // Install the decryptor before anything else is resolved through `xref`.
        match &mut xref.0 {
            Inner::Dummy => unreachable!(),
            Inner::Some(r) => {
                let mutable = Arc::make_mut(r);
                mutable.decryptor = Arc::new(decryptor.clone());
            }
        }

        // Phase 2: resolve the catalog, page tree reference, metadata and version.
        let (trailer_data, has_ocgs, metadata) = match input {
            XRefInput::TrailerDictData(trailer_dict_data) => {
                let mut r = Reader::new(trailer_dict_data);

                // The trailer is parsed a second time, now that the decryptor is set.
                let trailer_dict = r
                    .read_with_context::<Dict<'_>>(&ReaderContext::new(&xref, false))
                    .ok_or(XRefError::Unknown)?;

                let root_ref = trailer_dict.get_ref(ROOT).ok_or(XRefError::Unknown)?;
                let root = trailer_dict
                    .get::<Dict<'_>>(ROOT)
                    .ok_or(XRefError::Unknown)?;
                // A missing or unreadable `Info` dictionary yields default metadata.
                let metadata = trailer_dict
                    .get::<Dict<'_>>(INFO)
                    .map(|d| parse_metadata(&d))
                    .unwrap_or_default();
                let pages_ref = root.get_ref(PAGES).ok_or(XRefError::Unknown)?;
                let has_ocgs = root.get::<Dict<'_>>(OCPROPERTIES).is_some();
                let version = root
                    .get::<Name>(VERSION)
                    .and_then(|v| PdfVersion::from_bytes(v.deref()));

                let td = TrailerData {
                    pages_ref: pages_ref.into(),
                    root_ref: root_ref.into(),
                    version,
                };

                (td, has_ocgs, metadata)
            }
            XRefInput::RootRef(root_ref) => {
                // Without a trailer only the page tree reference can be determined.
                let root = xref.get::<Dict<'_>>(root_ref).ok_or(XRefError::Unknown)?;
                let pages_ref = root.get_ref(PAGES).ok_or(XRefError::Unknown)?;

                let td = TrailerData {
                    pages_ref: pages_ref.into(),
                    root_ref,
                    version: None,
                };

                (td, false, Metadata::default())
            }
        };

        // Replace the placeholders with the resolved values.
        match &mut xref.0 {
            Inner::Dummy => unreachable!(),
            Inner::Some(r) => {
                let mutable = Arc::make_mut(r);
                mutable.trailer_data = trailer_data;
                mutable.decryptor = Arc::new(decryptor);
                mutable.has_ocgs = has_ocgs;
                mutable.metadata = Arc::new(metadata);
            }
        }

        Ok(xref)
    }
342
343 fn is_repaired(&self) -> bool {
344 match &self.0 {
345 Inner::Dummy => false,
346 Inner::Some(r) => {
347 let locked = r.map.get();
348 locked.repaired
349 }
350 }
351 }
352
    /// Returns a reference to the shared dummy xref, which contains no objects.
    pub(crate) fn dummy() -> &'static Self {
        &DUMMY_XREF
    }
356
357 pub(crate) fn len(&self) -> usize {
358 match &self.0 {
359 Inner::Dummy => 0,
360 Inner::Some(r) => r.map.get().xref_map.len(),
361 }
362 }
363
364 pub(crate) fn trailer_data(&self) -> &TrailerData {
365 match &self.0 {
366 Inner::Dummy => unreachable!(),
367 Inner::Some(r) => &r.trailer_data,
368 }
369 }
370
371 pub(crate) fn metadata(&self) -> &Metadata {
372 match &self.0 {
373 Inner::Dummy => unreachable!(),
374 Inner::Some(r) => &r.metadata,
375 }
376 }
377
    /// Returns the object identifier of the document's root (catalog) object.
    ///
    /// # Panics
    ///
    /// Panics when called on the dummy xref, because it has no trailer data.
    pub fn root_id(&self) -> ObjectIdentifier {
        self.trailer_data().root_ref
    }
382
383 pub fn has_optional_content_groups(&self) -> bool {
385 match &self.0 {
386 Inner::Dummy => false,
387 Inner::Some(r) => r.has_ocgs,
388 }
389 }
390
391 pub(crate) fn objects(&self) -> impl IntoIterator<Item = Object<'_>> + '_ {
392 match &self.0 {
393 Inner::Dummy => unimplemented!(),
394 Inner::Some(r) => {
395 let locked = r.map.get();
396 let mut elements = locked
397 .xref_map
398 .iter()
399 .map(|(id, e)| {
400 let offset = match e {
401 EntryType::Normal(o) => (*o, 0),
402 EntryType::ObjStream(id, index) => {
403 if let Some(EntryType::Normal(offset)) =
404 locked.xref_map.get(&ObjectIdentifier::new(*id as i32, 0))
405 {
406 (*offset, *index)
407 } else {
408 (usize::MAX, 0)
409 }
410 }
411 };
412
413 (*id, offset)
414 })
415 .collect::<Vec<_>>();
416
417 elements.sort_by(|e1, e2| e1.1.cmp(&e2.1));
420
421 let mut iter = elements.into_iter();
422
423 iter::from_fn(move || {
424 for next in iter.by_ref() {
425 if let Some(obj) = self.get_with(next.0, &ReaderContext::new(self, false)) {
426 return Some(obj);
427 } else {
428 continue;
430 }
431 }
432
433 None
434 })
435 }
436 }
437 }
438
439 pub(crate) fn repair(&self) {
440 let Inner::Some(r) = &self.0 else {
441 unreachable!();
442 };
443
444 let mut locked = r
445 .map
446 .try_put()
447 .expect("xref repair: map lock not contended");
448 assert!(!locked.repaired);
449
450 let (xref_map, _) = fallback_xref_map(r.data.get(), &r.password);
451 locked.xref_map = xref_map;
452 locked.repaired = true;
453 }
454
455 #[inline]
456 pub(crate) fn needs_decryption(&self, ctx: &ReaderContext<'_>) -> bool {
457 match &self.0 {
458 Inner::Dummy => false,
459 Inner::Some(r) => {
460 if matches!(r.decryptor.as_ref(), Decryptor::None) {
461 false
462 } else {
463 !ctx.in_content_stream() && !ctx.in_object_stream()
464 }
465 }
466 }
467 }
468
469 #[inline]
470 pub(crate) fn decrypt(
471 &self,
472 id: ObjectIdentifier,
473 data: &[u8],
474 target: DecryptionTarget,
475 ) -> Option<Vec<u8>> {
476 match &self.0 {
477 Inner::Dummy => Some(data.to_vec()),
478 Inner::Some(r) => r.decryptor.decrypt(id, data, target),
479 }
480 }
481
    /// Looks up the object with the given identifier and reads it as a `T`.
    ///
    /// Uses a fresh reader context bound to this xref. Returns `None` if the object is
    /// unknown or cannot be read as a `T`.
    // NOTE(review): the lint is allowed presumably because `ObjectLike` is less visible
    // than this public method — confirm.
    #[allow(private_bounds)]
    pub fn get<'a, T>(&'a self, id: ObjectIdentifier) -> Option<T>
    where
        T: ObjectLike<'a>,
    {
        let ctx = ReaderContext::new(self, false);
        self.get_with(id, &ctx)
    }
491
492 #[allow(private_bounds)]
494 pub(crate) fn get_with<'a, T>(
495 &'a self,
496 id: ObjectIdentifier,
497 ctx: &ReaderContext<'a>,
498 ) -> Option<T>
499 where
500 T: ObjectLike<'a>,
501 {
502 let Inner::Some(repr) = &self.0 else {
503 return None;
504 };
505
506 let locked = repr.map.try_get()?;
507
508 let mut r = Reader::new(repr.data.get().as_ref());
509
510 let entry = *locked.xref_map.get(&id).or({
511 None
514 })?;
515 drop(locked);
516
517 let mut ctx = ctx.clone();
518 ctx.set_obj_number(id);
519 ctx.set_in_content_stream(false);
520
521 match entry {
522 EntryType::Normal(offset) => {
523 ctx.set_in_object_stream(false);
524 r.jump(offset);
525
526 if let Some(object) = r.read_with_context::<IndirectObject<T>>(&ctx) {
527 if object.id() == &id {
528 return Some(object.get());
529 }
530 } else {
531 if r.skip_not_in_content_stream::<IndirectObject<Object<'_>>>()
534 .is_some()
535 {
536 return None;
537 }
538 };
539
540 if self.is_repaired() {
542 error!(
543 "attempt was made at repairing xref, but object {id:?} still couldn't be read"
544 );
545
546 None
547 } else {
548 warn!("broken xref, attempting to repair");
549
550 self.repair();
551
552 self.get_with::<T>(id, &ctx)
554 }
555 }
556 EntryType::ObjStream(obj_stram_gen_num, index) => {
557 let obj_stream_id = ObjectIdentifier::new(obj_stram_gen_num as i32, 0);
559
560 if obj_stream_id == id {
561 warn!("cycle detected in object stream");
562
563 return None;
564 }
565
566 let stream = self.get_with::<Stream<'_>>(obj_stream_id, &ctx)?;
567 let data = repr.data.get_with(obj_stream_id, &ctx)?;
568 let object_stream = ObjectStream::new(stream, data, &ctx)?;
569 object_stream.get(index)
570 }
571 }
572 }
573}
574
/// The information from which [`XRef::new`] derives the trailer data.
#[derive(Debug, Copy, Clone)]
pub(crate) enum XRefInput<'a> {
    /// Raw bytes of the trailer dictionary; re-parsed inside [`XRef::new`].
    TrailerDictData(&'a [u8]),
    /// Identifier of the catalog object, used when no trailer dictionary is available.
    /// In this mode no decryption, metadata or version information is set up.
    RootRef(ObjectIdentifier),
}
591
592pub(crate) fn find_last_xref_pos(data: &[u8]) -> Option<usize> {
593 let mut finder = Reader::new(data);
594 let mut pos = finder.len().checked_sub(1)?;
595 finder.jump(pos);
596
597 let needle = b"startxref";
598
599 loop {
600 if finder.forward_tag(needle).is_some() {
601 finder.skip_white_spaces_and_comments();
602
603 let offset = finder.read_without_context::<i32>()?.try_into().ok()?;
604
605 return Some(offset);
606 }
607
608 pos = pos.checked_sub(1)?;
609 finder.jump(pos);
610 }
611}
612
/// Where an object referenced by the xref map can be found.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
enum EntryType {
    /// The object starts at this byte offset in the file data.
    Normal(usize),
    /// The object is stored in an object stream: the first field is the object number
    /// of that stream, the second the index of the object within it.
    ObjStream(u32, u32),
}
623
/// Maps object identifiers to the location of the corresponding object.
type XrefMap = FxHashMap<ObjectIdentifier, EntryType>;
625
/// The mutable part of an xref: the map itself plus its repair state.
#[derive(Debug)]
struct MapRepr {
    /// The current object-id → location map.
    xref_map: XrefMap,
    /// Whether `xref_map` was produced by the fallback scan. Once set, failed lookups
    /// no longer trigger another repair.
    repaired: bool,
}
632
/// Information resolved from the trailer and the catalog when the xref is built.
#[derive(Debug, Copy, Clone)]
pub(crate) struct TrailerData {
    /// Reference stored under the catalog's `Pages` key.
    pub(crate) pages_ref: ObjectIdentifier,
    /// Reference to the catalog (the trailer's `Root` entry).
    pub(crate) root_ref: ObjectIdentifier,
    /// Version read from the catalog's `Version` entry, if present and parseable.
    pub(crate) version: Option<PdfVersion>,
}
639
640impl TrailerData {
641 pub(crate) fn dummy() -> Self {
642 Self {
643 pages_ref: ObjectIdentifier::new(0, 0),
644 root_ref: ObjectIdentifier::new(0, 0),
645 version: None,
646 }
647 }
648}
649
/// The state behind a non-dummy [`XRef`].
#[derive(Debug, Clone)]
struct SomeRepr {
    /// The file data; also queried for the decoded contents of object streams.
    data: Arc<Data>,
    /// The xref map and its repair flag, behind a lock so it can be replaced on repair.
    map: Arc<RwLock<MapRepr>>,
    /// Document metadata parsed from the trailer's `Info` dictionary.
    metadata: Arc<Metadata>,
    /// Decryptor derived from the trailer, or `Decryptor::None`.
    decryptor: Arc<Decryptor>,
    /// Whether the catalog has an `OCProperties` dictionary.
    has_ocgs: bool,
    /// The password the xref was created with; reused when the map is repaired.
    password: Vec<u8>,
    /// Catalog and page tree references plus the optional version.
    trailer_data: TrailerData,
}
660
/// Internal representation of an [`XRef`].
#[derive(Debug, Clone)]
enum Inner {
    /// Placeholder without any data; every lookup on it yields nothing.
    Dummy,
    /// A real xref backed by shared state.
    Some(Arc<SomeRepr>),
}
668
/// One parsed entry of a classic xref table.
#[derive(Debug)]
struct XRefEntry {
    /// Value of the 10-digit first field of the entry.
    offset: usize,
    /// Value of the 5-digit second field of the entry.
    gen_number: i32,
    /// `true` if the entry's keyword byte is `n`.
    used: bool,
}

impl XRefEntry {
    /// Parses a single xref table entry.
    ///
    /// The expected layout is ten decimal digits, one separator byte, five decimal
    /// digits, one separator byte and a keyword byte; bytes after that are ignored.
    ///
    /// Returns `None` if `data` is too short, a numeric field contains a non-digit, or a
    /// number does not fit into `u32`.
    pub(crate) fn read(data: &[u8]) -> Option<Self> {
        /// Parses a run of ASCII digits, rejecting any other byte and overflow.
        fn parse_u32(digits: &[u8]) -> Option<u32> {
            digits.iter().try_fold(0_u32, |accum, byte| match *byte {
                b'0'..=b'9' => accum.checked_mul(10)?.checked_add(u32::from(*byte - b'0')),
                _ => None,
            })
        }

        // Checked slicing: a truncated entry yields `None` instead of a panic.
        let offset = usize::try_from(parse_u32(data.get(0..10)?)?).ok()?;
        let gen_number = i32::try_from(parse_u32(data.get(11..16)?)?).ok()?;

        let used = *data.get(17)? == b'n';

        Some(Self {
            offset,
            gen_number,
            used,
        })
    }
}
706
/// Maximum nesting depth when following `Prev`/`XRefStm` links between xref sections.
/// Deeper chains are rejected, which also stops chains that loop back on themselves.
const MAX_XREF_CHAIN_DEPTH: usize = 64;
710
/// Fills `xref_map` from the xref section at `pos` and every section it links to.
///
/// Returns the raw bytes of the trailer dictionary belonging to the section at `pos`,
/// or `None` if any part of the chain cannot be parsed.
fn populate_xref_impl<'a>(data: &'a [u8], pos: usize, xref_map: &mut XrefMap) -> Option<&'a [u8]> {
    populate_xref_depth(data, pos, xref_map, 0)
}
714
715fn populate_xref_depth<'a>(
716 data: &'a [u8],
717 pos: usize,
718 xref_map: &mut XrefMap,
719 depth: usize,
720) -> Option<&'a [u8]> {
721 if depth > MAX_XREF_CHAIN_DEPTH {
722 log::warn!("Xref chain depth exceeds {MAX_XREF_CHAIN_DEPTH}, stopping traversal");
723 return None;
724 }
725 let mut reader = Reader::new(data);
726 reader.jump(pos);
727 reader.skip_white_spaces_and_comments();
729
730 let mut r2 = reader.clone();
731 if reader
732 .clone()
733 .read_without_context::<ObjectIdentifier>()
734 .is_some()
735 {
736 populate_from_xref_stream(data, &mut r2, xref_map, depth)
737 } else {
738 populate_from_xref_table(data, &mut r2, xref_map, depth)
739 }
740}
741
742pub(super) struct SubsectionHeader {
743 pub(super) start: u32,
744 pub(super) num_entries: u32,
745}
746
747impl Readable<'_> for SubsectionHeader {
748 fn read(r: &mut Reader<'_>, _: &ReaderContext<'_>) -> Option<Self> {
749 r.skip_white_spaces();
750 let start = r.read_without_context::<u32>()?;
751 r.skip_white_spaces();
752 let num_entries = r.read_without_context::<u32>()?;
753 r.skip_white_spaces();
754
755 Some(Self { start, num_entries })
756 }
757}
758
759fn populate_from_xref_table<'a>(
761 data: &'a [u8],
762 reader: &mut Reader<'a>,
763 insert_map: &mut XrefMap,
764 depth: usize,
765) -> Option<&'a [u8]> {
766 let trailer = {
767 let mut reader = reader.clone();
768 read_xref_table_trailer(&mut reader, &ReaderContext::dummy())?
769 };
770
771 reader.skip_white_spaces();
772 reader.forward_tag(b"xref")?;
773 reader.skip_white_spaces();
774
775 let mut max_obj = 0;
776
777 if let Some(prev) = trailer.get::<i32>(PREV) {
778 populate_xref_depth(data, prev as usize, insert_map, depth + 1)?;
780 }
781
782 if let Some(xref_stm) = trailer.get::<i32>(XREF_STM) {
785 populate_xref_depth(data, xref_stm as usize, insert_map, depth + 1)?;
786 }
787
788 while let Some(header) = reader.read_without_context::<SubsectionHeader>() {
789 reader.skip_white_spaces();
790
791 let start = header.start;
792 let end = start + header.num_entries;
793
794 for obj_number in start..end {
795 max_obj = max(max_obj, obj_number);
796 let bytes = reader.read_bytes(XREF_ENTRY_LEN)?;
797 let entry = XRefEntry::read(bytes)?;
798
799 if entry.used {
802 insert_map.insert(
803 ObjectIdentifier::new(obj_number as i32, entry.gen_number),
804 EntryType::Normal(entry.offset),
805 );
806 }
807 }
808 }
809
810 Some(trailer.data())
811}
812
813fn populate_from_xref_stream<'a>(
814 data: &'a [u8],
815 reader: &mut Reader<'a>,
816 insert_map: &mut XrefMap,
817 depth: usize,
818) -> Option<&'a [u8]> {
819 let stream = reader
820 .read_with_context::<IndirectObject<Stream<'_>>>(&ReaderContext::dummy())?
821 .get();
822
823 if let Some(prev) = stream.dict().get::<i32>(PREV) {
824 let _ = populate_xref_depth(data, prev as usize, insert_map, depth + 1)?;
826 }
827
828 let size = stream.dict().get::<u32>(SIZE)?;
829
830 let [f1_len, f2_len, f3_len] = stream.dict().get::<[u8; 3]>(W)?;
831
832 if f2_len > size_of::<u64>() as u8 {
833 error!("xref offset length is larger than the allowed limit");
834
835 return None;
836 }
837
838 if f1_len != 1 {
840 warn!("first field in xref stream was longer than 1");
841 }
842
843 let xref_data = stream.decoded().ok()?;
844 let mut xref_reader = Reader::new(xref_data.as_ref());
845
846 if let Some(arr) = stream.dict().get::<Array<'_>>(INDEX) {
847 let iter = arr.iter::<(u32, u32)>();
848
849 for (start, num_elements) in iter {
850 xref_stream_subsection(
851 &mut xref_reader,
852 start,
853 num_elements,
854 f1_len,
855 f2_len,
856 f3_len,
857 insert_map,
858 )?;
859 }
860 } else {
861 xref_stream_subsection(
862 &mut xref_reader,
863 0,
864 size,
865 f1_len,
866 f2_len,
867 f3_len,
868 insert_map,
869 )?;
870 }
871
872 Some(stream.dict().data())
873}
874
875fn xref_stream_num(data: &[u8]) -> Option<u32> {
876 Some(match data.len() {
877 0 => return None,
878 1 => u8::from_be(data[0]) as u32,
879 2 => u16::from_be_bytes(data[0..2].try_into().ok()?) as u32,
880 3 => u32::from_be_bytes([0, data[0], data[1], data[2]]),
881 4 => u32::from_be_bytes(data[0..4].try_into().ok()?),
882 8 => {
883 if let Ok(num) = u32::try_from(u64::from_be_bytes(data[0..8].try_into().ok()?)) {
884 return Some(num);
885 } else {
886 warn!("xref stream number is too large");
887
888 return None;
889 }
890 }
891 n => {
892 warn!("invalid xref stream number {n}");
893
894 return None;
895 }
896 })
897}
898
899fn xref_stream_subsection<'a>(
900 xref_reader: &mut Reader<'a>,
901 start: u32,
902 num_elements: u32,
903 f1_len: u8,
904 f2_len: u8,
905 f3_len: u8,
906 insert_map: &mut XrefMap,
907) -> Option<()> {
908 for i in 0..num_elements {
909 let f_type = if f1_len == 0 {
910 1
911 } else {
912 xref_reader.read_bytes(1)?[0]
914 };
915
916 let obj_number = start + i;
917
918 match f_type {
919 0 => {
921 xref_reader.skip_bytes(f2_len as usize + f3_len as usize)?;
922 }
923 1 => {
924 let offset = if f2_len > 0 {
925 let data = xref_reader.read_bytes(f2_len as usize)?;
926 xref_stream_num(data)?
927 } else {
928 0
929 };
930
931 let gen_number = if f3_len > 0 {
932 let data = xref_reader.read_bytes(f3_len as usize)?;
933 xref_stream_num(data)?
934 } else {
935 0
936 };
937
938 insert_map.insert(
939 ObjectIdentifier::new(obj_number as i32, gen_number as i32),
940 EntryType::Normal(offset as usize),
941 );
942 }
943 2 => {
944 let obj_stream_number = {
945 let data = xref_reader.read_bytes(f2_len as usize)?;
946 xref_stream_num(data)?
947 };
948 let gen_number = 0;
949 let index = if f3_len > 0 {
950 let data = xref_reader.read_bytes(f3_len as usize)?;
951 xref_stream_num(data)?
952 } else {
953 0
954 };
955
956 insert_map.insert(
957 ObjectIdentifier::new(obj_number as i32, gen_number),
958 EntryType::ObjStream(obj_stream_number, index),
959 );
960 }
961 _ => {
962 warn!("xref has unknown field type {f_type}");
963
964 return None;
965 }
966 }
967 }
968
969 Some(())
970}
971
972fn read_xref_table_trailer<'a>(
973 reader: &mut Reader<'a>,
974 ctx: &ReaderContext<'a>,
975) -> Option<Dict<'a>> {
976 reader.skip_white_spaces();
977 reader.forward_tag(b"xref")?;
978 reader.skip_white_spaces();
979
980 while let Some(header) = reader.read_without_context::<SubsectionHeader>() {
981 reader.jump(reader.offset() + XREF_ENTRY_LEN * header.num_entries as usize);
982 }
983
984 reader.skip_white_spaces();
985 reader.forward_tag(b"trailer")?;
986 reader.skip_white_spaces();
987
988 reader.read_with_context::<Dict<'_>>(ctx)
989}
990
991fn get_decryptor(trailer_dict: &Dict<'_>, password: &[u8]) -> Result<Decryptor, XRefError> {
992 if let Some(encryption_dict) = trailer_dict.get::<Dict<'_>>(ENCRYPT) {
993 let id = if let Some(id) = trailer_dict
994 .get::<Array<'_>>(ID)
995 .and_then(|a| a.flex_iter().next::<object::String>())
996 {
997 id.to_vec()
998 } else {
999 vec![]
1001 };
1002
1003 get(&encryption_dict, &id, password).map_err(XRefError::Encryption)
1004 } else {
1005 Ok(Decryptor::None)
1006 }
1007}
1008
1009struct ObjectStream<'a> {
1010 data: &'a [u8],
1011 ctx: ReaderContext<'a>,
1012 offsets: Vec<(u32, usize)>,
1013}
1014
1015impl<'a> ObjectStream<'a> {
1016 fn new(inner: Stream<'_>, data: &'a [u8], ctx: &ReaderContext<'a>) -> Option<Self> {
1017 let num_objects = inner.dict().get::<usize>(N)?;
1018 let first_offset = inner.dict().get::<usize>(FIRST)?;
1019
1020 let mut r = Reader::new(data);
1021
1022 let mut offsets = vec![];
1023
1024 for _ in 0..num_objects {
1025 r.skip_white_spaces_and_comments();
1026 let obj_num = r.read_without_context::<u32>()?;
1028 r.skip_white_spaces_and_comments();
1029 let relative_offset = r.read_without_context::<usize>()?;
1030 offsets.push((obj_num, first_offset + relative_offset));
1031 }
1032
1033 let mut ctx = ctx.clone();
1034 ctx.set_in_object_stream(true);
1035
1036 Some(Self { data, ctx, offsets })
1037 }
1038
1039 fn get<T>(&self, index: u32) -> Option<T>
1040 where
1041 T: ObjectLike<'a>,
1042 {
1043 let offset = self.offsets.get(index as usize)?.1;
1044 let mut r = Reader::new(self.data);
1045 r.jump(offset);
1046 r.skip_white_spaces_and_comments();
1047
1048 r.read_with_context::<T>(&self.ctx)
1049 }
1050}
1051
1052fn parse_metadata(info_dict: &Dict<'_>) -> Metadata {
1053 Metadata {
1054 creation_date: info_dict
1055 .get::<object::String>(CREATION_DATE)
1056 .and_then(|c| DateTime::from_bytes(&c)),
1057 modification_date: info_dict
1058 .get::<object::String>(MOD_DATE)
1059 .and_then(|c| DateTime::from_bytes(&c)),
1060 title: info_dict.get::<object::String>(TITLE).map(|t| t.to_vec()),
1061 author: info_dict.get::<object::String>(AUTHOR).map(|t| t.to_vec()),
1062 subject: info_dict.get::<object::String>(SUBJECT).map(|t| t.to_vec()),
1063 keywords: info_dict
1064 .get::<object::String>(KEYWORDS)
1065 .map(|t| t.to_vec()),
1066 creator: info_dict.get::<object::String>(CREATOR).map(|t| t.to_vec()),
1067 producer: info_dict
1068 .get::<object::String>(PRODUCER)
1069 .map(|t| t.to_vec()),
1070 }
1071}