1use crate::crypto::{DecryptionError, DecryptionTarget, Decryptor, get};
4use crate::data::Data;
5use crate::metadata::Metadata;
6use crate::object::Name;
7use crate::object::ObjectIdentifier;
8use crate::object::Stream;
9use crate::object::dict::keys::{
10 AUTHOR, CREATION_DATE, CREATOR, ENCRYPT, FIRST, ID, INDEX, INFO, KEYWORDS, MOD_DATE, N,
11 OCPROPERTIES, PAGES, PREV, PRODUCER, ROOT, SIZE, SUBJECT, TITLE, TYPE, VERSION, W, XREF_STM,
12};
13use crate::object::indirect::IndirectObject;
14use crate::object::{Array, MaybeRef};
15use crate::object::{DateTime, Dict};
16use crate::object::{Object, ObjectLike};
17use crate::pdf::PdfVersion;
18use crate::reader::Reader;
19use crate::reader::{Readable, ReaderContext, ReaderExt};
20use crate::{PdfData, object};
21use log::{error, warn};
22use rustc_hash::FxHashMap;
23use std::cmp::max;
24use std::iter;
25use std::ops::Deref;
26use std::sync::{Arc, RwLock};
27
/// Number of bytes occupied by a single entry of a classic xref table.
pub(crate) const XREF_ENTRY_LEN: usize = 20;
29
/// Errors that can occur while building an [`XRef`].
#[derive(Debug, Copy, Clone)]
pub(crate) enum XRefError {
    /// The xref data or the trailer could not be located or parsed.
    Unknown,
    /// Setting up decryption from the trailer's encryption dictionary failed.
    Encryption(DecryptionError),
}
35
36pub(crate) fn root_xref(data: PdfData, password: &[u8]) -> Result<XRef, XRefError> {
38 let mut xref_map = FxHashMap::default();
39 let xref_pos = find_last_xref_pos(data.as_ref().as_ref()).ok_or(XRefError::Unknown)?;
40 let trailer = populate_xref_impl(data.as_ref().as_ref(), xref_pos, &mut xref_map)
41 .ok_or(XRefError::Unknown)?;
42
43 XRef::new(
44 data.clone(),
45 xref_map,
46 XRefInput::TrailerDictData(trailer),
47 false,
48 password,
49 )
50}
51
52pub(crate) fn fallback(data: PdfData, password: &[u8]) -> Option<XRef> {
54 warn!("xref table was invalid, trying to manually build xref table");
55 let (xref_map, xref_input) = fallback_xref_map(&data, password);
56
57 if let Some(xref_input) = xref_input {
58 warn!("rebuild xref table with {} entries", xref_map.len());
59
60 XRef::new(data.clone(), xref_map, xref_input, true, password).ok()
61 } else {
62 warn!("couldn't find trailer dictionary, failed to rebuild xref table");
63
64 None
65 }
66}
67
68fn fallback_xref_map<'a>(data: &'a PdfData, password: &[u8]) -> (XrefMap, Option<XRefInput<'a>>) {
69 fallback_xref_map_inner(data, ReaderContext::dummy(), true, password)
70}
71
/// Rebuilds an xref map by scanning the raw file bytes from start to end.
///
/// While scanning, the function
/// - records the offset of every object identifier that is followed by a
///   parseable object,
/// - collects every dictionary that contains a `ROOT` key as a trailer
///   candidate,
/// - remembers the most recently seen object as the root when a dictionary
///   whose `TYPE` is `Catalog` is found, and
/// - registers the objects contained in any `ObjStm` stream it encounters.
///
/// `dummy_ctx` is the reader context used for parsing; its `obj_number` is
/// updated as objects are discovered. If the selected trailer contains an
/// `ENCRYPT` key and `recurse` is `true`, a temporary [`XRef`] is built and the
/// scan is repeated once with a context derived from it.
///
/// Returns the rebuilt map together with the input for [`XRef::new`]: the
/// selected trailer dictionary if there is one, otherwise the catalog
/// reference, otherwise `None`.
fn fallback_xref_map_inner<'a>(
    data: &'a PdfData,
    mut dummy_ctx: ReaderContext<'a>,
    recurse: bool,
    password: &[u8],
) -> (XrefMap, Option<XRefInput<'a>>) {
    let mut xref_map = FxHashMap::default();
    let mut trailer_dicts = vec![];
    let mut root_ref = None;

    let mut r = Reader::new(data.as_ref().as_ref());

    // Identifier of the most recently recorded object; used to attribute
    // catalogs and object streams to their enclosing object.
    let mut last_obj_num = None;

    loop {
        let cur_pos = r.offset();

        // Snapshot of the reader at `cur_pos`, used below to re-read a
        // dictionary as a full stream.
        let mut old_r = r.clone();

        if let Some(obj_id) = r.read::<ObjectIdentifier>(&dummy_ctx) {
            // Only record the identifier if an object can be skipped right after it.
            let mut cloned = r.clone();
            cloned.skip_white_spaces_and_comments();
            if cloned.skip::<Object<'_>>(false).is_some() {
                xref_map.insert(obj_id, EntryType::Normal(cur_pos));
                last_obj_num = Some(obj_id);
                dummy_ctx.obj_number = Some(obj_id);
            }
        } else if let Some(dict) = r.read::<Dict<'_>>(&dummy_ctx) {
            // Any dictionary with a root entry is a trailer candidate.
            if dict.contains_key(ROOT) {
                trailer_dicts.push(dict.clone());
            }

            // A catalog dictionary belongs to the last object that was recorded.
            if dict
                .get::<Name<'_>>(TYPE)
                .is_some_and(|n| n.as_str() == "Catalog")
            {
                root_ref = last_obj_num;
            }

            // If the dictionary is the header of an object stream, register the
            // objects stored inside it.
            if let Some(stream) = old_r.read::<Stream<'_>>(&dummy_ctx)
                && stream.dict().get::<Name<'_>>(TYPE).as_deref() == Some(b"ObjStm")
                && let Some(data) = stream.decoded().ok()
                && let Some(last_obj_num) = last_obj_num
                && let Some(obj_stream) = ObjectStream::new(stream, &data, &dummy_ctx)
            {
                for (idx, (obj_num, _)) in obj_stream.offsets.iter().enumerate() {
                    let id = ObjectIdentifier::new(*obj_num as i32, 0);
                    // An entry found directly in the file is never replaced by an
                    // object-stream entry.
                    if xref_map
                        .get(&id)
                        .is_none_or(|e| !matches!(e, &EntryType::Normal(_)))
                    {
                        xref_map.insert(
                            id,
                            EntryType::ObjStream(last_obj_num.obj_num as u32, idx as u32),
                        );
                    }
                }
            }
        } else {
            // Nothing recognisable here; advance by one byte and try again.
            r.read_byte();
        }

        if r.at_end() {
            break;
        }
    }

    // Pick a trailer whose root resolves to a dictionary containing `PAGES`.
    // Later candidates overwrite earlier ones, so the last valid one wins.
    let mut trailer_dict = None;

    for dict in trailer_dicts {
        if let Some(root_id) = dict.get_raw::<Dict<'_>>(ROOT) {
            let check = |dict: &Dict<'_>| -> bool { dict.contains_key(PAGES) };

            match root_id {
                MaybeRef::Ref(r) => match xref_map.get(&r.into()) {
                    // Root is a regular object in the file.
                    Some(EntryType::Normal(offset)) => {
                        let mut reader = Reader::new(&data.as_ref().as_ref()[*offset..]);

                        if let Some(obj) =
                            reader.read_with_context::<IndirectObject<Dict<'_>>>(&dummy_ctx)
                            && check(&obj.clone().get())
                        {
                            trailer_dict = Some(dict);
                        }
                    }
                    // Root lives inside an object stream: load the stream first.
                    Some(EntryType::ObjStream(obj_num, idx)) => {
                        if let Some(EntryType::Normal(offset)) =
                            xref_map.get(&ObjectIdentifier::new(*obj_num as i32, 0))
                        {
                            let mut reader = Reader::new(&data.as_ref().as_ref()[*offset..]);

                            if let Some(stream) =
                                reader.read_with_context::<IndirectObject<Stream<'_>>>(&dummy_ctx)
                                && let Some(data) = stream.clone().get().decoded().ok()
                                && let Some(object_stream) =
                                    ObjectStream::new(stream.get(), &data, &dummy_ctx)
                                && let Some(obj) = object_stream.get::<Dict<'_>>(*idx)
                                && check(&obj)
                            {
                                trailer_dict = Some(dict);
                            }
                        }
                    }
                    _ => {}
                },
                // Root dictionary is stored inline in the trailer.
                MaybeRef::NotRef(d) => {
                    if check(&d) {
                        trailer_dict = Some(dict);
                    }
                }
            }
        }
    }

    let has_encryption = trailer_dict
        .as_ref()
        .is_some_and(|t| t.contains_key(ENCRYPT));

    // For encrypted files, repeat the scan once with a context backed by a
    // temporary xref built from the first pass. `recurse` is `false` in that
    // second pass, so this happens at most once.
    if has_encryption && recurse {
        if let Ok(xref) = XRef::new(
            data.clone(),
            xref_map.clone(),
            // `has_encryption` can only be true when `trailer_dict` is `Some`.
            XRefInput::TrailerDictData(trailer_dict.as_ref().map(|d| d.data()).unwrap()),
            true,
            password,
        ) {
            let ctx = ReaderContext::new(&xref, false);
            let (patched_map, _) = fallback_xref_map_inner(data, ctx, false, password);
            xref_map = patched_map;
        }
    }

    // Prefer a real trailer dictionary; fall back to the catalog reference.
    if let Some(trailer_dict_data) = trailer_dict.map(|d| d.data()) {
        (
            xref_map,
            Some(XRefInput::TrailerDictData(trailer_dict_data)),
        )
    } else if let Some(root_ref) = root_ref {
        (xref_map, Some(XRefInput::RootRef(root_ref)))
    } else {
        (xref_map, None)
    }
}
225
/// Shared placeholder xref without backing data, handed out by `XRef::dummy`.
static DUMMY_XREF: &XRef = &XRef(Inner::Dummy);
227
/// The cross-reference data of a PDF file, used to look up objects by their
/// identifier.
#[derive(Debug, Clone)]
pub struct XRef(Inner);
231
232impl XRef {
233 fn new(
234 data: PdfData,
235 xref_map: XrefMap,
236 input: XRefInput<'_>,
237 repaired: bool,
238 password: &[u8],
239 ) -> Result<Self, XRefError> {
240 let trailer_data = TrailerData::dummy();
244
245 let mut xref = Self(Inner::Some(Arc::new(SomeRepr {
246 data: Arc::new(Data::new(data)),
247 map: Arc::new(RwLock::new(MapRepr { xref_map, repaired })),
248 decryptor: Arc::new(Decryptor::None),
249 has_ocgs: false,
250 metadata: Arc::new(Metadata::default()),
251 trailer_data,
252 password: password.to_vec(),
253 })));
254
255 let decryptor = {
260 match input {
261 XRefInput::TrailerDictData(trailer_dict_data) => {
262 let mut r = Reader::new(trailer_dict_data);
263
264 let trailer_dict = r
265 .read_with_context::<Dict<'_>>(&ReaderContext::new(&xref, false))
266 .ok_or(XRefError::Unknown)?;
267
268 get_decryptor(&trailer_dict, password)?
269 }
270 XRefInput::RootRef(_) => Decryptor::None,
271 }
272 };
273
274 match &mut xref.0 {
275 Inner::Dummy => unreachable!(),
276 Inner::Some(r) => {
277 let mutable = Arc::make_mut(r);
278 mutable.decryptor = Arc::new(decryptor.clone());
279 }
280 }
281
282 let (trailer_data, has_ocgs, metadata) = match input {
283 XRefInput::TrailerDictData(trailer_dict_data) => {
284 let mut r = Reader::new(trailer_dict_data);
285
286 let trailer_dict = r
287 .read_with_context::<Dict<'_>>(&ReaderContext::new(&xref, false))
288 .ok_or(XRefError::Unknown)?;
289
290 let root_ref = trailer_dict.get_ref(ROOT).ok_or(XRefError::Unknown)?;
291 let root = trailer_dict
292 .get::<Dict<'_>>(ROOT)
293 .ok_or(XRefError::Unknown)?;
294 let metadata = trailer_dict
295 .get::<Dict<'_>>(INFO)
296 .map(|d| parse_metadata(&d))
297 .unwrap_or_default();
298 let pages_ref = root.get_ref(PAGES).ok_or(XRefError::Unknown)?;
299 let has_ocgs = root.get::<Dict<'_>>(OCPROPERTIES).is_some();
300 let version = root
301 .get::<Name<'_>>(VERSION)
302 .and_then(|v| PdfVersion::from_bytes(v.deref()));
303
304 let td = TrailerData {
305 pages_ref: pages_ref.into(),
306 root_ref: root_ref.into(),
307 version,
308 };
309
310 (td, has_ocgs, metadata)
311 }
312 XRefInput::RootRef(root_ref) => {
313 let root = xref.get::<Dict<'_>>(root_ref).ok_or(XRefError::Unknown)?;
314 let pages_ref = root.get_ref(PAGES).ok_or(XRefError::Unknown)?;
315
316 let td = TrailerData {
317 pages_ref: pages_ref.into(),
318 root_ref,
319 version: None,
320 };
321
322 (td, false, Metadata::default())
323 }
324 };
325
326 match &mut xref.0 {
327 Inner::Dummy => unreachable!(),
328 Inner::Some(r) => {
329 let mutable = Arc::make_mut(r);
330 mutable.trailer_data = trailer_data;
331 mutable.decryptor = Arc::new(decryptor);
332 mutable.has_ocgs = has_ocgs;
333 mutable.metadata = Arc::new(metadata);
334 }
335 }
336
337 Ok(xref)
338 }
339
340 fn is_repaired(&self) -> bool {
341 match &self.0 {
342 Inner::Dummy => false,
343 Inner::Some(r) => {
344 let locked = r.map.read().unwrap();
345 locked.repaired
346 }
347 }
348 }
349
    /// Returns the shared placeholder xref (`DUMMY_XREF`), which has no
    /// backing data.
    pub(crate) fn dummy() -> &'static Self {
        DUMMY_XREF
    }
353
354 pub(crate) fn len(&self) -> usize {
355 match &self.0 {
356 Inner::Dummy => 0,
357 Inner::Some(r) => r.map.read().unwrap().xref_map.len(),
358 }
359 }
360
361 pub(crate) fn trailer_data(&self) -> &TrailerData {
362 match &self.0 {
363 Inner::Dummy => unreachable!(),
364 Inner::Some(r) => &r.trailer_data,
365 }
366 }
367
368 pub(crate) fn metadata(&self) -> &Metadata {
369 match &self.0 {
370 Inner::Dummy => unreachable!(),
371 Inner::Some(r) => &r.metadata,
372 }
373 }
374
375 pub fn root_id(&self) -> ObjectIdentifier {
377 self.trailer_data().root_ref
378 }
379
380 pub fn has_optional_content_groups(&self) -> bool {
382 match &self.0 {
383 Inner::Dummy => false,
384 Inner::Some(r) => r.has_ocgs,
385 }
386 }
387
388 pub(crate) fn objects(&self) -> impl IntoIterator<Item = Object<'_>> + '_ {
389 match &self.0 {
390 Inner::Dummy => unimplemented!(),
391 Inner::Some(r) => {
392 let locked = r.map.read().unwrap();
393 let mut elements = locked
394 .xref_map
395 .iter()
396 .map(|(id, e)| {
397 let offset = match e {
398 EntryType::Normal(o) => (*o, 0),
399 EntryType::ObjStream(id, index) => {
400 if let Some(EntryType::Normal(offset)) =
401 locked.xref_map.get(&ObjectIdentifier::new(*id as i32, 0))
402 {
403 (*offset, *index)
404 } else {
405 (usize::MAX, 0)
406 }
407 }
408 };
409
410 (*id, offset)
411 })
412 .collect::<Vec<_>>();
413
414 elements.sort_by(|e1, e2| e1.1.cmp(&e2.1));
417
418 let mut iter = elements.into_iter();
419
420 iter::from_fn(move || {
421 for next in iter.by_ref() {
422 if let Some(obj) = self.get_with(next.0, &ReaderContext::new(self, false)) {
423 return Some(obj);
424 } else {
425 continue;
427 }
428 }
429
430 None
431 })
432 }
433 }
434 }
435
436 pub(crate) fn repair(&self) {
437 let Inner::Some(r) = &self.0 else {
438 unreachable!();
439 };
440
441 let mut locked = r.map.try_write().unwrap();
442 assert!(!locked.repaired);
443
444 let (xref_map, _) = fallback_xref_map(r.data.get(), &r.password);
445 locked.xref_map = xref_map;
446 locked.repaired = true;
447 }
448
449 #[inline]
450 pub(crate) fn needs_decryption(&self, ctx: &ReaderContext<'_>) -> bool {
451 match &self.0 {
452 Inner::Dummy => false,
453 Inner::Some(r) => {
454 if matches!(r.decryptor.as_ref(), Decryptor::None) {
455 false
456 } else {
457 !ctx.in_content_stream && !ctx.in_object_stream
458 }
459 }
460 }
461 }
462
463 #[inline]
464 pub(crate) fn decrypt(
465 &self,
466 id: ObjectIdentifier,
467 data: &[u8],
468 target: DecryptionTarget,
469 ) -> Option<Vec<u8>> {
470 match &self.0 {
471 Inner::Dummy => Some(data.to_vec()),
472 Inner::Some(r) => r.decryptor.decrypt(id, data, target),
473 }
474 }
475
476 #[allow(private_bounds)]
478 pub fn get<'a, T>(&'a self, id: ObjectIdentifier) -> Option<T>
479 where
480 T: ObjectLike<'a>,
481 {
482 let ctx = ReaderContext::new(self, false);
483 self.get_with(id, &ctx)
484 }
485
486 #[allow(private_bounds)]
488 pub(crate) fn get_with<'a, T>(
489 &'a self,
490 id: ObjectIdentifier,
491 ctx: &ReaderContext<'a>,
492 ) -> Option<T>
493 where
494 T: ObjectLike<'a>,
495 {
496 let Inner::Some(repr) = &self.0 else {
497 return None;
498 };
499
500 let locked = repr.map.try_read().unwrap();
501
502 let mut r = Reader::new(repr.data.get().as_ref().as_ref());
503
504 let entry = *locked.xref_map.get(&id).or({
505 None
508 })?;
509 drop(locked);
510
511 let mut ctx = ctx.clone();
512 ctx.obj_number = Some(id);
513 ctx.in_content_stream = false;
514
515 match entry {
516 EntryType::Normal(offset) => {
517 ctx.in_object_stream = false;
518 r.jump(offset);
519
520 if let Some(object) = r.read_with_context::<IndirectObject<T>>(&ctx) {
521 if object.id() == &id {
522 return Some(object.get());
523 }
524 } else {
525 if r.skip_not_in_content_stream::<IndirectObject<Object<'_>>>()
528 .is_some()
529 {
530 return None;
531 }
532 };
533
534 if self.is_repaired() {
536 error!(
537 "attempt was made at repairing xref, but object {id:?} still couldn't be read"
538 );
539
540 None
541 } else {
542 warn!("broken xref, attempting to repair");
543
544 self.repair();
545
546 self.get_with::<T>(id, &ctx)
548 }
549 }
550 EntryType::ObjStream(obj_stram_gen_num, index) => {
551 let obj_stream_id = ObjectIdentifier::new(obj_stram_gen_num as i32, 0);
553
554 if obj_stream_id == id {
555 warn!("cycle detected in object stream");
556
557 return None;
558 }
559
560 let stream = self.get_with::<Stream<'_>>(obj_stream_id, &ctx)?;
561 let data = repr.data.get_with(obj_stream_id, &ctx)?;
562 let object_stream = ObjectStream::new(stream, data, &ctx)?;
563 object_stream.get(index)
564 }
565 }
566 }
567}
568
/// Describes where `XRef::new` obtains the document root from.
#[derive(Debug, Copy, Clone)]
pub(crate) enum XRefInput<'a> {
    /// Raw bytes of a trailer dictionary; `XRef::new` parses them itself and
    /// reads the root, info and encryption entries from the result.
    TrailerDictData(&'a [u8]),
    /// Identifier of the root dictionary, used when no trailer dictionary is
    /// available. `XRef::new` uses no decryptor and default metadata in this
    /// case.
    RootRef(ObjectIdentifier),
}
585
586pub(crate) fn find_last_xref_pos(data: &[u8]) -> Option<usize> {
587 let mut finder = Reader::new(data);
588 let mut pos = finder.len().checked_sub(1)?;
589 finder.jump(pos);
590
591 let needle = b"startxref";
592
593 loop {
594 if finder.forward_tag(needle).is_some() {
595 finder.skip_white_spaces_and_comments();
596
597 let offset = finder.read_without_context::<i32>()?.try_into().ok()?;
598
599 return Some(offset);
600 }
601
602 pos = pos.checked_sub(1)?;
603 finder.jump(pos);
604 }
605}
606
/// Location of an object as recorded in the xref map.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
enum EntryType {
    /// The object is stored directly in the file at the given byte offset.
    Normal(usize),
    /// The object is stored inside an object stream: the first value is the
    /// object number of that stream, the second the index of the object
    /// within it.
    ObjStream(u32, u32),
}
617
/// Maps object identifiers to the location of the corresponding object.
type XrefMap = FxHashMap<ObjectIdentifier, EntryType>;
619
/// The mutable part of an xref: the map itself plus its repair state.
#[derive(Debug)]
struct MapRepr {
    /// Current object locations.
    xref_map: XrefMap,
    /// Whether `xref_map` was produced by a full-file scan rather than by
    /// parsing the file's xref data.
    repaired: bool,
}
626
/// Document-level references collected while building an xref.
#[derive(Debug, Copy, Clone)]
pub(crate) struct TrailerData {
    /// Reference stored under `PAGES` in the root dictionary.
    pub(crate) pages_ref: ObjectIdentifier,
    /// Reference to the root dictionary.
    pub(crate) root_ref: ObjectIdentifier,
    /// Version parsed from the root dictionary's `VERSION` entry, if present
    /// and valid.
    pub(crate) version: Option<PdfVersion>,
}
633
634impl TrailerData {
635 pub(crate) fn dummy() -> Self {
636 Self {
637 pages_ref: ObjectIdentifier::new(0, 0),
638 root_ref: ObjectIdentifier::new(0, 0),
639 version: None,
640 }
641 }
642}
643
/// Backing state of a non-dummy [`XRef`].
#[derive(Debug, Clone)]
struct SomeRepr {
    /// The file data wrapped in the project's `Data` type.
    data: Arc<Data>,
    /// Object locations plus repair flag, behind a lock so the map can be
    /// rebuilt through a shared reference.
    map: Arc<RwLock<MapRepr>>,
    /// Metadata parsed from the trailer's `INFO` dictionary.
    metadata: Arc<Metadata>,
    /// Decryptor derived from the trailer, or `Decryptor::None`.
    decryptor: Arc<Decryptor>,
    /// Whether the root dictionary contains an `OCPROPERTIES` dictionary.
    has_ocgs: bool,
    /// Password the xref was created with; reused when the map is repaired.
    password: Vec<u8>,
    /// Root and pages references plus the optional version.
    trailer_data: TrailerData,
}
654
/// Internal representation of an [`XRef`].
#[derive(Debug, Clone)]
enum Inner {
    /// Placeholder without any data; lookups on it return nothing.
    Dummy,
    /// An xref backed by real file data.
    Some(Arc<SomeRepr>),
}
662
/// A single parsed entry of a classic xref table.
#[derive(Debug)]
struct XRefEntry {
    /// Value of the 10-digit first field of the entry.
    offset: usize,
    /// Value of the 5-digit second field of the entry.
    gen_number: i32,
    /// Whether the entry's keyword byte is `n`.
    used: bool,
}

impl XRefEntry {
    /// Parses one xref table entry from `data`.
    ///
    /// The expected layout is ten decimal digits at bytes `0..10`, five
    /// decimal digits at bytes `11..16` and the keyword at byte `17`.
    ///
    /// Returns `None` if `data` is too short to contain these fields, if a
    /// numeric field contains a non-digit, or if a number does not fit its
    /// target type. Bytes other than the ones listed above are not inspected.
    pub(crate) fn read(data: &[u8]) -> Option<Self> {
        /// Parses an unsigned decimal number consisting solely of ASCII digits.
        fn parse_u32(digits: &[u8]) -> Option<u32> {
            digits.iter().try_fold(0_u32, |accum, &byte| match byte {
                b'0'..=b'9' => accum.checked_mul(10)?.checked_add(u32::from(byte - b'0')),
                _ => None,
            })
        }

        // Checked slicing: a truncated entry yields `None` instead of a panic.
        let offset = parse_u32(data.get(0..10)?)? as usize;
        let gen_number = i32::try_from(parse_u32(data.get(11..16)?)?).ok()?;
        let used = *data.get(17)? == b'n';

        Some(Self {
            offset,
            gen_number,
            used,
        })
    }
}
700
701fn populate_xref_impl<'a>(data: &'a [u8], pos: usize, xref_map: &mut XrefMap) -> Option<&'a [u8]> {
702 let mut reader = Reader::new(data);
703 reader.jump(pos);
704 reader.skip_white_spaces_and_comments();
706
707 let mut r2 = reader.clone();
708 if reader
709 .clone()
710 .read_without_context::<ObjectIdentifier>()
711 .is_some()
712 {
713 populate_from_xref_stream(data, &mut r2, xref_map)
714 } else {
715 populate_from_xref_table(data, &mut r2, xref_map)
716 }
717}
718
/// Header of one subsection in a classic xref table.
pub(super) struct SubsectionHeader {
    /// Object number of the first entry in the subsection.
    pub(super) start: u32,
    /// Number of entries in the subsection.
    pub(super) num_entries: u32,
}
723
724impl Readable<'_> for SubsectionHeader {
725 fn read(r: &mut Reader<'_>, _: &ReaderContext<'_>) -> Option<Self> {
726 r.skip_white_spaces();
727 let start = r.read_without_context::<u32>()?;
728 r.skip_white_spaces();
729 let num_entries = r.read_without_context::<u32>()?;
730 r.skip_white_spaces();
731
732 Some(Self { start, num_entries })
733 }
734}
735
736fn populate_from_xref_table<'a>(
738 data: &'a [u8],
739 reader: &mut Reader<'a>,
740 insert_map: &mut XrefMap,
741) -> Option<&'a [u8]> {
742 let trailer = {
743 let mut reader = reader.clone();
744 read_xref_table_trailer(&mut reader, &ReaderContext::dummy())?
745 };
746
747 reader.skip_white_spaces();
748 reader.forward_tag(b"xref")?;
749 reader.skip_white_spaces();
750
751 let mut max_obj = 0;
752
753 if let Some(prev) = trailer.get::<i32>(PREV) {
754 populate_xref_impl(data, prev as usize, insert_map)?;
756 }
757
758 if let Some(xref_stm) = trailer.get::<i32>(XREF_STM) {
761 populate_xref_impl(data, xref_stm as usize, insert_map)?;
762 }
763
764 while let Some(header) = reader.read_without_context::<SubsectionHeader>() {
765 reader.skip_white_spaces();
766
767 let start = header.start;
768 let end = start + header.num_entries;
769
770 for obj_number in start..end {
771 max_obj = max(max_obj, obj_number);
772 let bytes = reader.read_bytes(XREF_ENTRY_LEN)?;
773 let entry = XRefEntry::read(bytes)?;
774
775 if entry.used {
778 insert_map.insert(
779 ObjectIdentifier::new(obj_number as i32, entry.gen_number),
780 EntryType::Normal(entry.offset),
781 );
782 }
783 }
784 }
785
786 Some(trailer.data())
787}
788
789fn populate_from_xref_stream<'a>(
790 data: &'a [u8],
791 reader: &mut Reader<'a>,
792 insert_map: &mut XrefMap,
793) -> Option<&'a [u8]> {
794 let stream = reader
795 .read_with_context::<IndirectObject<Stream<'_>>>(&ReaderContext::dummy())?
796 .get();
797
798 if let Some(prev) = stream.dict().get::<i32>(PREV) {
799 let _ = populate_xref_impl(data, prev as usize, insert_map)?;
801 }
802
803 let size = stream.dict().get::<u32>(SIZE)?;
804
805 let [f1_len, f2_len, f3_len] = stream.dict().get::<[u8; 3]>(W)?;
806
807 if f2_len > size_of::<u64>() as u8 {
808 error!("xref offset length is larger than the allowed limit");
809
810 return None;
811 }
812
813 if f1_len != 1 {
815 warn!("first field in xref stream was longer than 1");
816 }
817
818 let xref_data = stream.decoded().ok()?;
819 let mut xref_reader = Reader::new(xref_data.as_ref());
820
821 if let Some(arr) = stream.dict().get::<Array<'_>>(INDEX) {
822 let iter = arr.iter::<(u32, u32)>();
823
824 for (start, num_elements) in iter {
825 xref_stream_subsection(
826 &mut xref_reader,
827 start,
828 num_elements,
829 f1_len,
830 f2_len,
831 f3_len,
832 insert_map,
833 )?;
834 }
835 } else {
836 xref_stream_subsection(
837 &mut xref_reader,
838 0,
839 size,
840 f1_len,
841 f2_len,
842 f3_len,
843 insert_map,
844 )?;
845 }
846
847 Some(stream.dict().data())
848}
849
850fn xref_stream_num(data: &[u8]) -> Option<u32> {
851 Some(match data.len() {
852 0 => return None,
853 1 => u8::from_be(data[0]) as u32,
854 2 => u16::from_be_bytes(data[0..2].try_into().ok()?) as u32,
855 3 => u32::from_be_bytes([0, data[0], data[1], data[2]]),
856 4 => u32::from_be_bytes(data[0..4].try_into().ok()?),
857 8 => {
858 if let Ok(num) = u32::try_from(u64::from_be_bytes(data[0..8].try_into().ok()?)) {
859 return Some(num);
860 } else {
861 warn!("xref stream number is too large");
862
863 return None;
864 }
865 }
866 n => {
867 warn!("invalid xref stream number {n}");
868
869 return None;
870 }
871 })
872}
873
874fn xref_stream_subsection<'a>(
875 xref_reader: &mut Reader<'a>,
876 start: u32,
877 num_elements: u32,
878 f1_len: u8,
879 f2_len: u8,
880 f3_len: u8,
881 insert_map: &mut XrefMap,
882) -> Option<()> {
883 for i in 0..num_elements {
884 let f_type = if f1_len == 0 {
885 1
886 } else {
887 xref_reader.read_bytes(1)?[0]
889 };
890
891 let obj_number = start + i;
892
893 match f_type {
894 0 => {
896 xref_reader.skip_bytes(f2_len as usize + f3_len as usize)?;
897 }
898 1 => {
899 let offset = if f2_len > 0 {
900 let data = xref_reader.read_bytes(f2_len as usize)?;
901 xref_stream_num(data)?
902 } else {
903 0
904 };
905
906 let gen_number = if f3_len > 0 {
907 let data = xref_reader.read_bytes(f3_len as usize)?;
908 xref_stream_num(data)?
909 } else {
910 0
911 };
912
913 insert_map.insert(
914 ObjectIdentifier::new(obj_number as i32, gen_number as i32),
915 EntryType::Normal(offset as usize),
916 );
917 }
918 2 => {
919 let obj_stream_number = {
920 let data = xref_reader.read_bytes(f2_len as usize)?;
921 xref_stream_num(data)?
922 };
923 let gen_number = 0;
924 let index = if f3_len > 0 {
925 let data = xref_reader.read_bytes(f3_len as usize)?;
926 xref_stream_num(data)?
927 } else {
928 0
929 };
930
931 insert_map.insert(
932 ObjectIdentifier::new(obj_number as i32, gen_number),
933 EntryType::ObjStream(obj_stream_number, index),
934 );
935 }
936 _ => {
937 warn!("xref has unknown field type {f_type}");
938
939 return None;
940 }
941 }
942 }
943
944 Some(())
945}
946
947fn read_xref_table_trailer<'a>(
948 reader: &mut Reader<'a>,
949 ctx: &ReaderContext<'a>,
950) -> Option<Dict<'a>> {
951 reader.skip_white_spaces();
952 reader.forward_tag(b"xref")?;
953 reader.skip_white_spaces();
954
955 while let Some(header) = reader.read_without_context::<SubsectionHeader>() {
956 reader.jump(reader.offset() + XREF_ENTRY_LEN * header.num_entries as usize);
957 }
958
959 reader.skip_white_spaces();
960 reader.forward_tag(b"trailer")?;
961 reader.skip_white_spaces();
962
963 reader.read_with_context::<Dict<'_>>(ctx)
964}
965
966fn get_decryptor(trailer_dict: &Dict<'_>, password: &[u8]) -> Result<Decryptor, XRefError> {
967 if let Some(encryption_dict) = trailer_dict.get::<Dict<'_>>(ENCRYPT) {
968 let id = if let Some(id) = trailer_dict
969 .get::<Array<'_>>(ID)
970 .and_then(|a| a.flex_iter().next::<object::String<'_>>())
971 {
972 id.get().to_vec()
973 } else {
974 vec![]
976 };
977
978 get(&encryption_dict, &id, password).map_err(XRefError::Encryption)
979 } else {
980 Ok(Decryptor::None)
981 }
982}
983
/// A decoded object stream together with the positions of its objects.
struct ObjectStream<'a> {
    /// Decoded stream data.
    data: &'a [u8],
    /// Reader context used for the contained objects; `in_object_stream` is
    /// set on it.
    ctx: ReaderContext<'a>,
    /// One `(object number, offset into data)` pair per contained object, in
    /// the order in which they are listed at the start of the stream.
    offsets: Vec<(u32, usize)>,
}
989
990impl<'a> ObjectStream<'a> {
991 fn new(inner: Stream<'_>, data: &'a [u8], ctx: &ReaderContext<'a>) -> Option<Self> {
992 let num_objects = inner.dict().get::<usize>(N)?;
993 let first_offset = inner.dict().get::<usize>(FIRST)?;
994
995 let mut r = Reader::new(data);
996
997 let mut offsets = vec![];
998
999 for _ in 0..num_objects {
1000 r.skip_white_spaces_and_comments();
1001 let obj_num = r.read_without_context::<u32>()?;
1003 r.skip_white_spaces_and_comments();
1004 let relative_offset = r.read_without_context::<usize>()?;
1005 offsets.push((obj_num, first_offset + relative_offset));
1006 }
1007
1008 let mut ctx = ctx.clone();
1009 ctx.in_object_stream = true;
1010
1011 Some(Self { data, ctx, offsets })
1012 }
1013
1014 fn get<T>(&self, index: u32) -> Option<T>
1015 where
1016 T: ObjectLike<'a>,
1017 {
1018 let offset = self.offsets.get(index as usize)?.1;
1019 let mut r = Reader::new(self.data);
1020 r.jump(offset);
1021 r.skip_white_spaces_and_comments();
1022
1023 r.read_with_context::<T>(&self.ctx)
1024 }
1025}
1026
1027fn parse_metadata(info_dict: &Dict<'_>) -> Metadata {
1028 Metadata {
1029 creation_date: info_dict
1030 .get::<object::String<'_>>(CREATION_DATE)
1031 .and_then(|c| DateTime::from_bytes(c.get().as_ref())),
1032 modification_date: info_dict
1033 .get::<object::String<'_>>(MOD_DATE)
1034 .and_then(|c| DateTime::from_bytes(c.get().as_ref())),
1035 title: info_dict
1036 .get::<object::String<'_>>(TITLE)
1037 .map(|t| t.get().to_vec()),
1038 author: info_dict
1039 .get::<object::String<'_>>(AUTHOR)
1040 .map(|t| t.get().to_vec()),
1041 subject: info_dict
1042 .get::<object::String<'_>>(SUBJECT)
1043 .map(|t| t.get().to_vec()),
1044 keywords: info_dict
1045 .get::<object::String<'_>>(KEYWORDS)
1046 .map(|t| t.get().to_vec()),
1047 creator: info_dict
1048 .get::<object::String<'_>>(CREATOR)
1049 .map(|t| t.get().to_vec()),
1050 producer: info_dict
1051 .get::<object::String<'_>>(PRODUCER)
1052 .map(|t| t.get().to_vec()),
1053 }
1054}