1use crate::content::{TypedIter, UntypedIter};
4use crate::object::Array;
5use crate::object::Dict;
6use crate::object::Name;
7use crate::object::Rect;
8use crate::object::Stream;
9use crate::object::dict::keys::*;
10use crate::object::{Object, ObjectIdentifier, ObjectLike};
11use crate::reader::ReaderContext;
12use crate::sync::OnceLock;
13use crate::util::FloatExt;
14use crate::xref::XRef;
15use alloc::boxed::Box;
16use alloc::collections::BTreeSet;
17use alloc::vec;
18use alloc::vec::Vec;
19use core::ops::Deref;
20use log::warn;
21
22#[derive(Debug, Clone)]
24struct PagesContext {
25 media_box: Option<Rect>,
26 crop_box: Option<Rect>,
27 rotate: Option<u32>,
28}
29
30impl PagesContext {
31 fn new() -> Self {
32 Self {
33 media_box: None,
34 crop_box: None,
35 rotate: None,
36 }
37 }
38}
39
40pub struct Pages<'a> {
42 pages: Vec<Page<'a>>,
43 xref: &'a XRef,
44}
45
46impl<'a> Pages<'a> {
47 pub(crate) fn new(
49 pages_dict: &Dict<'a>,
50 ctx: &ReaderContext<'a>,
51 xref: &'a XRef,
52 ) -> Option<Self> {
53 let mut pages = vec![];
54 let pages_ctx = PagesContext::new();
55 resolve_pages(
56 pages_dict,
57 &mut pages,
58 pages_ctx,
59 Resources::new(Dict::empty(), None, ctx),
60 )?;
61
62 Some(Self { pages, xref })
63 }
64
65 pub(crate) fn new_brute_force(ctx: &ReaderContext<'a>, xref: &'a XRef) -> Option<Self> {
70 let mut pages = vec![];
71
72 for object in xref.objects() {
73 if let Some(dict) = object.into_dict()
74 && let Some(page) = Page::new(
75 &dict,
76 &PagesContext::new(),
77 Resources::new(Dict::empty(), None, ctx),
78 true,
79 )
80 {
81 pages.push(page);
82 }
83 }
84
85 if pages.is_empty() {
86 return None;
87 }
88
89 Some(Self { pages, xref })
90 }
91
92 pub fn xref(&self) -> &'a XRef {
94 self.xref
95 }
96}
97
98impl<'a> Deref for Pages<'a> {
99 type Target = [Page<'a>];
100
101 fn deref(&self) -> &Self::Target {
102 &self.pages
103 }
104}
105
/// Fallback nesting limit for the page tree, used by `resolve_pages` when the
/// reader's load limits do not specify an object depth limit.
const MAX_PAGE_TREE_DEPTH: usize = 256;

/// Upper bound on the number of pages collected from a page tree; traversal
/// stops adding pages once this many have been gathered.
const MAX_PAGE_COUNT: usize = 100_000;
114
115fn resolve_pages<'a>(
116 pages_dict: &Dict<'a>,
117 entries: &mut Vec<Page<'a>>,
118 ctx: PagesContext,
119 resources: Resources<'a>,
120) -> Option<()> {
121 let max_depth = resources
122 .ctx
123 .load_limits()
124 .object_depth_limit()
125 .map(|d| d as usize)
126 .unwrap_or(MAX_PAGE_TREE_DEPTH);
127
128 let mut visited = BTreeSet::new();
129 resolve_pages_depth(
130 pages_dict,
131 entries,
132 ctx,
133 resources,
134 0,
135 max_depth,
136 &mut visited,
137 )
138}
139
140fn resolve_pages_depth<'a>(
141 pages_dict: &Dict<'a>,
142 entries: &mut Vec<Page<'a>>,
143 mut ctx: PagesContext,
144 resources: Resources<'a>,
145 depth: usize,
146 max_depth: usize,
147 visited: &mut BTreeSet<ObjectIdentifier>,
148) -> Option<()> {
149 if depth > max_depth {
150 log::warn!("Page tree depth exceeds {max_depth}, stopping traversal");
151 return None;
152 }
153
154 if let Some(node_id) = pages_dict.obj_id()
159 && !visited.insert(node_id)
160 {
161 log::warn!("Page tree cycle detected at {node_id:?}, stopping traversal");
162 return Some(());
163 }
164
165 if let Some(media_box) = pages_dict.get::<Rect>(MEDIA_BOX) {
166 ctx.media_box = Some(media_box);
167 }
168
169 if let Some(crop_box) = pages_dict.get::<Rect>(CROP_BOX) {
170 ctx.crop_box = Some(crop_box);
171 }
172
173 if let Some(rotate) = pages_dict.get::<i32>(ROTATE) {
176 ctx.rotate = Some(rotate.rem_euclid(360) as u32);
177 }
178
179 let resources = Resources::from_parent(
180 pages_dict.get::<Dict<'_>>(RESOURCES).unwrap_or_default(),
181 resources.clone(),
182 );
183
184 let kids = pages_dict.get::<Array<'a>>(KIDS)?;
185
186 for dict in kids.iter::<Dict<'_>>() {
187 if entries.len() >= MAX_PAGE_COUNT {
188 log::warn!("Page count exceeds {MAX_PAGE_COUNT}, stopping page tree traversal");
189 return Some(());
190 }
191
192 match dict.get::<Name>(TYPE).as_deref() {
193 Some(PAGES) => {
194 resolve_pages_depth(
195 &dict,
196 entries,
197 ctx.clone(),
198 resources.clone(),
199 depth + 1,
200 max_depth,
201 visited,
202 );
203 }
204 _ => {
207 if let Some(page) = Page::new(&dict, &ctx, resources.clone(), false) {
208 entries.push(page);
209 }
210 }
211 }
212 }
213
214 Some(())
215}
216
/// The rotation applied to a page, derived from its `Rotate` entry.
///
/// The PDF specification defines `Rotate` as a clockwise angle in degrees.
#[derive(Clone, Copy, Debug)]
pub enum Rotation {
    /// No rotation (0 degrees, and the fallback for any unsupported angle).
    None,
    /// Rotated by 90 degrees.
    Horizontal,
    /// Rotated by 180 degrees.
    Flipped,
    /// Rotated by 270 degrees.
    FlippedHorizontal,
}
229
230pub struct Page<'a> {
232 inner: Dict<'a>,
233 media_box: Rect,
234 crop_box: Rect,
235 rotation: Rotation,
236 page_streams: OnceLock<Option<Vec<u8>>>,
237 resources: Resources<'a>,
238 ctx: ReaderContext<'a>,
239}
240
241impl<'a> Page<'a> {
242 fn new(
243 dict: &Dict<'a>,
244 ctx: &PagesContext,
245 resources: Resources<'a>,
246 brute_force: bool,
247 ) -> Option<Self> {
248 if brute_force && !dict.contains_key(CONTENTS) {
251 return None;
252 }
253
254 let media_box = dict
255 .get::<Rect>(MEDIA_BOX)
256 .or(ctx.media_box)
257 .unwrap_or(US_LETTER);
258
259 let crop_box = dict
260 .get::<Rect>(CROP_BOX)
261 .or(ctx.crop_box)
262 .unwrap_or(media_box);
263
264 let rotation = match dict
265 .get::<i32>(ROTATE)
266 .map(|r| r.rem_euclid(360) as u32)
267 .or(ctx.rotate)
268 .unwrap_or(0)
269 {
270 0 => Rotation::None,
271 90 => Rotation::Horizontal,
272 180 => Rotation::Flipped,
273 270 => Rotation::FlippedHorizontal,
274 _ => Rotation::None,
275 };
276
277 let ctx = resources.ctx.clone();
278 let resources = Resources::from_parent(
279 dict.get::<Dict<'_>>(RESOURCES).unwrap_or_default(),
280 resources,
281 );
282
283 Some(Self {
284 inner: dict.clone(),
285 media_box,
286 crop_box,
287 rotation,
288 page_streams: OnceLock::new(),
289 resources,
290 ctx,
291 })
292 }
293
294 fn operations_impl(&self) -> Option<UntypedIter<'_>> {
295 let stream = self.page_stream()?;
296 let iter = UntypedIter::new(stream);
297
298 Some(iter)
299 }
300
301 pub fn page_stream(&self) -> Option<&[u8]> {
303 let convert_single = |s: Stream<'_>| {
304 let data = s.decoded().ok()?;
305 Some(data.to_vec())
306 };
307
308 self.page_streams
309 .get_or_init(|| {
310 if let Some(stream) = self.inner.get::<Stream<'_>>(CONTENTS) {
311 convert_single(stream)
312 } else if let Some(array) = self.inner.get::<Array<'_>>(CONTENTS) {
313 let streams = array.iter::<Stream<'_>>().flat_map(convert_single);
314
315 let mut collected = vec![];
316
317 for stream in streams {
318 collected.extend(stream);
319 collected.push(b' ');
321 }
322
323 Some(collected)
324 } else {
325 warn!("contents entry of page was neither stream nor array of streams");
326
327 None
328 }
329 })
330 .as_ref()
331 .map(|d| d.as_slice())
332 }
333
334 pub fn resources(&self) -> &Resources<'a> {
336 &self.resources
337 }
338
339 pub fn media_box(&self) -> Rect {
341 self.media_box
342 }
343
344 pub fn rotation(&self) -> Rotation {
346 self.rotation
347 }
348
349 pub fn crop_box(&self) -> Rect {
351 self.crop_box
352 }
353
354 pub fn intersected_crop_box(&self) -> Rect {
356 self.crop_box().intersect(self.media_box())
357 }
358
359 pub fn base_dimensions(&self) -> (f32, f32) {
373 let crop_box = self.crop_box();
374 let media_box = self.media_box();
375
376 let effective = if crop_box.x0 >= media_box.x0 && crop_box.y0 >= media_box.y0 {
380 crop_box.intersect(media_box)
381 } else {
382 crop_box
383 };
384
385 if (effective.width() as f32).is_nearly_zero()
386 || (effective.height() as f32).is_nearly_zero()
387 {
388 (US_LETTER.width() as f32, US_LETTER.height() as f32)
389 } else {
390 (
391 effective.width().max(1.0) as f32,
392 effective.height().max(1.0) as f32,
393 )
394 }
395 }
396
397 pub fn render_dimensions(&self) -> (f32, f32) {
402 let (mut base_width, mut base_height) = self.base_dimensions();
403
404 if matches!(
405 self.rotation(),
406 Rotation::Horizontal | Rotation::FlippedHorizontal
407 ) {
408 core::mem::swap(&mut base_width, &mut base_height);
409 }
410
411 (base_width, base_height)
412 }
413
414 pub fn operations(&self) -> UntypedIter<'_> {
416 self.operations_impl().unwrap_or(UntypedIter::empty())
417 }
418
419 pub fn raw(&self) -> &Dict<'a> {
421 &self.inner
422 }
423
424 pub fn xref(&self) -> &'a XRef {
426 self.ctx.xref()
427 }
428
429 pub fn typed_operations(&self) -> TypedIter<'_> {
431 TypedIter::from_untyped(self.operations())
432 }
433
434 pub fn annots(&self) -> Vec<Dict<'a>> {
436 self.inner
437 .get::<Array<'_>>(crate::object::dict::keys::ANNOTS)
438 .map(|arr| arr.iter::<Dict<'_>>().collect())
439 .unwrap_or_default()
440 }
441}
442
443#[derive(Clone, Debug)]
445pub struct Resources<'a> {
446 parent: Option<Box<Self>>,
447 ctx: ReaderContext<'a>,
448 pub ext_g_states: Dict<'a>,
450 pub fonts: Dict<'a>,
452 pub properties: Dict<'a>,
454 pub color_spaces: Dict<'a>,
456 pub x_objects: Dict<'a>,
458 pub patterns: Dict<'a>,
460 pub shadings: Dict<'a>,
462}
463
464impl<'a> Resources<'a> {
465 pub fn from_parent(resources: Dict<'a>, parent: Self) -> Self {
467 let ctx = parent.ctx.clone();
468
469 Self::new(resources, Some(parent), &ctx)
470 }
471
472 pub(crate) fn new(resources: Dict<'a>, parent: Option<Self>, ctx: &ReaderContext<'a>) -> Self {
474 let ext_g_states = resources.get::<Dict<'_>>(EXT_G_STATE).unwrap_or_default();
475 let fonts = resources.get::<Dict<'_>>(FONT).unwrap_or_default();
476 let color_spaces = resources.get::<Dict<'_>>(COLORSPACE).unwrap_or_default();
477 let x_objects = resources.get::<Dict<'_>>(XOBJECT).unwrap_or_default();
478 let patterns = resources.get::<Dict<'_>>(PATTERN).unwrap_or_default();
479 let shadings = resources.get::<Dict<'_>>(SHADING).unwrap_or_default();
480 let properties = resources.get::<Dict<'_>>(PROPERTIES).unwrap_or_default();
481
482 let parent = parent.map(Box::new);
483
484 Self {
485 parent,
486 ext_g_states,
487 fonts,
488 color_spaces,
489 properties,
490 x_objects,
491 patterns,
492 shadings,
493 ctx: ctx.clone(),
494 }
495 }
496
497 fn get_resource<T: ObjectLike<'a>>(&self, name: Name, dict: &Dict<'a>) -> Option<T> {
498 dict.get::<T>(name.deref())
499 }
500
501 pub fn parent(&self) -> Option<&Self> {
503 self.parent.as_deref()
504 }
505
506 pub fn get_ext_g_state(&self, name: Name) -> Option<Dict<'a>> {
508 self.get_resource::<Dict<'_>>(name.clone(), &self.ext_g_states)
509 .or_else(|| self.parent.as_ref().and_then(|p| p.get_ext_g_state(name)))
510 }
511
512 pub fn get_color_space(&self, name: Name) -> Option<Object<'a>> {
514 self.get_resource::<Object<'_>>(name.clone(), &self.color_spaces)
515 .or_else(|| self.parent.as_ref().and_then(|p| p.get_color_space(name)))
516 }
517
518 pub fn get_font(&self, name: Name) -> Option<Dict<'a>> {
520 self.get_resource::<Dict<'_>>(name.clone(), &self.fonts)
521 .or_else(|| self.parent.as_ref().and_then(|p| p.get_font(name)))
522 }
523
524 pub fn get_pattern(&self, name: Name) -> Option<Object<'a>> {
526 self.get_resource::<Object<'_>>(name.clone(), &self.patterns)
527 .or_else(|| self.parent.as_ref().and_then(|p| p.get_pattern(name)))
528 }
529
530 pub fn get_x_object(&self, name: Name) -> Option<Stream<'a>> {
532 self.get_resource::<Stream<'_>>(name.clone(), &self.x_objects)
533 .or_else(|| self.parent.as_ref().and_then(|p| p.get_x_object(name)))
534 }
535
536 pub fn get_shading(&self, name: Name) -> Option<Object<'a>> {
538 self.get_resource::<Object<'_>>(name.clone(), &self.shadings)
539 .or_else(|| self.parent.as_ref().and_then(|p| p.get_shading(name)))
540 }
541}
542
/// Number of points in one inch.
const POINTS_PER_INCH: f64 = 72.0;
/// Number of points in one millimetre (one inch being 10 * 2.54 mm).
const POINTS_PER_MM: f64 = 1.0 / (10.0 * 2.54) * POINTS_PER_INCH;
546
547pub const A4: Rect = Rect {
549 x0: 0.0,
550 y0: 0.0,
551 x1: 210.0 * POINTS_PER_MM,
552 y1: 297.0 * POINTS_PER_MM,
553};
554
555const US_LETTER: Rect = Rect {
563 x0: 0.0,
564 y0: 0.0,
565 x1: 8.5 * POINTS_PER_INCH,
566 y1: 11.0 * POINTS_PER_INCH,
567};
568
569pub(crate) mod cached {
570 use crate::page::Pages;
571 use crate::reader::ReaderContext;
572 use crate::xref::XRef;
573 use core::ops::Deref;
574
575 #[cfg(feature = "std")]
578 pub(crate) use std::sync::Arc;
579
580 #[cfg(not(feature = "std"))]
581 pub(crate) use alloc::rc::Rc as Arc;
582
583 pub(crate) struct CachedPages {
584 pages: Pages<'static>,
585 _xref: Arc<XRef>,
588 page_tree_rebuilt: bool,
591 }
592
593 impl CachedPages {
594 pub(crate) fn new(xref: Arc<XRef>) -> Option<Self> {
595 let xref_reference: &'static XRef = unsafe { core::mem::transmute(xref.deref()) };
603
604 let ctx = ReaderContext::new(xref_reference, false);
605 let normal = xref_reference
609 .get_with(xref.trailer_data().pages_ref, &ctx)
610 .and_then(|p| Pages::new(&p, &ctx, xref_reference));
611 let page_tree_rebuilt = normal.is_none();
612 let pages = normal.or_else(|| Pages::new_brute_force(&ctx, xref_reference))?;
613
614 Some(Self {
615 pages,
616 _xref: xref,
617 page_tree_rebuilt,
618 })
619 }
620
621 pub(crate) fn get(&self) -> &Pages<'_> {
622 &self.pages
623 }
624
625 pub(crate) fn page_tree_rebuilt(&self) -> bool {
627 self.page_tree_rebuilt
628 }
629 }
630}
631
#[cfg(test)]
mod cycle_tests {
    use crate::pdf::Pdf;
    use alloc::format;
    use alloc::vec::Vec;

    /// Builds a minimal PDF whose `Pages` node (object 2) lists itself among
    /// its own kids, next to one real page (object 3).
    fn cyclic_pages_pdf() -> Vec<u8> {
        const OBJECTS: [&[u8]; 3] = [
            b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n",
            b"2 0 obj\n<< /Type /Pages /Kids [3 0 R 2 0 R] /Count 1 >>\nendobj\n",
            b"3 0 obj\n<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >>\nendobj\n",
        ];

        let mut pdf: Vec<u8> = Vec::new();
        pdf.extend_from_slice(b"%PDF-1.7\n");

        // Record where each object starts so the xref table can point at it.
        let mut object_offsets = Vec::with_capacity(OBJECTS.len());
        for body in OBJECTS {
            object_offsets.push(pdf.len());
            pdf.extend_from_slice(body);
        }

        let startxref = pdf.len();
        pdf.extend_from_slice(b"xref\n0 4\n0000000000 65535 f \n");
        for offset in &object_offsets {
            pdf.extend_from_slice(format!("{offset:010} 00000 n \n").as_bytes());
        }

        pdf.extend_from_slice(
            format!("trailer\n<< /Size 4 /Root 1 0 R >>\nstartxref\n{startxref}\n%%EOF")
                .as_bytes(),
        );

        pdf
    }

    /// The self-referencing `Pages` node must be skipped, leaving exactly the
    /// one real page.
    #[test]
    fn cyclic_page_tree_yields_one_page_without_runaway() {
        let pdf = Pdf::new(cyclic_pages_pdf()).expect("cyclic PDF should still load");
        let page_count = pdf.pages().len();

        assert_eq!(page_count, 1, "page-tree cycle must not duplicate pages");
    }
}