1use crate::content::{TypedIter, UntypedIter};
4use crate::object::Array;
5use crate::object::Dict;
6use crate::object::Name;
7use crate::object::Rect;
8use crate::object::Stream;
9use crate::object::dict::keys::*;
10use crate::object::{Object, ObjectLike};
11use crate::reader::ReaderContext;
12use crate::sync::OnceLock;
13use crate::util::FloatExt;
14use crate::xref::XRef;
15use alloc::boxed::Box;
16use alloc::vec;
17use alloc::vec::Vec;
18use core::ops::Deref;
19use log::warn;
20
21#[derive(Debug, Clone)]
23struct PagesContext {
24 media_box: Option<Rect>,
25 crop_box: Option<Rect>,
26 rotate: Option<u32>,
27}
28
29impl PagesContext {
30 fn new() -> Self {
31 Self {
32 media_box: None,
33 crop_box: None,
34 rotate: None,
35 }
36 }
37}
38
/// The flat list of pages of a document, together with the cross-reference
/// table it was built from.
///
/// Dereferences to a slice of [`Page`]s.
pub struct Pages<'a> {
    /// All collected pages, in the order in which they were found.
    pages: Vec<Page<'a>>,
    /// The cross-reference table handed in at construction time.
    xref: &'a XRef,
}
44
45impl<'a> Pages<'a> {
46 pub(crate) fn new(
48 pages_dict: &Dict<'a>,
49 ctx: &ReaderContext<'a>,
50 xref: &'a XRef,
51 ) -> Option<Self> {
52 let mut pages = vec![];
53 let pages_ctx = PagesContext::new();
54 resolve_pages(
55 pages_dict,
56 &mut pages,
57 pages_ctx,
58 Resources::new(Dict::empty(), None, ctx),
59 )?;
60
61 Some(Self { pages, xref })
62 }
63
64 pub(crate) fn new_brute_force(ctx: &ReaderContext<'a>, xref: &'a XRef) -> Option<Self> {
69 let mut pages = vec![];
70
71 for object in xref.objects() {
72 if let Some(dict) = object.into_dict()
73 && let Some(page) = Page::new(
74 &dict,
75 &PagesContext::new(),
76 Resources::new(Dict::empty(), None, ctx),
77 true,
78 )
79 {
80 pages.push(page);
81 }
82 }
83
84 if pages.is_empty() {
85 return None;
86 }
87
88 Some(Self { pages, xref })
89 }
90
91 pub fn xref(&self) -> &'a XRef {
93 self.xref
94 }
95}
96
97impl<'a> Deref for Pages<'a> {
98 type Target = [Page<'a>];
99
100 fn deref(&self) -> &Self::Target {
101 &self.pages
102 }
103}
104
/// Fallback for the maximum nesting depth of the page tree. `resolve_pages`
/// uses it when the reader's load limits do not provide an object depth limit.
const MAX_PAGE_TREE_DEPTH: usize = 256;

/// Upper bound on the number of pages collected from the page tree. Once the
/// list has reached this size, `resolve_pages_depth` stops adding pages.
const MAX_PAGE_COUNT: usize = 100_000;
113
114fn resolve_pages<'a>(
115 pages_dict: &Dict<'a>,
116 entries: &mut Vec<Page<'a>>,
117 ctx: PagesContext,
118 resources: Resources<'a>,
119) -> Option<()> {
120 let max_depth = resources
121 .ctx
122 .load_limits()
123 .object_depth_limit()
124 .map(|d| d as usize)
125 .unwrap_or(MAX_PAGE_TREE_DEPTH);
126
127 resolve_pages_depth(pages_dict, entries, ctx, resources, 0, max_depth)
128}
129
130fn resolve_pages_depth<'a>(
131 pages_dict: &Dict<'a>,
132 entries: &mut Vec<Page<'a>>,
133 mut ctx: PagesContext,
134 resources: Resources<'a>,
135 depth: usize,
136 max_depth: usize,
137) -> Option<()> {
138 if depth > max_depth {
139 log::warn!("Page tree depth exceeds {max_depth}, stopping traversal");
140 return None;
141 }
142
143 if let Some(media_box) = pages_dict.get::<Rect>(MEDIA_BOX) {
144 ctx.media_box = Some(media_box);
145 }
146
147 if let Some(crop_box) = pages_dict.get::<Rect>(CROP_BOX) {
148 ctx.crop_box = Some(crop_box);
149 }
150
151 if let Some(rotate) = pages_dict.get::<i32>(ROTATE) {
154 ctx.rotate = Some(rotate.rem_euclid(360) as u32);
155 }
156
157 let resources = Resources::from_parent(
158 pages_dict.get::<Dict<'_>>(RESOURCES).unwrap_or_default(),
159 resources.clone(),
160 );
161
162 let kids = pages_dict.get::<Array<'a>>(KIDS)?;
163
164 for dict in kids.iter::<Dict<'_>>() {
165 if entries.len() >= MAX_PAGE_COUNT {
166 log::warn!("Page count exceeds {MAX_PAGE_COUNT}, stopping page tree traversal");
167 return Some(());
168 }
169
170 match dict.get::<Name>(TYPE).as_deref() {
171 Some(PAGES) => {
172 resolve_pages_depth(
173 &dict,
174 entries,
175 ctx.clone(),
176 resources.clone(),
177 depth + 1,
178 max_depth,
179 );
180 }
181 _ => {
184 if let Some(page) = Page::new(&dict, &ctx, resources.clone(), false) {
185 entries.push(page);
186 }
187 }
188 }
189 }
190
191 Some(())
192}
193
/// The rotation of a page, derived from its (possibly inherited) rotation
/// value in degrees.
#[derive(Debug, Copy, Clone)]
pub enum Rotation {
    /// No rotation (0 degrees). `Page::new` also uses this for any value that
    /// is not 90, 180 or 270.
    None,
    /// A rotation value of 90 degrees.
    Horizontal,
    /// A rotation value of 180 degrees.
    Flipped,
    /// A rotation value of 270 degrees.
    FlippedHorizontal,
}
206
/// A single page of a document.
pub struct Page<'a> {
    /// The page dictionary this page was created from.
    inner: Dict<'a>,
    /// The media box: the page's own, an inherited one, or US Letter as a
    /// last resort.
    media_box: Rect,
    /// The crop box: the page's own, an inherited one, or the media box as a
    /// last resort.
    crop_box: Rect,
    /// The rotation of the page.
    rotation: Rotation,
    /// Lazily initialized cache of the decoded content stream bytes; holds
    /// `None` if no content could be obtained.
    page_streams: OnceLock<Option<Vec<u8>>>,
    /// The page's resources, chained to the inherited ones.
    resources: Resources<'a>,
    /// The reader context, cloned from the inherited resources.
    ctx: ReaderContext<'a>,
}
217
218impl<'a> Page<'a> {
219 fn new(
220 dict: &Dict<'a>,
221 ctx: &PagesContext,
222 resources: Resources<'a>,
223 brute_force: bool,
224 ) -> Option<Self> {
225 if brute_force && !dict.contains_key(CONTENTS) {
228 return None;
229 }
230
231 let media_box = dict
232 .get::<Rect>(MEDIA_BOX)
233 .or(ctx.media_box)
234 .unwrap_or(US_LETTER);
235
236 let crop_box = dict
237 .get::<Rect>(CROP_BOX)
238 .or(ctx.crop_box)
239 .unwrap_or(media_box);
240
241 let rotation = match dict
242 .get::<i32>(ROTATE)
243 .map(|r| r.rem_euclid(360) as u32)
244 .or(ctx.rotate)
245 .unwrap_or(0)
246 {
247 0 => Rotation::None,
248 90 => Rotation::Horizontal,
249 180 => Rotation::Flipped,
250 270 => Rotation::FlippedHorizontal,
251 _ => Rotation::None,
252 };
253
254 let ctx = resources.ctx.clone();
255 let resources = Resources::from_parent(
256 dict.get::<Dict<'_>>(RESOURCES).unwrap_or_default(),
257 resources,
258 );
259
260 Some(Self {
261 inner: dict.clone(),
262 media_box,
263 crop_box,
264 rotation,
265 page_streams: OnceLock::new(),
266 resources,
267 ctx,
268 })
269 }
270
271 fn operations_impl(&self) -> Option<UntypedIter<'_>> {
272 let stream = self.page_stream()?;
273 let iter = UntypedIter::new(stream);
274
275 Some(iter)
276 }
277
278 pub fn page_stream(&self) -> Option<&[u8]> {
280 let convert_single = |s: Stream<'_>| {
281 let data = s.decoded().ok()?;
282 Some(data.to_vec())
283 };
284
285 self.page_streams
286 .get_or_init(|| {
287 if let Some(stream) = self.inner.get::<Stream<'_>>(CONTENTS) {
288 convert_single(stream)
289 } else if let Some(array) = self.inner.get::<Array<'_>>(CONTENTS) {
290 let streams = array.iter::<Stream<'_>>().flat_map(convert_single);
291
292 let mut collected = vec![];
293
294 for stream in streams {
295 collected.extend(stream);
296 collected.push(b' ');
298 }
299
300 Some(collected)
301 } else {
302 warn!("contents entry of page was neither stream nor array of streams");
303
304 None
305 }
306 })
307 .as_ref()
308 .map(|d| d.as_slice())
309 }
310
311 pub fn resources(&self) -> &Resources<'a> {
313 &self.resources
314 }
315
316 pub fn media_box(&self) -> Rect {
318 self.media_box
319 }
320
321 pub fn rotation(&self) -> Rotation {
323 self.rotation
324 }
325
326 pub fn crop_box(&self) -> Rect {
328 self.crop_box
329 }
330
331 pub fn intersected_crop_box(&self) -> Rect {
333 self.crop_box().intersect(self.media_box())
334 }
335
336 pub fn base_dimensions(&self) -> (f32, f32) {
350 let crop_box = self.crop_box();
351 let media_box = self.media_box();
352
353 let effective = if crop_box.x0 >= media_box.x0 && crop_box.y0 >= media_box.y0 {
357 crop_box.intersect(media_box)
358 } else {
359 crop_box
360 };
361
362 if (effective.width() as f32).is_nearly_zero()
363 || (effective.height() as f32).is_nearly_zero()
364 {
365 (US_LETTER.width() as f32, US_LETTER.height() as f32)
366 } else {
367 (
368 effective.width().max(1.0) as f32,
369 effective.height().max(1.0) as f32,
370 )
371 }
372 }
373
374 pub fn render_dimensions(&self) -> (f32, f32) {
379 let (mut base_width, mut base_height) = self.base_dimensions();
380
381 if matches!(
382 self.rotation(),
383 Rotation::Horizontal | Rotation::FlippedHorizontal
384 ) {
385 core::mem::swap(&mut base_width, &mut base_height);
386 }
387
388 (base_width, base_height)
389 }
390
391 pub fn operations(&self) -> UntypedIter<'_> {
393 self.operations_impl().unwrap_or(UntypedIter::empty())
394 }
395
396 pub fn raw(&self) -> &Dict<'a> {
398 &self.inner
399 }
400
401 pub fn xref(&self) -> &'a XRef {
403 self.ctx.xref()
404 }
405
406 pub fn typed_operations(&self) -> TypedIter<'_> {
408 TypedIter::from_untyped(self.operations())
409 }
410
411 pub fn annots(&self) -> Vec<Dict<'a>> {
413 self.inner
414 .get::<Array<'_>>(crate::object::dict::keys::ANNOTS)
415 .map(|arr| arr.iter::<Dict<'_>>().collect())
416 .unwrap_or_default()
417 }
418}
419
/// The resources of a page or page tree node, linked to the resources of its
/// parent.
///
/// Each public field holds one sub-dictionary of the resource dictionary and
/// is empty if that entry could not be read as a dictionary.
#[derive(Clone, Debug)]
pub struct Resources<'a> {
    /// The resources of the enclosing scope, consulted by the `get_*` methods
    /// when a name is not found locally.
    parent: Option<Box<Self>>,
    /// The reader context these resources belong to.
    ctx: ReaderContext<'a>,
    /// The `EXT_G_STATE` sub-dictionary.
    pub ext_g_states: Dict<'a>,
    /// The `FONT` sub-dictionary.
    pub fonts: Dict<'a>,
    /// The `PROPERTIES` sub-dictionary.
    pub properties: Dict<'a>,
    /// The `COLORSPACE` sub-dictionary.
    pub color_spaces: Dict<'a>,
    /// The `XOBJECT` sub-dictionary.
    pub x_objects: Dict<'a>,
    /// The `PATTERN` sub-dictionary.
    pub patterns: Dict<'a>,
    /// The `SHADING` sub-dictionary.
    pub shadings: Dict<'a>,
}
440
441impl<'a> Resources<'a> {
442 pub fn from_parent(resources: Dict<'a>, parent: Self) -> Self {
444 let ctx = parent.ctx.clone();
445
446 Self::new(resources, Some(parent), &ctx)
447 }
448
449 pub(crate) fn new(resources: Dict<'a>, parent: Option<Self>, ctx: &ReaderContext<'a>) -> Self {
451 let ext_g_states = resources.get::<Dict<'_>>(EXT_G_STATE).unwrap_or_default();
452 let fonts = resources.get::<Dict<'_>>(FONT).unwrap_or_default();
453 let color_spaces = resources.get::<Dict<'_>>(COLORSPACE).unwrap_or_default();
454 let x_objects = resources.get::<Dict<'_>>(XOBJECT).unwrap_or_default();
455 let patterns = resources.get::<Dict<'_>>(PATTERN).unwrap_or_default();
456 let shadings = resources.get::<Dict<'_>>(SHADING).unwrap_or_default();
457 let properties = resources.get::<Dict<'_>>(PROPERTIES).unwrap_or_default();
458
459 let parent = parent.map(Box::new);
460
461 Self {
462 parent,
463 ext_g_states,
464 fonts,
465 color_spaces,
466 properties,
467 x_objects,
468 patterns,
469 shadings,
470 ctx: ctx.clone(),
471 }
472 }
473
474 fn get_resource<T: ObjectLike<'a>>(&self, name: Name, dict: &Dict<'a>) -> Option<T> {
475 dict.get::<T>(name.deref())
476 }
477
478 pub fn parent(&self) -> Option<&Self> {
480 self.parent.as_deref()
481 }
482
483 pub fn get_ext_g_state(&self, name: Name) -> Option<Dict<'a>> {
485 self.get_resource::<Dict<'_>>(name.clone(), &self.ext_g_states)
486 .or_else(|| self.parent.as_ref().and_then(|p| p.get_ext_g_state(name)))
487 }
488
489 pub fn get_color_space(&self, name: Name) -> Option<Object<'a>> {
491 self.get_resource::<Object<'_>>(name.clone(), &self.color_spaces)
492 .or_else(|| self.parent.as_ref().and_then(|p| p.get_color_space(name)))
493 }
494
495 pub fn get_font(&self, name: Name) -> Option<Dict<'a>> {
497 self.get_resource::<Dict<'_>>(name.clone(), &self.fonts)
498 .or_else(|| self.parent.as_ref().and_then(|p| p.get_font(name)))
499 }
500
501 pub fn get_pattern(&self, name: Name) -> Option<Object<'a>> {
503 self.get_resource::<Object<'_>>(name.clone(), &self.patterns)
504 .or_else(|| self.parent.as_ref().and_then(|p| p.get_pattern(name)))
505 }
506
507 pub fn get_x_object(&self, name: Name) -> Option<Stream<'a>> {
509 self.get_resource::<Stream<'_>>(name.clone(), &self.x_objects)
510 .or_else(|| self.parent.as_ref().and_then(|p| p.get_x_object(name)))
511 }
512
513 pub fn get_shading(&self, name: Name) -> Option<Object<'a>> {
515 self.get_resource::<Object<'_>>(name.clone(), &self.shadings)
516 .or_else(|| self.parent.as_ref().and_then(|p| p.get_shading(name)))
517 }
518}
519
/// Number of points in one inch.
const POINTS_PER_INCH: f64 = 72.0;
/// Number of points in one millimeter, derived from 2.54 cm (25.4 mm) per inch.
const POINTS_PER_MM: f64 = 1.0 / (10.0 * 2.54) * POINTS_PER_INCH;
523
/// A rectangle with the dimensions of an A4 page (210 mm x 297 mm), expressed
/// in points and anchored at the origin.
pub const A4: Rect = Rect {
    x0: 0.0,
    y0: 0.0,
    x1: 210.0 * POINTS_PER_MM,
    y1: 297.0 * POINTS_PER_MM,
};
531
/// A rectangle with the dimensions of a US Letter page (8.5 in x 11 in),
/// expressed in points and anchored at the origin.
///
/// Used as the fallback media box in `Page::new` and as the fallback size in
/// `Page::base_dimensions`.
const US_LETTER: Rect = Rect {
    x0: 0.0,
    y0: 0.0,
    x1: 8.5 * POINTS_PER_INCH,
    y1: 11.0 * POINTS_PER_INCH,
};
545
/// A self-contained bundle of a shared xref table and the pages resolved
/// from it.
pub(crate) mod cached {
    use crate::page::Pages;
    use crate::reader::ReaderContext;
    use crate::xref::XRef;
    use core::ops::Deref;

    // With the `std` feature, the shared pointer is the standard `Arc`.
    #[cfg(feature = "std")]
    pub(crate) use std::sync::Arc;

    // Without `std`, `Rc` is used under the same name.
    #[cfg(not(feature = "std"))]
    pub(crate) use alloc::rc::Rc as Arc;

    /// Pages together with the xref table they borrow from.
    ///
    /// The field order matters: struct fields are dropped in declaration
    /// order, so `pages` (which borrows from the xref table) is dropped
    /// before `_xref` (which keeps that table alive).
    pub(crate) struct CachedPages {
        /// The resolved pages; the `'static` lifetime is a stand-in for
        /// "as long as `_xref` is alive".
        pages: Pages<'static>,
        /// Never read; it only keeps the xref table alive for `pages`.
        _xref: Arc<XRef>,
    }

    impl CachedPages {
        /// Resolve the pages of `xref` and bundle them with it.
        ///
        /// The page tree referenced by the trailer data is tried first. If
        /// that yields nothing, the brute-force search is used. Returns
        /// `None` if both fail.
        pub(crate) fn new(xref: Arc<XRef>) -> Option<Self> {
            // SAFETY: the reference points into the shared allocation owned
            // by `xref`, which does not move when the pointer itself is moved
            // into `_xref` below. `_xref` outlives `pages` (see the field
            // order of `CachedPages`), and `get` only hands the pages out
            // with a lifetime bound to `&self`, so the fabricated `'static`
            // lifetime is not exposed by this module.
            // NOTE(review): this holds for the code visible here; confirm
            // that nothing else can move `pages` out of the struct.
            let xref_reference: &'static XRef = unsafe { core::mem::transmute(xref.deref()) };

            let ctx = ReaderContext::new(xref_reference, false);
            let pages = xref_reference
                .get_with(xref.trailer_data().pages_ref, &ctx)
                .and_then(|p| Pages::new(&p, &ctx, xref_reference))
                .or_else(|| Pages::new_brute_force(&ctx, xref_reference))?;

            Some(Self { pages, _xref: xref })
        }

        /// Return the pages, with their lifetime shortened from `'static` to
        /// that of the borrow of `self`.
        pub(crate) fn get(&self) -> &Pages<'_> {
            &self.pages
        }
    }
}