1use crate::content::{TypedIter, UntypedIter};
4use crate::object::Array;
5use crate::object::Dict;
6use crate::object::Name;
7use crate::object::Rect;
8use crate::object::Stream;
9use crate::object::dict::keys::*;
10use crate::object::{Object, ObjectLike};
11use crate::reader::ReaderContext;
12use crate::sync::OnceLock;
13use crate::util::FloatExt;
14use crate::xref::XRef;
15use alloc::boxed::Box;
16use alloc::vec;
17use alloc::vec::Vec;
18use core::ops::Deref;
19use log::warn;
20
/// Attributes that page-tree nodes hand down to their descendants.
///
/// `resolve_pages_depth` overwrites a field whenever the node it visits
/// defines the corresponding key, and `Page::new` falls back to these values
/// when a page dictionary has no entry of its own.
#[derive(Debug, Clone)]
struct PagesContext {
    /// Most recent `MEDIA_BOX` found on the path from the root, if any.
    media_box: Option<Rect>,
    /// Most recent `CROP_BOX` found on the path from the root, if any.
    crop_box: Option<Rect>,
    /// Most recent `ROTATE` value found on the path from the root, already
    /// reduced into the range `0..360`.
    rotate: Option<u32>,
}
28
29impl PagesContext {
30 fn new() -> Self {
31 Self {
32 media_box: None,
33 crop_box: None,
34 rotate: None,
35 }
36 }
37}
38
/// The pages of a document, in the order in which they were collected.
///
/// Dereferences to a slice of [`Page`], so it can be indexed and iterated
/// like one.
pub struct Pages<'a> {
    /// Pages gathered either from the page tree or by the brute-force scan.
    pages: Vec<Page<'a>>,
    /// Cross-reference table the pages were read from.
    xref: &'a XRef,
}
44
45impl<'a> Pages<'a> {
46 pub(crate) fn new(
48 pages_dict: &Dict<'a>,
49 ctx: &ReaderContext<'a>,
50 xref: &'a XRef,
51 ) -> Option<Self> {
52 let mut pages = vec![];
53 let pages_ctx = PagesContext::new();
54 resolve_pages(
55 pages_dict,
56 &mut pages,
57 pages_ctx,
58 Resources::new(Dict::empty(), None, ctx),
59 )?;
60
61 Some(Self { pages, xref })
62 }
63
64 pub(crate) fn new_brute_force(ctx: &ReaderContext<'a>, xref: &'a XRef) -> Option<Self> {
69 let mut pages = vec![];
70
71 for object in xref.objects() {
72 if let Some(dict) = object.into_dict()
73 && let Some(page) = Page::new(
74 &dict,
75 &PagesContext::new(),
76 Resources::new(Dict::empty(), None, ctx),
77 true,
78 )
79 {
80 pages.push(page);
81 }
82 }
83
84 if pages.is_empty() {
85 return None;
86 }
87
88 Some(Self { pages, xref })
89 }
90
91 pub fn xref(&self) -> &'a XRef {
93 self.xref
94 }
95}
96
97impl<'a> Deref for Pages<'a> {
98 type Target = [Page<'a>];
99
100 fn deref(&self) -> &Self::Target {
101 &self.pages
102 }
103}
104
/// Deepest nesting level `resolve_pages_depth` descends to; a node beyond this
/// depth is abandoned with a warning.
const MAX_PAGE_TREE_DEPTH: usize = 256;

/// Number of collected pages at which the page-tree traversal stops adding
/// further pages.
const MAX_PAGE_COUNT: usize = 100_000;
113
114fn resolve_pages<'a>(
115 pages_dict: &Dict<'a>,
116 entries: &mut Vec<Page<'a>>,
117 ctx: PagesContext,
118 resources: Resources<'a>,
119) -> Option<()> {
120 resolve_pages_depth(pages_dict, entries, ctx, resources, 0)
121}
122
123fn resolve_pages_depth<'a>(
124 pages_dict: &Dict<'a>,
125 entries: &mut Vec<Page<'a>>,
126 mut ctx: PagesContext,
127 resources: Resources<'a>,
128 depth: usize,
129) -> Option<()> {
130 if depth > MAX_PAGE_TREE_DEPTH {
131 log::warn!("Page tree depth exceeds {MAX_PAGE_TREE_DEPTH}, stopping traversal");
132 return None;
133 }
134
135 if let Some(media_box) = pages_dict.get::<Rect>(MEDIA_BOX) {
136 ctx.media_box = Some(media_box);
137 }
138
139 if let Some(crop_box) = pages_dict.get::<Rect>(CROP_BOX) {
140 ctx.crop_box = Some(crop_box);
141 }
142
143 if let Some(rotate) = pages_dict.get::<i32>(ROTATE) {
146 ctx.rotate = Some(rotate.rem_euclid(360) as u32);
147 }
148
149 let resources = Resources::from_parent(
150 pages_dict.get::<Dict<'_>>(RESOURCES).unwrap_or_default(),
151 resources.clone(),
152 );
153
154 let kids = pages_dict.get::<Array<'a>>(KIDS)?;
155
156 for dict in kids.iter::<Dict<'_>>() {
157 if entries.len() >= MAX_PAGE_COUNT {
158 log::warn!("Page count exceeds {MAX_PAGE_COUNT}, stopping page tree traversal");
159 return Some(());
160 }
161
162 match dict.get::<Name>(TYPE).as_deref() {
163 Some(PAGES) => {
164 resolve_pages_depth(&dict, entries, ctx.clone(), resources.clone(), depth + 1);
165 }
166 _ => {
169 if let Some(page) = Page::new(&dict, &ctx, resources.clone(), false) {
170 entries.push(page);
171 }
172 }
173 }
174 }
175
176 Some(())
177}
178
/// Rotation of a page, derived from its `ROTATE` entry.
///
/// `Page::new` reduces the entry into the range `0..360` and maps the values
/// 0, 90, 180 and 270 to the variants below; every other value falls back to
/// [`Rotation::None`].
#[derive(Debug, Copy, Clone)]
pub enum Rotation {
    /// Normalized value of 0, or a value that is not one of the supported
    /// quarter turns.
    None,
    /// Normalized value of 90. Width and height are swapped by
    /// `Page::render_dimensions`.
    Horizontal,
    /// Normalized value of 180.
    Flipped,
    /// Normalized value of 270. Width and height are swapped by
    /// `Page::render_dimensions`.
    FlippedHorizontal,
}
191
/// A single page of a document.
pub struct Page<'a> {
    /// The dictionary the page was created from.
    inner: Dict<'a>,
    /// Media box taken from the page, an ancestor, or the US Letter fallback.
    media_box: Rect,
    /// Crop box taken from the page or an ancestor; defaults to the media box.
    crop_box: Rect,
    /// Rotation resolved from the page's or an ancestor's `ROTATE` entry.
    rotation: Rotation,
    /// Lazily decoded content of the page; `None` inside the lock records
    /// that decoding produced nothing.
    page_streams: OnceLock<Option<Vec<u8>>>,
    /// Resources of the page, chained to those of its ancestors.
    resources: Resources<'a>,
    /// Reader context copied from the resources the page was created with.
    ctx: ReaderContext<'a>,
}
202
203impl<'a> Page<'a> {
204 fn new(
205 dict: &Dict<'a>,
206 ctx: &PagesContext,
207 resources: Resources<'a>,
208 brute_force: bool,
209 ) -> Option<Self> {
210 if brute_force && !dict.contains_key(CONTENTS) {
213 return None;
214 }
215
216 let media_box = dict
217 .get::<Rect>(MEDIA_BOX)
218 .or(ctx.media_box)
219 .unwrap_or(US_LETTER);
220
221 let crop_box = dict
222 .get::<Rect>(CROP_BOX)
223 .or(ctx.crop_box)
224 .unwrap_or(media_box);
225
226 let rotation = match dict
227 .get::<i32>(ROTATE)
228 .map(|r| r.rem_euclid(360) as u32)
229 .or(ctx.rotate)
230 .unwrap_or(0)
231 {
232 0 => Rotation::None,
233 90 => Rotation::Horizontal,
234 180 => Rotation::Flipped,
235 270 => Rotation::FlippedHorizontal,
236 _ => Rotation::None,
237 };
238
239 let ctx = resources.ctx.clone();
240 let resources = Resources::from_parent(
241 dict.get::<Dict<'_>>(RESOURCES).unwrap_or_default(),
242 resources,
243 );
244
245 Some(Self {
246 inner: dict.clone(),
247 media_box,
248 crop_box,
249 rotation,
250 page_streams: OnceLock::new(),
251 resources,
252 ctx,
253 })
254 }
255
256 fn operations_impl(&self) -> Option<UntypedIter<'_>> {
257 let stream = self.page_stream()?;
258 let iter = UntypedIter::new(stream);
259
260 Some(iter)
261 }
262
263 pub fn page_stream(&self) -> Option<&[u8]> {
265 let convert_single = |s: Stream<'_>| {
266 let data = s.decoded().ok()?;
267 Some(data.to_vec())
268 };
269
270 self.page_streams
271 .get_or_init(|| {
272 if let Some(stream) = self.inner.get::<Stream<'_>>(CONTENTS) {
273 convert_single(stream)
274 } else if let Some(array) = self.inner.get::<Array<'_>>(CONTENTS) {
275 let streams = array.iter::<Stream<'_>>().flat_map(convert_single);
276
277 let mut collected = vec![];
278
279 for stream in streams {
280 collected.extend(stream);
281 collected.push(b' ');
283 }
284
285 Some(collected)
286 } else {
287 warn!("contents entry of page was neither stream nor array of streams");
288
289 None
290 }
291 })
292 .as_ref()
293 .map(|d| d.as_slice())
294 }
295
296 pub fn resources(&self) -> &Resources<'a> {
298 &self.resources
299 }
300
301 pub fn media_box(&self) -> Rect {
303 self.media_box
304 }
305
306 pub fn rotation(&self) -> Rotation {
308 self.rotation
309 }
310
311 pub fn crop_box(&self) -> Rect {
313 self.crop_box
314 }
315
316 pub fn intersected_crop_box(&self) -> Rect {
318 self.crop_box().intersect(self.media_box())
319 }
320
321 pub fn base_dimensions(&self) -> (f32, f32) {
335 let crop_box = self.crop_box();
336 let media_box = self.media_box();
337
338 let effective = if crop_box.x0 >= media_box.x0 && crop_box.y0 >= media_box.y0 {
342 crop_box.intersect(media_box)
343 } else {
344 crop_box
345 };
346
347 if (effective.width() as f32).is_nearly_zero()
348 || (effective.height() as f32).is_nearly_zero()
349 {
350 (US_LETTER.width() as f32, US_LETTER.height() as f32)
351 } else {
352 (
353 effective.width().max(1.0) as f32,
354 effective.height().max(1.0) as f32,
355 )
356 }
357 }
358
359 pub fn render_dimensions(&self) -> (f32, f32) {
364 let (mut base_width, mut base_height) = self.base_dimensions();
365
366 if matches!(
367 self.rotation(),
368 Rotation::Horizontal | Rotation::FlippedHorizontal
369 ) {
370 core::mem::swap(&mut base_width, &mut base_height);
371 }
372
373 (base_width, base_height)
374 }
375
376 pub fn operations(&self) -> UntypedIter<'_> {
378 self.operations_impl().unwrap_or(UntypedIter::empty())
379 }
380
381 pub fn raw(&self) -> &Dict<'a> {
383 &self.inner
384 }
385
386 pub fn xref(&self) -> &'a XRef {
388 self.ctx.xref()
389 }
390
391 pub fn typed_operations(&self) -> TypedIter<'_> {
393 TypedIter::from_untyped(self.operations())
394 }
395
396 pub fn annots(&self) -> Vec<Dict<'a>> {
398 self.inner
399 .get::<Array<'_>>(crate::object::dict::keys::ANNOTS)
400 .map(|arr| arr.iter::<Dict<'_>>().collect())
401 .unwrap_or_default()
402 }
403}
404
/// A resource dictionary split into its sub-dictionaries, optionally chained
/// to the resources of an enclosing scope.
///
/// The `get_*` lookups consult this level first and then walk up the parent
/// chain.
#[derive(Clone, Debug)]
pub struct Resources<'a> {
    /// Resources of the enclosing scope, consulted when a lookup misses here.
    parent: Option<Box<Self>>,
    /// Reader context these resources were created with.
    ctx: ReaderContext<'a>,
    /// Sub-dictionary stored under `EXT_G_STATE` (empty when absent).
    pub ext_g_states: Dict<'a>,
    /// Sub-dictionary stored under `FONT` (empty when absent).
    pub fonts: Dict<'a>,
    /// Sub-dictionary stored under `PROPERTIES` (empty when absent).
    pub properties: Dict<'a>,
    /// Sub-dictionary stored under `COLORSPACE` (empty when absent).
    pub color_spaces: Dict<'a>,
    /// Sub-dictionary stored under `XOBJECT` (empty when absent).
    pub x_objects: Dict<'a>,
    /// Sub-dictionary stored under `PATTERN` (empty when absent).
    pub patterns: Dict<'a>,
    /// Sub-dictionary stored under `SHADING` (empty when absent).
    pub shadings: Dict<'a>,
}
425
426impl<'a> Resources<'a> {
427 pub fn from_parent(resources: Dict<'a>, parent: Self) -> Self {
429 let ctx = parent.ctx.clone();
430
431 Self::new(resources, Some(parent), &ctx)
432 }
433
434 pub(crate) fn new(resources: Dict<'a>, parent: Option<Self>, ctx: &ReaderContext<'a>) -> Self {
436 let ext_g_states = resources.get::<Dict<'_>>(EXT_G_STATE).unwrap_or_default();
437 let fonts = resources.get::<Dict<'_>>(FONT).unwrap_or_default();
438 let color_spaces = resources.get::<Dict<'_>>(COLORSPACE).unwrap_or_default();
439 let x_objects = resources.get::<Dict<'_>>(XOBJECT).unwrap_or_default();
440 let patterns = resources.get::<Dict<'_>>(PATTERN).unwrap_or_default();
441 let shadings = resources.get::<Dict<'_>>(SHADING).unwrap_or_default();
442 let properties = resources.get::<Dict<'_>>(PROPERTIES).unwrap_or_default();
443
444 let parent = parent.map(Box::new);
445
446 Self {
447 parent,
448 ext_g_states,
449 fonts,
450 color_spaces,
451 properties,
452 x_objects,
453 patterns,
454 shadings,
455 ctx: ctx.clone(),
456 }
457 }
458
459 fn get_resource<T: ObjectLike<'a>>(&self, name: Name, dict: &Dict<'a>) -> Option<T> {
460 dict.get::<T>(name.deref())
461 }
462
463 pub fn parent(&self) -> Option<&Self> {
465 self.parent.as_deref()
466 }
467
468 pub fn get_ext_g_state(&self, name: Name) -> Option<Dict<'a>> {
470 self.get_resource::<Dict<'_>>(name.clone(), &self.ext_g_states)
471 .or_else(|| self.parent.as_ref().and_then(|p| p.get_ext_g_state(name)))
472 }
473
474 pub fn get_color_space(&self, name: Name) -> Option<Object<'a>> {
476 self.get_resource::<Object<'_>>(name.clone(), &self.color_spaces)
477 .or_else(|| self.parent.as_ref().and_then(|p| p.get_color_space(name)))
478 }
479
480 pub fn get_font(&self, name: Name) -> Option<Dict<'a>> {
482 self.get_resource::<Dict<'_>>(name.clone(), &self.fonts)
483 .or_else(|| self.parent.as_ref().and_then(|p| p.get_font(name)))
484 }
485
486 pub fn get_pattern(&self, name: Name) -> Option<Object<'a>> {
488 self.get_resource::<Object<'_>>(name.clone(), &self.patterns)
489 .or_else(|| self.parent.as_ref().and_then(|p| p.get_pattern(name)))
490 }
491
492 pub fn get_x_object(&self, name: Name) -> Option<Stream<'a>> {
494 self.get_resource::<Stream<'_>>(name.clone(), &self.x_objects)
495 .or_else(|| self.parent.as_ref().and_then(|p| p.get_x_object(name)))
496 }
497
498 pub fn get_shading(&self, name: Name) -> Option<Object<'a>> {
500 self.get_resource::<Object<'_>>(name.clone(), &self.shadings)
501 .or_else(|| self.parent.as_ref().and_then(|p| p.get_shading(name)))
502 }
503}
504
/// Number of points in one inch.
const POINTS_PER_INCH: f64 = 72.0;
/// Number of points in one millimetre (one inch being 2.54 cm).
const POINTS_PER_MM: f64 = 1.0 / (10.0 * 2.54) * POINTS_PER_INCH;

/// A rectangle anchored at the origin that measures 210 mm by 297 mm,
/// expressed in points.
pub const A4: Rect = Rect {
    x0: 0.0,
    y0: 0.0,
    x1: 210.0 * POINTS_PER_MM,
    y1: 297.0 * POINTS_PER_MM,
};

/// A rectangle anchored at the origin that measures 8.5 in by 11 in,
/// expressed in points.
///
/// Used as the fallback when a page has no media box and when the effective
/// page box is degenerate.
const US_LETTER: Rect = Rect {
    x0: 0.0,
    y0: 0.0,
    x1: 8.5 * POINTS_PER_INCH,
    y1: 11.0 * POINTS_PER_INCH,
};
530
/// A self-contained bundle of a page list and the cross-reference table the
/// pages borrow from.
pub(crate) mod cached {
    use crate::page::Pages;
    use crate::reader::ReaderContext;
    use crate::xref::XRef;
    use core::ops::Deref;

    // With `std` the shared pointer is the atomically reference-counted one.
    #[cfg(feature = "std")]
    pub(crate) use std::sync::Arc;

    // Without `std` a plain `Rc` stands in under the same name.
    #[cfg(not(feature = "std"))]
    pub(crate) use alloc::rc::Rc as Arc;

    /// Pages together with the shared `XRef` that keeps their borrowed data
    /// alive.
    pub(crate) struct CachedPages {
        // Declared before `_xref`: struct fields are dropped in declaration
        // order, so the pages go away before the table they point into.
        pages: Pages<'static>,
        // Never read; held only so the `XRef` allocation outlives `pages`.
        _xref: Arc<XRef>,
    }

    impl CachedPages {
        /// Resolves the pages of `xref`, first through the page tree named by
        /// the trailer and, if that yields nothing, through the brute-force
        /// scan. Returns `None` when both approaches fail.
        pub(crate) fn new(xref: Arc<XRef>) -> Option<Self> {
            // SAFETY: the transmute only extends the lifetime of the borrow to
            // `'static`. This relies on three things visible in this module:
            // the `XRef` lives behind the shared pointer, so moving `xref`
            // into the struct does not move the referenced value; the pointer
            // is stored in `_xref`, keeping the allocation alive for as long
            // as `pages` exists; and `get` only hands the pages out with the
            // lifetime of `&self`, so the `'static` lifetime never escapes.
            let xref_reference: &'static XRef = unsafe { core::mem::transmute(xref.deref()) };

            let ctx = ReaderContext::new(xref_reference, false);
            let pages = xref_reference
                .get_with(xref.trailer_data().pages_ref, &ctx)
                .and_then(|p| Pages::new(&p, &ctx, xref_reference))
                // Fall back to scanning all objects when the tree is unusable.
                .or_else(|| Pages::new_brute_force(&ctx, xref_reference))?;

            Some(Self { pages, _xref: xref })
        }

        /// Returns the pages, with their lifetime shortened to that of `self`.
        pub(crate) fn get(&self) -> &Pages<'_> {
            &self.pages
        }
    }
}