1use crate::error::{JustPdfError, Result};
2use crate::object::{IndirectRef, PdfDict, PdfObject};
3use crate::parser::PdfDocument;
4
/// A rectangle stored as two corner points, named after the
/// `[llx lly urx ury]` order in which it is read from and printed as a PDF
/// array.
///
/// The values are kept exactly as supplied: nothing forces the "lower-left"
/// corner to be numerically below or to the left of the "upper-right" one.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Rect {
    /// X coordinate of the lower-left corner.
    pub llx: f64,
    /// Y coordinate of the lower-left corner.
    pub lly: f64,
    /// X coordinate of the upper-right corner.
    pub urx: f64,
    /// Y coordinate of the upper-right corner.
    pub ury: f64,
}
13
14impl Rect {
15 pub fn width(&self) -> f64 {
16 (self.urx - self.llx).abs()
17 }
18
19 pub fn height(&self) -> f64 {
20 (self.ury - self.lly).abs()
21 }
22
23 pub fn from_pdf_array(arr: &[PdfObject]) -> Option<Self> {
25 if arr.len() < 4 {
26 return None;
27 }
28 Some(Self {
29 llx: arr[0].as_f64()?,
30 lly: arr[1].as_f64()?,
31 urx: arr[2].as_f64()?,
32 ury: arr[3].as_f64()?,
33 })
34 }
35}
36
37impl std::fmt::Display for Rect {
38 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
39 write!(f, "[{} {} {} {}]", self.llx, self.lly, self.urx, self.ury)
40 }
41}
42
43#[derive(Debug, Clone)]
45pub struct PageInfo {
46 pub index: usize,
48 pub page_ref: IndirectRef,
50 pub media_box: Rect,
52 pub crop_box: Option<Rect>,
54 pub bleed_box: Option<Rect>,
56 pub trim_box: Option<Rect>,
58 pub art_box: Option<Rect>,
60 pub rotate: i64,
62 pub contents_ref: Option<PdfObject>,
64 pub resources_ref: Option<PdfObject>,
66}
67
68pub fn collect_pages(doc: &PdfDocument) -> Result<Vec<PageInfo>> {
70 let catalog_ref = doc
71 .catalog_ref()
72 .ok_or(JustPdfError::TrailerNotFound)?
73 .clone();
74
75 let catalog = doc.resolve(&catalog_ref)?;
76 let catalog_dict = catalog.as_dict().ok_or(JustPdfError::InvalidObject {
77 offset: 0,
78 detail: "catalog is not a dict".into(),
79 })?;
80
81 let pages_ref = catalog_dict
82 .get_ref(b"Pages")
83 .ok_or(JustPdfError::InvalidObject {
84 offset: 0,
85 detail: "catalog has no /Pages".into(),
86 })?
87 .clone();
88
89 let mut pages = Vec::new();
90 let inherited = InheritedAttrs::default();
91 walk_page_tree(doc, &pages_ref, &inherited, &mut pages)?;
92 Ok(pages)
93}
94
95pub fn page_count(doc: &PdfDocument) -> Result<usize> {
97 let catalog_ref = doc
98 .catalog_ref()
99 .ok_or(JustPdfError::TrailerNotFound)?
100 .clone();
101
102 let catalog = doc.resolve(&catalog_ref)?;
103 let catalog_dict = catalog.as_dict().ok_or(JustPdfError::InvalidObject {
104 offset: 0,
105 detail: "catalog is not a dict".into(),
106 })?;
107
108 let pages_ref = catalog_dict
109 .get_ref(b"Pages")
110 .ok_or(JustPdfError::InvalidObject {
111 offset: 0,
112 detail: "catalog has no /Pages".into(),
113 })?
114 .clone();
115
116 let pages_obj = doc.resolve(&pages_ref)?;
117 let pages_dict = pages_obj.as_dict().ok_or(JustPdfError::InvalidObject {
118 offset: 0,
119 detail: "Pages is not a dict".into(),
120 })?;
121
122 Ok(pages_dict.get_i64(b"Count").unwrap_or(0) as usize)
123}
124
125pub fn get_page(doc: &PdfDocument, index: usize) -> Result<PageInfo> {
132 let catalog_ref = doc
133 .catalog_ref()
134 .ok_or(JustPdfError::TrailerNotFound)?
135 .clone();
136
137 let catalog = doc.resolve(&catalog_ref)?;
138 let catalog_dict = catalog.as_dict().ok_or(JustPdfError::InvalidObject {
139 offset: 0,
140 detail: "catalog is not a dict".into(),
141 })?;
142
143 let pages_ref = catalog_dict
144 .get_ref(b"Pages")
145 .ok_or(JustPdfError::InvalidObject {
146 offset: 0,
147 detail: "catalog has no /Pages".into(),
148 })?
149 .clone();
150
151 let pages_obj = doc.resolve(&pages_ref)?;
153 let pages_dict = pages_obj.as_dict().ok_or(JustPdfError::InvalidObject {
154 offset: 0,
155 detail: "Pages is not a dict".into(),
156 })?;
157 let count = pages_dict.get_i64(b"Count").unwrap_or(0) as usize;
158 if index >= count {
159 return Err(JustPdfError::InvalidObject {
160 offset: 0,
161 detail: format!(
162 "page index {index} out of range (document has {count} pages)"
163 ),
164 });
165 }
166
167 let inherited = InheritedAttrs::default();
168 let mut counter: usize = 0;
169 walk_page_tree_find(doc, &pages_ref, &inherited, index, &mut counter)
170 .and_then(|opt| {
171 opt.ok_or(JustPdfError::InvalidObject {
172 offset: 0,
173 detail: format!("page index {index} not found in page tree"),
174 })
175 })
176}
177
178fn walk_page_tree_find(
183 doc: &PdfDocument,
184 node_ref: &IndirectRef,
185 inherited: &InheritedAttrs,
186 target: usize,
187 counter: &mut usize,
188) -> Result<Option<PageInfo>> {
189 let node_obj = doc.resolve(node_ref)?;
190 let dict = node_obj.as_dict().ok_or(JustPdfError::InvalidObject {
191 offset: 0,
192 detail: "page tree node is not a dict".into(),
193 })?;
194
195 let node_type = dict.get_name(b"Type").unwrap_or(b"");
196
197 match node_type {
198 b"Pages" => {
199 let subtree_count = dict.get_i64(b"Count").unwrap_or(0) as usize;
202 if *counter + subtree_count <= target {
203 *counter += subtree_count;
204 return Ok(None);
205 }
206
207 let updated = inherited.with_overrides(dict);
208 if let Some(kids) = dict.get_array(b"Kids") {
209 let kid_refs: Vec<IndirectRef> = kids
210 .iter()
211 .filter_map(|obj| obj.as_reference().cloned())
212 .collect();
213
214 for kid_ref in kid_refs {
215 if let Some(page) =
216 walk_page_tree_find(doc, &kid_ref, &updated, target, counter)?
217 {
218 return Ok(Some(page));
219 }
220 }
221 }
222 Ok(None)
223 }
224 _ if node_type == b"Page"
225 || dict.contains_key(b"MediaBox")
226 || inherited.media_box.is_some() =>
227 {
228 let current_index = *counter;
229 *counter += 1;
230
231 if current_index != target {
232 return Ok(None);
233 }
234
235 let updated = inherited.with_overrides(dict);
236
237 let media_box = updated.media_box.unwrap_or(Rect {
238 llx: 0.0,
239 lly: 0.0,
240 urx: 612.0,
241 ury: 792.0,
242 });
243
244 Ok(Some(PageInfo {
245 index: current_index,
246 page_ref: node_ref.clone(),
247 media_box,
248 crop_box: updated
249 .crop_box
250 .or_else(|| dict.get_array(b"CropBox").and_then(Rect::from_pdf_array)),
251 bleed_box: dict.get_array(b"BleedBox").and_then(Rect::from_pdf_array),
252 trim_box: dict.get_array(b"TrimBox").and_then(Rect::from_pdf_array),
253 art_box: dict.get_array(b"ArtBox").and_then(Rect::from_pdf_array),
254 rotate: updated.rotate.unwrap_or(0),
255 contents_ref: dict.get(b"Contents").cloned(),
256 resources_ref: updated
257 .resources
258 .or_else(|| dict.get(b"Resources").cloned()),
259 }))
260 }
261 _ => Ok(None),
262 }
263}
264
265#[derive(Debug, Clone, Default)]
267struct InheritedAttrs {
268 media_box: Option<Rect>,
269 crop_box: Option<Rect>,
270 rotate: Option<i64>,
271 resources: Option<PdfObject>,
272}
273
274impl InheritedAttrs {
275 fn with_overrides(&self, dict: &PdfDict) -> Self {
277 let mut child = self.clone();
278
279 if let Some(arr) = dict.get_array(b"MediaBox")
280 && let Some(rect) = Rect::from_pdf_array(arr)
281 {
282 child.media_box = Some(rect);
283 }
284 if let Some(arr) = dict.get_array(b"CropBox")
285 && let Some(rect) = Rect::from_pdf_array(arr)
286 {
287 child.crop_box = Some(rect);
288 }
289 if let Some(r) = dict.get_i64(b"Rotate") {
290 child.rotate = Some(r);
291 }
292 if dict.get(b"Resources").is_some() {
293 child.resources = dict.get(b"Resources").cloned();
294 }
295
296 child
297 }
298}
299
300fn walk_page_tree(
302 doc: &PdfDocument,
303 node_ref: &IndirectRef,
304 inherited: &InheritedAttrs,
305 pages: &mut Vec<PageInfo>,
306) -> Result<()> {
307 let node_obj = doc.resolve(node_ref)?;
308 let dict = node_obj.as_dict().ok_or(JustPdfError::InvalidObject {
309 offset: 0,
310 detail: "page tree node is not a dict".into(),
311 })?;
312
313 let node_type = dict.get_name(b"Type").unwrap_or(b"");
314
315 match node_type {
316 b"Pages" => {
317 let updated = inherited.with_overrides(dict);
318 if let Some(kids) = dict.get_array(b"Kids") {
319 let kid_refs: Vec<IndirectRef> = kids
320 .iter()
321 .filter_map(|obj| obj.as_reference().cloned())
322 .collect();
323
324 for kid_ref in kid_refs {
325 walk_page_tree(doc, &kid_ref, &updated, pages)?;
326 }
327 }
328 }
329 _ if node_type == b"Page"
330 || dict.contains_key(b"MediaBox")
331 || inherited.media_box.is_some() =>
332 {
333 let updated = inherited.with_overrides(dict);
334
335 let media_box = updated.media_box.unwrap_or(Rect {
336 llx: 0.0,
337 lly: 0.0,
338 urx: 612.0,
339 ury: 792.0,
340 });
341
342 let page_info = PageInfo {
343 index: pages.len(),
344 page_ref: node_ref.clone(),
345 media_box,
346 crop_box: updated
347 .crop_box
348 .or_else(|| dict.get_array(b"CropBox").and_then(Rect::from_pdf_array)),
349 bleed_box: dict.get_array(b"BleedBox").and_then(Rect::from_pdf_array),
350 trim_box: dict.get_array(b"TrimBox").and_then(Rect::from_pdf_array),
351 art_box: dict.get_array(b"ArtBox").and_then(Rect::from_pdf_array),
352 rotate: updated.rotate.unwrap_or(0),
353 contents_ref: dict.get(b"Contents").cloned(),
354 resources_ref: updated
355 .resources
356 .or_else(|| dict.get(b"Resources").cloned()),
357 };
358
359 pages.push(page_info);
360 }
361 _ => {
362 }
364 }
365
366 Ok(())
367}
368
#[cfg(test)]
mod tests {
    use super::*;

    /// Four integer entries are converted to floating-point corners.
    #[test]
    fn test_rect_from_array() {
        let entries = [0, 0, 612, 792].map(PdfObject::Integer);
        let parsed = Rect::from_pdf_array(&entries).unwrap();
        assert_eq!(parsed.llx, 0.0);
        assert_eq!(parsed.ury, 792.0);
        assert_eq!(parsed.width(), 612.0);
        assert_eq!(parsed.height(), 792.0);
    }

    /// Real entries keep their fractional part.
    #[test]
    fn test_rect_from_real_array() {
        let entries = [10.5, 20.5, 595.0, 842.0].map(PdfObject::Real);
        let parsed = Rect::from_pdf_array(&entries).unwrap();
        assert_eq!(parsed.llx, 10.5);
        assert_eq!(parsed.width(), 584.5);
    }

    /// Fewer than four entries cannot form a rectangle.
    #[test]
    fn test_rect_too_short() {
        let entries = [0, 0].map(PdfObject::Integer);
        assert!(Rect::from_pdf_array(&entries).is_none());
    }

    /// `Display` prints the corners as a bracketed, space-separated list.
    #[test]
    fn test_rect_display() {
        let letter = Rect {
            llx: 0.0,
            lly: 0.0,
            urx: 612.0,
            ury: 792.0,
        };
        assert_eq!(letter.to_string(), "[0 0 612 792]");
    }
}