1use lopdf::{Document, Object};
4
5use crate::models::bbox::BoundingBox;
6
7#[derive(Debug, Clone)]
9pub struct PageInfo {
10 pub index: usize,
12 pub page_number: u32,
14 pub media_box: BoundingBox,
16 pub crop_box: BoundingBox,
18 pub rotation: i64,
20 pub width: f64,
22 pub height: f64,
24}
25
26pub fn extract_page_info(doc: &Document) -> Vec<PageInfo> {
28 let pages = doc.get_pages();
29 let mut infos = Vec::with_capacity(pages.len());
30
31 let mut sorted_pages: Vec<_> = pages.into_iter().collect();
32 sorted_pages.sort_by_key(|&(num, _)| num);
33
34 for (idx, (page_num, page_id)) in sorted_pages.into_iter().enumerate() {
35 let page_dict = match doc.get_object(page_id).and_then(|o| o.as_dict().cloned()) {
36 Ok(d) => d,
37 Err(_) => continue,
38 };
39
40 let media_box = extract_rect(doc, &page_dict, b"MediaBox")
41 .unwrap_or_else(|| BoundingBox::new(None, 0.0, 0.0, 612.0, 792.0)); let crop_box =
44 extract_rect(doc, &page_dict, b"CropBox").unwrap_or_else(|| media_box.clone());
45
46 let rotation = page_dict
47 .get(b"Rotate")
48 .ok()
49 .and_then(|o| resolve_integer(doc, o))
50 .unwrap_or(0);
51
52 let width = media_box.width();
53 let height = media_box.height();
54
55 infos.push(PageInfo {
56 index: idx,
57 page_number: page_num,
58 media_box,
59 crop_box,
60 rotation,
61 width,
62 height,
63 });
64 }
65
66 infos
67}
68
69fn extract_rect(doc: &Document, dict: &lopdf::Dictionary, key: &[u8]) -> Option<BoundingBox> {
72 if let Ok(obj) = dict.get(key) {
73 if let Some(bbox) = parse_rect_array(doc, obj) {
74 return Some(bbox);
75 }
76 }
77 if let Ok(parent_ref) = dict.get(b"Parent") {
79 if let Ok((_, parent_obj)) = doc.dereference(parent_ref) {
80 if let Ok(parent_dict) = parent_obj.as_dict() {
81 return extract_rect(doc, parent_dict, key);
82 }
83 }
84 }
85 None
86}
87
88fn parse_rect_array(doc: &Document, obj: &Object) -> Option<BoundingBox> {
89 let arr = match obj {
90 Object::Array(a) => a.clone(),
91 Object::Reference(id) => doc
92 .get_object(*id)
93 .ok()
94 .and_then(|o| o.as_array().ok().cloned())?,
95 _ => return None,
96 };
97
98 if arr.len() < 4 {
99 return None;
100 }
101
102 let vals: Vec<f64> = arr.iter().filter_map(|o| resolve_number(doc, o)).collect();
103
104 if vals.len() < 4 {
105 return None;
106 }
107
108 Some(BoundingBox::new(None, vals[0], vals[1], vals[2], vals[3]))
109}
110
111fn resolve_number(doc: &Document, obj: &Object) -> Option<f64> {
112 match obj {
113 Object::Real(f) => Some(*f),
114 Object::Integer(i) => Some(*i as f64),
115 Object::Reference(id) => doc
116 .get_object(*id)
117 .ok()
118 .and_then(|o| resolve_number(doc, o)),
119 _ => None,
120 }
121}
122
123fn resolve_integer(doc: &Document, obj: &Object) -> Option<i64> {
124 match obj {
125 Object::Integer(i) => Some(*i),
126 Object::Reference(id) => doc
127 .get_object(*id)
128 .ok()
129 .and_then(|o| resolve_integer(doc, o)),
130 _ => None,
131 }
132}
133
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `PageInfo` for page 1 (index 0) whose media and crop boxes
    /// are both constructed from `(0.0, 0.0, w, h)`.
    fn sample_page(w: f64, h: f64, rotation: i64) -> PageInfo {
        PageInfo {
            index: 0,
            page_number: 1,
            media_box: BoundingBox::new(None, 0.0, 0.0, w, h),
            crop_box: BoundingBox::new(None, 0.0, 0.0, w, h),
            rotation,
            width: w,
            height: h,
        }
    }

    /// A freshly created document has no pages, so nothing is extracted.
    #[test]
    fn test_empty_document() {
        let empty = Document::new();
        assert!(extract_page_info(&empty).is_empty());
    }

    /// The fields of an unrotated 612 x 792 page read back unchanged.
    #[test]
    fn test_page_info_defaults() {
        let page = sample_page(612.0, 792.0, 0);
        assert_eq!(page.width, 612.0);
        assert_eq!(page.height, 792.0);
        assert_eq!(page.rotation, 0);
    }

    /// The rotation field of a 595 x 842 page rotated by 90 reads back
    /// unchanged.
    #[test]
    fn test_rotated_page() {
        let page = sample_page(595.0, 842.0, 90);
        assert_eq!(page.rotation, 90);
    }
}