1use anyhow::{Context, Result};
2use lopdf::{Dictionary, Document, Object, ObjectId, Stream, content::Content, content::Operation};
3
/// Encoded image bytes together with the metadata used to lay the image out
/// on its own PDF page.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ImageData {
    /// Encoded image file contents; passed to `lopdf::xobject::image_from`
    /// when the image is embedded.
    pub data: Vec<u8>,
    /// Image width; used as the page width (`MediaBox`) and horizontal scale.
    pub width: u32,
    /// Image height; used as the page height (`MediaBox`) and vertical scale.
    pub height: u32,
    /// Orientation tag (presumably the EXIF orientation value — confirm with
    /// the caller). Values 3, 6 and 8 become a page rotation of 180, 90 and
    /// 270 degrees; every other value leaves the page unrotated.
    pub orientation: u16,
}
10
11pub fn create_pdf(images: &[ImageData]) -> Result<Document> {
12 let mut doc = Document::with_version("1.5");
13 let pages_id = doc.new_object_id();
14 let catalog_id = doc.add_object(Dictionary::from_iter(vec![
15 ("Type", Object::Name(b"Catalog".to_vec())),
16 ("Pages", Object::Reference(pages_id)),
17 ]));
18 doc.trailer.set("Root", catalog_id);
19
20 let mut page_ids = vec![];
21
22 for img in images {
23 let page_id = create_and_add_page(&mut doc, img, pages_id)?;
24 page_ids.push(Object::Reference(page_id));
25 }
26
27 doc.objects.insert(
28 pages_id,
29 Dictionary::from_iter(vec![
30 ("Type", Object::Name(b"Pages".to_vec())),
31 ("Kids", Object::Array(page_ids)),
32 ("Count", (images.len() as i32).into()),
33 ])
34 .into(),
35 );
36
37 Ok(doc)
38}
39
40pub fn insert_pages(doc: &mut Document, images: &[ImageData], start_index: u32) -> Result<()> {
41 if images.is_empty() {
42 return Ok(());
43 }
44
45 let pages = doc.get_pages();
46 let page_count = pages.len() as u32;
47 let actual_start = start_index.max(1).min(page_count + 1);
48
49 let catalog_id = doc.trailer.get(b"Root").and_then(Object::as_reference).context("The PDF file appears to be structurally corrupted. You may need to repair the PDF or recreate it before editing.")?;
51 let catalog = doc.get_object(catalog_id).and_then(Object::as_dict)?;
52 let pages_root_id = catalog.get(b"Pages").and_then(Object::as_reference).context("The PDF file does not contain any readable pages or its structure is corrupted. You may need to repair the PDF before editing.")?;
53
54 let mut new_page_ids = Vec::new();
55 for img in images {
56 let page_id = create_and_add_page(doc, img, pages_root_id)?;
57 new_page_ids.push(Object::Reference(page_id));
58 }
59
60 let pages_dict = doc
61 .get_object_mut(pages_root_id)
62 .and_then(Object::as_dict_mut)?;
63 let count = pages_dict
64 .get(b"Count")
65 .and_then(Object::as_i64)
66 .unwrap_or(0);
67
68 if let Ok(Object::Array(kids)) = pages_dict.get_mut(b"Kids") {
69 let insert_idx = (actual_start as usize - 1).min(kids.len());
73 for (i, new_page) in new_page_ids.into_iter().enumerate() {
74 kids.insert(insert_idx + i, new_page);
75 }
76 }
77
78 pages_dict.set("Count", count + images.len() as i64);
79
80 Ok(())
81}
82
83fn create_and_add_page(
84 doc: &mut Document,
85 img: &ImageData,
86 parent_pages_id: ObjectId,
87) -> Result<ObjectId> {
88 let xobject = lopdf::xobject::image_from(img.data.clone()).context("Failed to embed the image into the PDF. The image data might be invalid or in an unsupported format.")?;
89 let xobject_id = doc.add_object(xobject);
90
91 let img_name = format!("Im{}", xobject_id.0);
92
93 let content = Content {
94 operations: vec![
95 Operation::new("q", vec![]),
96 Operation::new(
97 "cm",
98 vec![
99 img.width.into(),
100 0.into(),
101 0.into(),
102 img.height.into(),
103 0.into(),
104 0.into(),
105 ],
106 ),
107 Operation::new("Do", vec![Object::Name(img_name.as_bytes().to_vec())]),
108 Operation::new("Q", vec![]),
109 ],
110 };
111
112 let content_id = doc.add_object(Stream::new(Dictionary::new(), content.encode().unwrap()));
113
114 let mut resources = Dictionary::new();
115 let mut xobjects = Dictionary::new();
116 xobjects.set(img_name.as_bytes().to_vec(), Object::Reference(xobject_id));
117 resources.set("XObject", Object::Dictionary(xobjects));
118
119 resources.set(
120 "ProcSet",
121 Object::Array(vec![
122 Object::Name(b"PDF".to_vec()),
123 Object::Name(b"Text".to_vec()),
124 Object::Name(b"ImageB".to_vec()),
125 Object::Name(b"ImageC".to_vec()),
126 Object::Name(b"ImageI".to_vec()),
127 ]),
128 );
129
130 let rotate = match img.orientation {
131 3 => 180,
132 6 => 90,
133 8 => 270,
134 _ => 0,
135 };
136
137 let mut page_dict_vec = vec![
138 ("Type", Object::Name(b"Page".to_vec())),
139 ("Parent", Object::Reference(parent_pages_id)),
140 (
141 "MediaBox",
142 vec![0.into(), 0.into(), img.width.into(), img.height.into()].into(),
143 ),
144 ("Contents", Object::Reference(content_id)),
145 ("Resources", Object::Dictionary(resources)),
146 ];
147
148 if rotate != 0 {
149 page_dict_vec.push(("Rotate", rotate.into()));
150 }
151
152 let page_dict = Dictionary::from_iter(page_dict_vec);
153
154 Ok(doc.add_object(page_dict))
155}
156
157pub fn remove_page(doc: &mut Document, page_number: u32) -> Result<()> {
158 let pages = doc.get_pages();
159 if !pages.contains_key(&page_number) {
160 anyhow::bail!(
161 "Cannot remove page {}. This page does not exist in the document.",
162 page_number
163 );
164 }
165 if pages.len() <= 1 {
166 anyhow::bail!(
167 "Cannot remove page {}. A PDF document must have at least one page.",
168 page_number
169 );
170 }
171 doc.delete_pages(&[page_number]);
172 Ok(())
173}
174
175pub fn swap_pages(doc: &mut Document, page1: u32, page2: u32) -> Result<()> {
176 if page1 == page2 {
177 return Ok(());
178 }
179
180 let pages = doc.get_pages();
181 let pid1 = pages
182 .get(&page1)
183 .copied()
184 .with_context(|| format!("Page index {} is invalid or out of bounds.", page1))?;
185 let pid2 = pages
186 .get(&page2)
187 .copied()
188 .with_context(|| format!("Page index {} is invalid or out of bounds.", page2))?;
189
190 fn replace_kid(
191 doc: &mut Document,
192 parent_id: ObjectId,
193 old_kid: ObjectId,
194 new_kid: ObjectId,
195 ) -> Result<()> {
196 let parent = doc
197 .get_object_mut(parent_id)
198 .and_then(Object::as_dict_mut)?;
199 if let Ok(kids) = parent.get_mut(b"Kids")
200 && let Object::Array(kids_arr) = kids {
201 for kid in kids_arr.iter_mut() {
202 if let Object::Reference(ref_id) = kid
203 && *ref_id == old_kid {
204 *kid = Object::Reference(new_kid);
205 return Ok(());
206 }
207 }
208 }
209 Ok(())
210 }
211
212 let parent1 = match doc.get_object(pid1).and_then(Object::as_dict) {
213 Ok(d) => match d.get(b"Parent") {
214 Ok(Object::Reference(p)) => *p,
215 _ => anyhow::bail!(
216 "The first page specified exists but its structure within the PDF is broken. Please try repairing the PDF."
217 ),
218 },
219 _ => anyhow::bail!(
220 "The first page specified cannot be read because its data is corrupted. Please try repairing the PDF."
221 ),
222 };
223
224 let parent2 = match doc.get_object(pid2).and_then(Object::as_dict) {
225 Ok(d) => match d.get(b"Parent") {
226 Ok(Object::Reference(p)) => *p,
227 _ => anyhow::bail!(
228 "The second page specified exists but its structure within the PDF is broken. Please try repairing the PDF."
229 ),
230 },
231 _ => anyhow::bail!(
232 "The second page specified cannot be read because its data is corrupted. Please try repairing the PDF."
233 ),
234 };
235
236 if parent1 == parent2 {
237 let parent = doc.get_object_mut(parent1).and_then(Object::as_dict_mut)?;
238 if let Ok(kids) = parent.get_mut(b"Kids")
239 && let Object::Array(kids_arr) = kids {
240 let mut idx1 = None;
241 let mut idx2 = None;
242 for (i, kid) in kids_arr.iter().enumerate() {
243 if let Object::Reference(ref_id) = kid {
244 if *ref_id == pid1 {
245 idx1 = Some(i);
246 }
247 if *ref_id == pid2 {
248 idx2 = Some(i);
249 }
250 }
251 }
252 if let (Some(i1), Some(i2)) = (idx1, idx2) {
253 kids_arr.swap(i1, i2);
254 }
255 }
256 } else {
257 replace_kid(doc, parent1, pid1, pid2)?;
258 replace_kid(doc, parent2, pid2, pid1)?;
259
260 if let Ok(Object::Dictionary(d)) = doc.get_object_mut(pid1) {
261 d.set("Parent", Object::Reference(parent2));
262 }
263 if let Ok(Object::Dictionary(d)) = doc.get_object_mut(pid2) {
264 d.set("Parent", Object::Reference(parent1));
265 }
266 }
267
268 Ok(())
269}
270
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an `ImageData` from a small base64-encoded PNG, declared as 1x1
    /// with orientation 1 (no rotation).
    fn create_dummy_image() -> ImageData {
        use base64::{Engine as _, engine::general_purpose::STANDARD};
        let b64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=";
        ImageData {
            data: STANDARD.decode(b64).unwrap(),
            width: 1,
            height: 1,
            orientation: 1,
        }
    }

    /// Serializes the document and parses it back, mimicking the save/load
    /// boundary between separate CLI invocations.
    fn save_and_reload(doc: &mut Document) -> Document {
        let mut bytes = Vec::new();
        doc.save_to(&mut bytes).unwrap();
        Document::load_mem(&bytes).unwrap()
    }

    #[test]
    fn test_swap_pages() {
        let mut doc = Document::with_version("1.5");
        let pages_id = doc.new_object_id();
        let catalog_id = doc.add_object(Dictionary::from_iter(vec![
            ("Type", Object::Name(b"Catalog".to_vec())),
            ("Pages", Object::Reference(pages_id)),
        ]));
        doc.trailer.set("Root", catalog_id);

        let mut page_ids = vec![];
        for _ in 0..3 {
            let page_dict = Dictionary::from_iter(vec![
                ("Type", Object::Name(b"Page".to_vec())),
                ("Parent", Object::Reference(pages_id)),
            ]);
            page_ids.push(Object::Reference(doc.add_object(page_dict)));
        }

        doc.objects.insert(
            pages_id,
            Dictionary::from_iter(vec![
                ("Type", Object::Name(b"Pages".to_vec())),
                ("Kids", Object::Array(page_ids.clone())),
                ("Count", 3.into()),
            ])
            .into(),
        );

        assert_eq!(doc.get_pages().len(), 3);

        remove_page(&mut doc, 2).unwrap();
        assert_eq!(doc.get_pages().len(), 2);

        let before = doc.get_pages();
        swap_pages(&mut doc, 1, 2).unwrap();
        let after = doc.get_pages();

        // The swap must keep the page count and exchange the two pages.
        assert_eq!(after.len(), 2);
        assert_eq!(after[&1], before[&2]);
        assert_eq!(after[&2], before[&1]);
    }

    #[test]
    fn test_cli_sequence() {
        let mut doc = create_pdf(&[create_dummy_image()]).unwrap();
        let mut doc = save_and_reload(&mut doc);
        assert_eq!(doc.get_pages().len(), 1);

        insert_pages(&mut doc, &[create_dummy_image()], 1).unwrap();
        let mut doc = save_and_reload(&mut doc);
        assert_eq!(doc.get_pages().len(), 2);

        insert_pages(&mut doc, &[create_dummy_image()], 2).unwrap();
        let mut doc = save_and_reload(&mut doc);
        assert_eq!(doc.get_pages().len(), 3);

        remove_page(&mut doc, 2).unwrap();
        let mut doc = save_and_reload(&mut doc);
        assert_eq!(doc.get_pages().len(), 2);

        let before = doc.get_pages();
        swap_pages(&mut doc, 1, 2).unwrap();
        let after = doc.get_pages();
        assert_eq!(after.len(), 2);
        assert_eq!(after[&1], before[&2]);
        assert_eq!(after[&2], before[&1]);
    }
}