Skip to main content

img_to_pdf/
lib.rs

1use anyhow::{Context, Result};
2use lopdf::{Dictionary, Document, Object, ObjectId, Stream, content::Content, content::Operation};
3
/// An image to be placed on its own PDF page.
///
/// The page created for an image uses `width` x `height` directly as its
/// `MediaBox`, so one image unit maps to one PDF user-space unit.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ImageData {
    /// Raw image bytes, handed unchanged to `lopdf::xobject::image_from`.
    pub data: Vec<u8>,
    /// Page/image width used for the `MediaBox` and the placement matrix.
    pub width: u32,
    /// Page/image height used for the `MediaBox` and the placement matrix.
    pub height: u32,
    /// Orientation code; values 3, 6 and 8 become a page `Rotate` of 180, 90
    /// and 270 degrees, every other value leaves the page unrotated.
    /// NOTE(review): the values look like the EXIF orientation tag — confirm
    /// against the caller that fills this field.
    pub orientation: u16,
}
10
11pub fn create_pdf(images: &[ImageData]) -> Result<Document> {
12    let mut doc = Document::with_version("1.5");
13    let pages_id = doc.new_object_id();
14    let catalog_id = doc.add_object(Dictionary::from_iter(vec![
15        ("Type", Object::Name(b"Catalog".to_vec())),
16        ("Pages", Object::Reference(pages_id)),
17    ]));
18    doc.trailer.set("Root", catalog_id);
19
20    let mut page_ids = vec![];
21
22    for img in images {
23        let page_id = create_and_add_page(&mut doc, img, pages_id)?;
24        page_ids.push(Object::Reference(page_id));
25    }
26
27    doc.objects.insert(
28        pages_id,
29        Dictionary::from_iter(vec![
30            ("Type", Object::Name(b"Pages".to_vec())),
31            ("Kids", Object::Array(page_ids)),
32            ("Count", (images.len() as i32).into()),
33        ])
34        .into(),
35    );
36
37    Ok(doc)
38}
39
40pub fn insert_pages(doc: &mut Document, images: &[ImageData], start_index: u32) -> Result<()> {
41    if images.is_empty() {
42        return Ok(());
43    }
44
45    let pages = doc.get_pages();
46    let page_count = pages.len() as u32;
47    let actual_start = start_index.max(1).min(page_count + 1);
48
49    // Find the root Pages object
50    let catalog_id = doc.trailer.get(b"Root").and_then(Object::as_reference).context("The PDF file appears to be structurally corrupted. You may need to repair the PDF or recreate it before editing.")?;
51    let catalog = doc.get_object(catalog_id).and_then(Object::as_dict)?;
52    let pages_root_id = catalog.get(b"Pages").and_then(Object::as_reference).context("The PDF file does not contain any readable pages or its structure is corrupted. You may need to repair the PDF before editing.")?;
53
54    let mut new_page_ids = Vec::new();
55    for img in images {
56        let page_id = create_and_add_page(doc, img, pages_root_id)?;
57        new_page_ids.push(Object::Reference(page_id));
58    }
59
60    let pages_dict = doc
61        .get_object_mut(pages_root_id)
62        .and_then(Object::as_dict_mut)?;
63    let count = pages_dict
64        .get(b"Count")
65        .and_then(Object::as_i64)
66        .unwrap_or(0);
67
68    if let Ok(Object::Array(kids)) = pages_dict.get_mut(b"Kids") {
69        // Find correct insertion index in the kids array.
70        // For simplicity, we just use the actual_start index minus 1 (clamped to kids.len()).
71        // This is correct for simple, flat PDF page trees.
72        let insert_idx = (actual_start as usize - 1).min(kids.len());
73        for (i, new_page) in new_page_ids.into_iter().enumerate() {
74            kids.insert(insert_idx + i, new_page);
75        }
76    }
77
78    pages_dict.set("Count", count + images.len() as i64);
79
80    Ok(())
81}
82
83fn create_and_add_page(
84    doc: &mut Document,
85    img: &ImageData,
86    parent_pages_id: ObjectId,
87) -> Result<ObjectId> {
88    let xobject = lopdf::xobject::image_from(img.data.clone()).context("Failed to embed the image into the PDF. The image data might be invalid or in an unsupported format.")?;
89    let xobject_id = doc.add_object(xobject);
90
91    let img_name = format!("Im{}", xobject_id.0);
92
93    let content = Content {
94        operations: vec![
95            Operation::new("q", vec![]),
96            Operation::new(
97                "cm",
98                vec![
99                    img.width.into(),
100                    0.into(),
101                    0.into(),
102                    img.height.into(),
103                    0.into(),
104                    0.into(),
105                ],
106            ),
107            Operation::new("Do", vec![Object::Name(img_name.as_bytes().to_vec())]),
108            Operation::new("Q", vec![]),
109        ],
110    };
111
112    let content_id = doc.add_object(Stream::new(Dictionary::new(), content.encode().unwrap()));
113
114    let mut resources = Dictionary::new();
115    let mut xobjects = Dictionary::new();
116    xobjects.set(img_name.as_bytes().to_vec(), Object::Reference(xobject_id));
117    resources.set("XObject", Object::Dictionary(xobjects));
118
119    resources.set(
120        "ProcSet",
121        Object::Array(vec![
122            Object::Name(b"PDF".to_vec()),
123            Object::Name(b"Text".to_vec()),
124            Object::Name(b"ImageB".to_vec()),
125            Object::Name(b"ImageC".to_vec()),
126            Object::Name(b"ImageI".to_vec()),
127        ]),
128    );
129
130    let rotate = match img.orientation {
131        3 => 180,
132        6 => 90,
133        8 => 270,
134        _ => 0,
135    };
136
137    let mut page_dict_vec = vec![
138        ("Type", Object::Name(b"Page".to_vec())),
139        ("Parent", Object::Reference(parent_pages_id)),
140        (
141            "MediaBox",
142            vec![0.into(), 0.into(), img.width.into(), img.height.into()].into(),
143        ),
144        ("Contents", Object::Reference(content_id)),
145        ("Resources", Object::Dictionary(resources)),
146    ];
147
148    if rotate != 0 {
149        page_dict_vec.push(("Rotate", rotate.into()));
150    }
151
152    let page_dict = Dictionary::from_iter(page_dict_vec);
153
154    Ok(doc.add_object(page_dict))
155}
156
157pub fn remove_page(doc: &mut Document, page_number: u32) -> Result<()> {
158    let pages = doc.get_pages();
159    if !pages.contains_key(&page_number) {
160        anyhow::bail!(
161            "Cannot remove page {}. This page does not exist in the document.",
162            page_number
163        );
164    }
165    if pages.len() <= 1 {
166        anyhow::bail!(
167            "Cannot remove page {}. A PDF document must have at least one page.",
168            page_number
169        );
170    }
171    doc.delete_pages(&[page_number]);
172    Ok(())
173}
174
175pub fn swap_pages(doc: &mut Document, page1: u32, page2: u32) -> Result<()> {
176    if page1 == page2 {
177        return Ok(());
178    }
179
180    let pages = doc.get_pages();
181    let pid1 = pages
182        .get(&page1)
183        .copied()
184        .with_context(|| format!("Page index {} is invalid or out of bounds.", page1))?;
185    let pid2 = pages
186        .get(&page2)
187        .copied()
188        .with_context(|| format!("Page index {} is invalid or out of bounds.", page2))?;
189
190    fn replace_kid(
191        doc: &mut Document,
192        parent_id: ObjectId,
193        old_kid: ObjectId,
194        new_kid: ObjectId,
195    ) -> Result<()> {
196        let parent = doc
197            .get_object_mut(parent_id)
198            .and_then(Object::as_dict_mut)?;
199        if let Ok(kids) = parent.get_mut(b"Kids")
200            && let Object::Array(kids_arr) = kids {
201                for kid in kids_arr.iter_mut() {
202                    if let Object::Reference(ref_id) = kid
203                        && *ref_id == old_kid {
204                            *kid = Object::Reference(new_kid);
205                            return Ok(());
206                        }
207                }
208            }
209        Ok(())
210    }
211
212    let parent1 = match doc.get_object(pid1).and_then(Object::as_dict) {
213        Ok(d) => match d.get(b"Parent") {
214            Ok(Object::Reference(p)) => *p,
215            _ => anyhow::bail!(
216                "The first page specified exists but its structure within the PDF is broken. Please try repairing the PDF."
217            ),
218        },
219        _ => anyhow::bail!(
220            "The first page specified cannot be read because its data is corrupted. Please try repairing the PDF."
221        ),
222    };
223
224    let parent2 = match doc.get_object(pid2).and_then(Object::as_dict) {
225        Ok(d) => match d.get(b"Parent") {
226            Ok(Object::Reference(p)) => *p,
227            _ => anyhow::bail!(
228                "The second page specified exists but its structure within the PDF is broken. Please try repairing the PDF."
229            ),
230        },
231        _ => anyhow::bail!(
232            "The second page specified cannot be read because its data is corrupted. Please try repairing the PDF."
233        ),
234    };
235
236    if parent1 == parent2 {
237        let parent = doc.get_object_mut(parent1).and_then(Object::as_dict_mut)?;
238        if let Ok(kids) = parent.get_mut(b"Kids")
239            && let Object::Array(kids_arr) = kids {
240                let mut idx1 = None;
241                let mut idx2 = None;
242                for (i, kid) in kids_arr.iter().enumerate() {
243                    if let Object::Reference(ref_id) = kid {
244                        if *ref_id == pid1 {
245                            idx1 = Some(i);
246                        }
247                        if *ref_id == pid2 {
248                            idx2 = Some(i);
249                        }
250                    }
251                }
252                if let (Some(i1), Some(i2)) = (idx1, idx2) {
253                    kids_arr.swap(i1, i2);
254                }
255            }
256    } else {
257        replace_kid(doc, parent1, pid1, pid2)?;
258        replace_kid(doc, parent2, pid2, pid1)?;
259
260        if let Ok(Object::Dictionary(d)) = doc.get_object_mut(pid1) {
261            d.set("Parent", Object::Reference(parent2));
262        }
263        if let Ok(Object::Dictionary(d)) = doc.get_object_mut(pid2) {
264            d.set("Parent", Object::Reference(parent1));
265        }
266    }
267
268    Ok(())
269}
270
#[cfg(test)]
mod tests {
    use super::*;

    /// Decodes a small base64-embedded PNG and wraps it as a 1x1 `ImageData`
    /// with orientation 1 (no rotation).
    fn create_dummy_image() -> ImageData {
        let b64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=";
        use base64::{Engine as _, engine::general_purpose::STANDARD};
        ImageData {
            data: STANDARD.decode(b64).unwrap(),
            width: 1,
            height: 1,
            orientation: 1,
        }
    }

    /// Builds a document with a catalog and one flat page-tree node holding
    /// `page_count` otherwise empty pages.
    fn create_flat_document(page_count: i64) -> Document {
        let mut doc = Document::with_version("1.5");
        let pages_id = doc.new_object_id();
        let catalog_id = doc.add_object(Dictionary::from_iter(vec![
            ("Type", Object::Name(b"Catalog".to_vec())),
            ("Pages", Object::Reference(pages_id)),
        ]));
        doc.trailer.set("Root", catalog_id);

        let mut page_ids = vec![];
        for _ in 0..page_count {
            let page_dict = Dictionary::from_iter(vec![
                ("Type", Object::Name(b"Page".to_vec())),
                ("Parent", Object::Reference(pages_id)),
            ]);
            page_ids.push(Object::Reference(doc.add_object(page_dict)));
        }

        doc.objects.insert(
            pages_id,
            Dictionary::from_iter(vec![
                ("Type", Object::Name(b"Pages".to_vec())),
                ("Kids", Object::Array(page_ids)),
                ("Count", Object::Integer(page_count)),
            ])
            .into(),
        );

        doc
    }

    #[test]
    fn test_swap_pages() {
        let mut doc = create_flat_document(3);

        // Ensure get_pages works
        assert_eq!(doc.get_pages().len(), 3);

        // Remove page 2
        crate::remove_page(&mut doc, 2).unwrap();
        let before = doc.get_pages();
        assert_eq!(before.len(), 2);

        // Swap page 1 and 2, and check that the order really changed rather
        // than only that the page count survived.
        crate::swap_pages(&mut doc, 1, 2).unwrap();
        let after = doc.get_pages();
        assert_eq!(after.len(), 2);
        assert_eq!(after[&1], before[&2]);
        assert_eq!(after[&2], before[&1]);
    }

    #[test]
    fn test_invalid_page_numbers_are_rejected() {
        let mut doc = create_flat_document(2);

        assert!(crate::swap_pages(&mut doc, 1, 99).is_err());
        assert!(crate::remove_page(&mut doc, 99).is_err());

        // Failed calls must not change the document.
        assert_eq!(doc.get_pages().len(), 2);
    }

    #[test]
    fn test_cli_sequence() {
        let dummy_img = create_dummy_image();

        // 1. Create
        let mut doc = crate::create_pdf(&[dummy_img]).unwrap();
        assert_eq!(doc.get_pages().len(), 1);
        let mut out = Vec::new();
        doc.save_to(&mut out).unwrap();

        // 2. Insert at index 1
        let dummy2 = create_dummy_image();
        let mut doc = Document::load_mem(&out).unwrap();
        crate::insert_pages(&mut doc, &[dummy2], 1).unwrap();
        assert_eq!(doc.get_pages().len(), 2);
        let mut out2 = Vec::new();
        doc.save_to(&mut out2).unwrap();

        // 3. Insert at index 2
        let dummy3 = create_dummy_image();
        let mut doc = Document::load_mem(&out2).unwrap();
        crate::insert_pages(&mut doc, &[dummy3], 2).unwrap();
        assert_eq!(doc.get_pages().len(), 3);
        let mut out3 = Vec::new();
        doc.save_to(&mut out3).unwrap();

        // 4. Remove page 2
        let mut doc = Document::load_mem(&out3).unwrap();
        crate::remove_page(&mut doc, 2).unwrap();
        assert_eq!(doc.get_pages().len(), 2);
        let mut out4 = Vec::new();
        doc.save_to(&mut out4).unwrap();

        // 5. Swap
        let mut doc = Document::load_mem(&out4).unwrap();
        crate::swap_pages(&mut doc, 1, 2).unwrap();
        assert_eq!(doc.get_pages().len(), 2);
    }
}