rusty_pdf/
lib.rs

1mod error;
2mod image_xobject;
3mod pdf_object;
4mod utils;
5
6use headless_chrome::{Browser, LaunchOptionsBuilder, Tab};
7use image_xobject::ImageXObject;
8use lopdf::{content::Operation, dictionary, Bookmark, Document, Object, ObjectId};
9use pdf_object::PdfObjectDeref;
10use std::{
11    collections::{BTreeMap, HashMap},
12    io::Read,
13    sync::Arc,
14};
15
16pub use error::Error;
17pub use lopdf;
18use utils::Server;
19
/// Axis-aligned rectangle described by two corner points.
///
/// Elsewhere in this file `(x1, y1)` is used as the placement position and
/// `(x2 - x1, y2 - y1)` as the width/height of the area.
#[derive(Debug, Clone, Default)]
pub struct Rectangle {
    /// Horizontal coordinate of the first corner.
    pub x1: f64,
    /// Vertical coordinate of the first corner.
    pub y1: f64,
    /// Horizontal coordinate of the opposite corner.
    pub x2: f64,
    /// Vertical coordinate of the opposite corner.
    pub y2: f64,
}

impl Rectangle {
    /// Builds a rectangle anchored at `(x, y)` that is `width` wide and whose
    /// height keeps the aspect ratio of `dimensions`.
    ///
    /// `dimensions` is the `(width, height)` of the source image. A source
    /// width of `0.0` yields a non-finite `y2`, because the ratio is a plain
    /// floating point division.
    pub fn scale_image_on_width(width: f64, x: f64, y: f64, dimensions: (f64, f64)) -> Self {
        // Height per unit of width in the source image.
        let aspect = dimensions.1 / dimensions.0;
        let height = width * aspect;

        Rectangle {
            x1: x,
            y1: y,
            x2: x + width,
            y2: y + height,
        }
    }
}
40
/// Fonts a caller can name when adding text to a document.
///
/// The enum is fieldless, so it derives `Clone`/`Copy` and equality: callers
/// can reuse one value across several calls that take a `Font` by value and
/// can compare fonts directly.
///
/// NOTE(review): `add_text_to_doc` currently ignores the font it is given and
/// always references the font resource named `F1`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Font {
    /// Helvetica.
    Helvetica,
    /// Courier.
    Courier,
    /// NOTE(review): not a font name the reviewer recognises — confirm the
    /// intended typeface before relying on this variant.
    Fontier,
}
47
48/// The whole PDF document. This struct only loads part of the document on demand.
49#[derive(Debug, Clone)]
50pub struct PDFSigningDocument {
51    raw_document: Document,
52    /// Link between the image name saved and the objectId of the image.
53    /// This is used to reduce the amount of copies of the images in the pdf file.
54    image_signature_object_id: HashMap<String, ObjectId>,
55    //TODO add map of existing font and unsafe name within document
56    // font_unsafe_name: HashMap<String, String>,
57}
58
59fn browser() -> Browser {
60    Browser::new(
61        LaunchOptionsBuilder::default()
62            .headless(true)
63            .build()
64            .unwrap(),
65    )
66    .unwrap()
67}
68
69fn dumb_client(server: &Server) -> (Browser, Arc<Tab>) {
70    let browser = browser();
71    let tab = browser.wait_for_initial_tab().unwrap();
72    tab.navigate_to(&format!("http://127.0.0.1:{}", server.port()))
73        .unwrap();
74    (browser, tab)
75}
76
77fn dumb_server(data: &'static str) -> (Server, Browser, Arc<Tab>) {
78    let server = Server::with_dumb_html(data);
79    let (browser, tab) = dumb_client(&server);
80    (server, browser, tab)
81}
82
83impl PDFSigningDocument {
84    pub fn new(raw_document: Document) -> Self {
85        PDFSigningDocument {
86            raw_document,
87            image_signature_object_id: HashMap::new(),
88        }
89    }
90
91    pub fn generate_pdf_from_html(html_content: &'static str) -> Self {
92        let (_server, _browser, tab) = dumb_server(&html_content);
93
94        let local_pdf = tab
95            .wait_until_navigated()
96            .unwrap()
97            .print_to_pdf(None)
98            .unwrap();
99
100        let new_pdf = Document::load_mem(&local_pdf).unwrap();
101
102        return PDFSigningDocument::new(new_pdf);
103    }
104
105    pub fn merge(documents: Vec<Document>) -> Result<Self, Error> {
106        let mut max_id = 1;
107        let mut pagenum = 1;
108
109        let mut documents_pages = BTreeMap::new();
110        let mut documents_objects = BTreeMap::new();
111        let mut document = Document::with_version("1.5");
112
113        for mut doc in documents {
114            let mut first = false;
115            doc.renumber_objects_with(max_id);
116
117            max_id = doc.max_id + 1;
118
119            documents_pages.extend(
120                doc.get_pages()
121                    .into_iter()
122                    .map(|(_, object_id)| {
123                        if !first {
124                            let bookmark = Bookmark::new(
125                                String::from(format!("Page_{}", pagenum)),
126                                [0.0, 0.0, 1.0],
127                                0,
128                                object_id,
129                            );
130                            document.add_bookmark(bookmark, None);
131                            first = true;
132                            pagenum += 1;
133                        }
134
135                        (object_id, doc.get_object(object_id).unwrap().to_owned())
136                    })
137                    .collect::<BTreeMap<ObjectId, Object>>(),
138            );
139            documents_objects.extend(doc.objects);
140        }
141
142        // Catalog and Pages are mandatory
143        let mut catalog_object: Option<(ObjectId, Object)> = None;
144        let mut pages_object: Option<(ObjectId, Object)> = None;
145
146        // Process all objects except "Page" type
147        for (object_id, object) in documents_objects.iter() {
148            // We have to ignore "Page" (as are processed later), "Outlines" and "Outline" objects
149            // All other objects should be collected and inserted into the main Document
150            match object.type_name().unwrap_or("") {
151                "Catalog" => {
152                    // Collect a first "Catalog" object and use it for the future "Pages"
153                    catalog_object = Some((
154                        if let Some((id, _)) = catalog_object {
155                            id
156                        } else {
157                            *object_id
158                        },
159                        object.clone(),
160                    ));
161                }
162                "Pages" => {
163                    // Collect and update a first "Pages" object and use it for the future "Catalog"
164                    // We have also to merge all dictionaries of the old and the new "Pages" object
165                    if let Ok(dictionary) = object.as_dict() {
166                        let mut dictionary = dictionary.clone();
167                        if let Some((_, ref object)) = pages_object {
168                            if let Ok(old_dictionary) = object.as_dict() {
169                                dictionary.extend(old_dictionary);
170                            }
171                        }
172
173                        pages_object = Some((
174                            if let Some((id, _)) = pages_object {
175                                id
176                            } else {
177                                *object_id
178                            },
179                            Object::Dictionary(dictionary),
180                        ));
181                    }
182                }
183                "Page" => {}     // Ignored, processed later and separately
184                "Outlines" => {} // Ignored, not supported yet
185                "Outline" => {}  // Ignored, not supported yet
186                _ => {
187                    document.objects.insert(*object_id, object.clone());
188                }
189            }
190        }
191
192        // If no "Pages" found abort
193        if pages_object.is_none() {
194            return Err(Error::Other("Pages root not found.".to_owned()));
195        }
196
197        // Iter over all "Page" and collect with the parent "Pages" created before
198        for (object_id, object) in documents_pages.iter() {
199            if let Ok(dictionary) = object.as_dict() {
200                let mut dictionary = dictionary.clone();
201                dictionary.set("Parent", pages_object.as_ref().unwrap().0);
202
203                document
204                    .objects
205                    .insert(*object_id, Object::Dictionary(dictionary));
206            }
207        }
208
209        // If no "Catalog" found abort
210        if catalog_object.is_none() {
211            return Err(Error::Other("Catalog root not found".to_owned()));
212        }
213
214        let catalog_object = catalog_object.unwrap();
215        let pages_object = pages_object.unwrap();
216
217        // Build a new "Pages" with updated fields
218        if let Ok(dictionary) = pages_object.1.as_dict() {
219            let mut dictionary = dictionary.clone();
220
221            // Set new pages count
222            dictionary.set("Count", documents_pages.len() as u32);
223
224            // Set new "Kids" list (collected from documents pages) for "Pages"
225            dictionary.set(
226                "Kids",
227                documents_pages
228                    .into_iter()
229                    .map(|(object_id, _)| Object::Reference(object_id))
230                    .collect::<Vec<_>>(),
231            );
232
233            document
234                .objects
235                .insert(pages_object.0, Object::Dictionary(dictionary));
236        }
237
238        // Build a new "Catalog" with updated fields
239        if let Ok(dictionary) = catalog_object.1.as_dict() {
240            let mut dictionary = dictionary.clone();
241            dictionary.set("Pages", pages_object.0);
242            dictionary.remove(b"Outlines"); // Outlines not supported in merged PDFs
243
244            document
245                .objects
246                .insert(catalog_object.0, Object::Dictionary(dictionary));
247        }
248
249        document.trailer.set("Root", catalog_object.0);
250
251        // Update the max internal ID as wasn't updated before due to direct objects insertion
252        document.max_id = document.objects.len() as u32;
253
254        // Reorder all new Document objects
255        document.renumber_objects();
256
257        //Set any Bookmarks to the First child if they are not set to a page
258        document.adjust_zero_pages();
259
260        //Set all bookmarks to the PDF Object tree then set the Outlines to the Bookmark content map.
261        if let Some(n) = document.build_outline() {
262            if let Ok(x) = document.get_object_mut(catalog_object.0) {
263                if let Object::Dictionary(ref mut dict) = x {
264                    dict.set("Outlines", Object::Reference(n));
265                }
266            }
267        }
268
269        document.compress();
270
271        Ok(Self {
272            raw_document: document,
273            image_signature_object_id: HashMap::new(),
274        })
275    }
276
277    pub fn finished(self) -> Document {
278        self.raw_document
279    }
280
281    pub fn get_document_ref(&self) -> &Document {
282        &self.raw_document
283    }
284
285    pub fn add_object_from_scaled_vec(&mut self, rect: Rectangle) -> ObjectId {
286        let object_id = self.raw_document.add_object(dictionary! {
287            "Kids" => vec![Object::from(dictionary! {
288                "Type" => "Annot",
289                "Rect" => vec![rect.x1.into(),rect.y1.into(),rect.x2.into(),rect.y2.into()],
290            })]
291        });
292
293        return object_id;
294    }
295
296    pub fn add_signature_to_form<R: Read>(
297        &mut self,
298        image_reader: R,
299        image_name: &str,
300        page_id: ObjectId,
301        form_id: ObjectId,
302    ) -> Result<ObjectId, Error> {
303        let rect = Self::get_rectangle(form_id, &self.raw_document)?;
304
305        let image_object_id_opt = self.image_signature_object_id.get(image_name).cloned();
306
307        Ok(if let Some(image_object_id) = image_object_id_opt {
308            // Image was already added so we can reuse it.
309            self.add_image_to_page_only(image_object_id, image_name, page_id, rect)?
310        } else {
311            // Image was not added already so we need to add it in full
312            let image_object_id = self.add_image(image_reader, image_name, page_id, rect)?;
313            // Add signature to map
314            self.image_signature_object_id
315                .insert(image_name.to_owned(), image_object_id);
316            image_object_id
317        })
318    }
319
320    // add font if not used before and insert text in desired location
321    pub fn add_text_to_doc(
322        &mut self,
323        text: &str,
324        dimensions: (f64, f64),
325        _font: Font,
326        font_size: f64,
327        page_id: ObjectId,
328    ) -> Result<(), Error> {
329        let mut page_content = self.raw_document.get_and_decode_page_content(page_id)?;
330
331        let (x, y) = dimensions;
332
333        let operations = vec![
334            Operation::new("BT", vec![]),
335            Operation::new("Tf", vec!["F1".into(), font_size.into()]),
336            Operation::new("Td", vec![x.into(), y.into()]),
337            Operation::new("Tj", vec![Object::string_literal(text)]),
338            Operation::new("ET", vec![]),
339        ];
340
341        for i in operations {
342            page_content.operations.push(i);
343        }
344
345        self.raw_document
346            .change_page_content(page_id, page_content.encode()?)?;
347        Ok(())
348    }
349
350    /// For an AcroForm find the rectangle on the page.
351    fn get_rectangle(form_id: ObjectId, raw_doc: &Document) -> Result<Rectangle, Error> {
352        let mut rect = None;
353        // Get kids
354        let form_dict = raw_doc.get_object(form_id)?.as_dict()?;
355        let kids = if form_dict.has(b"Kids") {
356            Some(form_dict.get(b"Kids")?.as_array()?)
357        } else {
358            None
359        };
360
361        if let Some(kids) = kids {
362            for child in kids {
363                let child_dict = child.deref(raw_doc)?.as_dict()?;
364                // Child should be of `Type` `Annot` for Annotation.
365                if child_dict.has(b"Rect") {
366                    let child_rect = child_dict.get(b"Rect")?.as_array()?;
367                    if child_rect.len() >= 4 {
368                        // Found a reference, set as return value
369                        rect = Some(Rectangle {
370                            x1: child_rect[0].as_f64()?,
371                            y1: child_rect[1].as_f64()?,
372                            x2: child_rect[2].as_f64()?,
373                            y2: child_rect[3].as_f64()?,
374                        });
375                    }
376                }
377            }
378        }
379
380        rect.ok_or_else(|| Error::Other("AcroForm: Rectangle not found.".to_owned()))
381    }
382
383    fn add_image<R: Read>(
384        &mut self,
385        image_reader: R,
386        image_name: &str,
387        page_id: ObjectId,
388        rect: Rectangle,
389    ) -> Result<ObjectId, Error> {
390        // Load image
391        let image_decoder = png::Decoder::new(image_reader);
392        let (mut image_xobject, mask_xobject) = ImageXObject::try_from(image_decoder)?;
393        // Add object to object list
394        if let Some(mask_xobject) = mask_xobject {
395            let mask_xobject_id = self.raw_document.add_object(mask_xobject);
396            image_xobject.s_mask = Some(mask_xobject_id);
397        }
398        let image_xobject_id = self.raw_document.add_object(image_xobject);
399        // Add object to xobject list on page (with new IR)
400        // Because of the unique name this item will not be inserted more then once.
401        self.raw_document
402            .add_xobject(page_id, image_name, image_xobject_id)?;
403        // Add xobject to layer (make visible)
404        self.add_image_to_page_stream(image_name, page_id, rect)?;
405
406        Ok(image_xobject_id)
407    }
408
409    fn add_image_to_page_only(
410        &mut self,
411        image_xobject_id: ObjectId,
412        image_name: &str,
413        page_id: ObjectId,
414        rect: Rectangle,
415    ) -> Result<ObjectId, Error> {
416        // Add object to xobject list on page (with new IR)
417        // Because of the unique name this item will not be inserted more then once.
418        self.raw_document
419            .add_xobject(page_id, image_name, image_xobject_id)?;
420        // Add xobject to layer (make visible)
421        self.add_image_to_page_stream(image_name, page_id, rect)?;
422
423        Ok(image_xobject_id)
424    }
425
426    // The image must already be added to the object list!
427    // Please use `add_image` instead.
428    fn add_image_to_page_stream(
429        &mut self,
430        xobject_name: &str,
431        page_id: ObjectId,
432        rect: Rectangle,
433    ) -> Result<(), Error> {
434        let mut content = self.raw_document.get_and_decode_page_content(page_id)?;
435        let position = (rect.x1, rect.y1);
436        let size = (rect.x2 - rect.x1, rect.y2 - rect.y1);
437        // The following lines use commands: see p643 (Table A.1) for more info
438        // `q` = Save graphics state
439        content.operations.push(Operation::new("q", vec![]));
440        // `cm` = Concatenate matrix to current transformation matrix
441        content.operations.push(Operation::new(
442            "cm",
443            vec![
444                size.0.into(),
445                0i32.into(),
446                0i32.into(),
447                size.1.into(),
448                position.0.into(),
449                position.1.into(),
450            ],
451        ));
452        // `Do` = Invoke named XObject
453        content.operations.push(Operation::new(
454            "Do",
455            vec![Object::Name(xobject_name.as_bytes().to_vec())],
456        ));
457        // `Q` = Restore graphics state
458        content.operations.push(Operation::new("Q", vec![]));
459
460        self.raw_document
461            .change_page_content(page_id, content.encode()?)?;
462
463        Ok(())
464    }
465}