Skip to main content

pdfium_render/pdf/
document.rs

1//! Defines the [PdfDocument] struct, the entry point to all Pdfium functionality
2//! related to a single PDF file.
3
4pub mod attachment;
5pub mod attachments;
6pub mod bookmark;
7pub mod bookmarks;
8pub mod fonts;
9pub mod form;
10pub mod metadata;
11pub mod page;
12pub mod pages;
13pub mod permissions;
14pub mod signature;
15pub mod signatures;
16
17use crate::bindgen::FPDF_DOCUMENT;
18use crate::error::PdfiumError;
19use crate::error::PdfiumInternalError;
20use crate::pdf::document::attachments::PdfAttachments;
21use crate::pdf::document::bookmarks::PdfBookmarks;
22use crate::pdf::document::fonts::PdfFonts;
23use crate::pdf::document::form::PdfForm;
24use crate::pdf::document::metadata::PdfMetadata;
25use crate::pdf::document::pages::PdfPages;
26use crate::pdf::document::permissions::PdfPermissions;
27use crate::pdf::document::signatures::PdfSignatures;
28use crate::pdfium::PdfiumLibraryBindingsAccessor;
29use crate::utils::files::get_pdfium_file_writer_from_writer;
30use crate::utils::files::FpdfFileAccessExt;
31use std::fmt::{Debug, Formatter};
32use std::io::Cursor;
33use std::io::Write;
34
35#[cfg(not(target_arch = "wasm32"))]
36use std::fs::File;
37
38use std::marker::PhantomData;
39#[cfg(not(target_arch = "wasm32"))]
40use std::path::Path;
41
42#[cfg(target_arch = "wasm32")]
43use js_sys::{Array, Uint8Array};
44
45#[cfg(target_arch = "wasm32")]
46use wasm_bindgen::JsValue;
47
48#[cfg(target_arch = "wasm32")]
49use web_sys::Blob;
50
// Placeholder declaration that exists only while `cargo doc` is running (`cfg(doc)`).
// It gives WASM-only items that mention `Blob` a type to refer to, so that their
// documentation can still be generated on non-WASM targets, where the real
// `web_sys::Blob` import above is compiled out.

#[cfg(doc)]
struct Blob;
57
/// The file version of a [PdfDocument].
///
/// A list of PDF file versions is available at <https://en.wikipedia.org/wiki/History_of_PDF>.
///
/// Every variant carries at most a plain `i32`, so equality is total: the type implements
/// `Eq` and `Hash` in addition to `PartialEq`, and can be used as a key in hashed collections.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum PdfDocumentVersion {
    /// No version information is available. This is the case if the [PdfDocument]
    /// was created via a call to `Pdfium::create_new_pdf()` rather than loaded from a file.
    Unset,

    /// PDF 1.0, first published in 1993, supported by Acrobat Reader Carousel (1.0) onwards.
    Pdf1_0,

    /// PDF 1.1, first published in 1994, supported by Acrobat Reader 2.0 onwards.
    Pdf1_1,

    /// PDF 1.2, first published in 1996, supported by Acrobat Reader 3.0 onwards.
    Pdf1_2,

    /// PDF 1.3, first published in 2000, supported by Acrobat Reader 4.0 onwards.
    Pdf1_3,

    /// PDF 1.4, first published in 2001, supported by Acrobat Reader 5.0 onwards.
    Pdf1_4,

    /// PDF 1.5, first published in 2003, supported by Acrobat Reader 6.0 onwards.
    Pdf1_5,

    /// PDF 1.6, first published in 2004, supported by Acrobat Reader 7.0 onwards.
    Pdf1_6,

    /// PDF 1.7, first published in 2006, supported by Acrobat Reader 8.0 onwards,
    /// adopted as ISO open standard 32000-1 in 2008. Certain proprietary Adobe
    /// extensions to PDF 1.7 are only fully supported in Acrobat Reader X (10.0)
    /// and later.
    Pdf1_7,

    /// PDF 2.0, first published in 2017, ISO open standard 32000-2.
    Pdf2_0,

    /// A two-digit raw file version number. For instance, a value of 21 would indicate
    /// PDF version 2.1, a value of 34 would indicate PDF version 3.4, and so on.
    /// Only used when the file version number is not directly recognized by
    /// pdfium-render.
    Other(i32),
}
103
104impl PdfDocumentVersion {
105    /// The default [PdfDocumentVersion] applied to new documents.
106    pub const DEFAULT_VERSION: PdfDocumentVersion = PdfDocumentVersion::Pdf1_7;
107
108    #[inline]
109    pub(crate) fn from_pdfium(version: i32) -> Self {
110        match version {
111            10 => PdfDocumentVersion::Pdf1_0,
112            11 => PdfDocumentVersion::Pdf1_1,
113            12 => PdfDocumentVersion::Pdf1_2,
114            13 => PdfDocumentVersion::Pdf1_3,
115            14 => PdfDocumentVersion::Pdf1_4,
116            15 => PdfDocumentVersion::Pdf1_5,
117            16 => PdfDocumentVersion::Pdf1_6,
118            17 => PdfDocumentVersion::Pdf1_7,
119            20 => PdfDocumentVersion::Pdf2_0,
120            _ => PdfDocumentVersion::Other(version),
121        }
122    }
123
124    #[inline]
125    pub(crate) fn as_pdfium(&self) -> Option<i32> {
126        match self {
127            PdfDocumentVersion::Pdf1_0 => Some(10),
128            PdfDocumentVersion::Pdf1_1 => Some(11),
129            PdfDocumentVersion::Pdf1_2 => Some(12),
130            PdfDocumentVersion::Pdf1_3 => Some(13),
131            PdfDocumentVersion::Pdf1_4 => Some(14),
132            PdfDocumentVersion::Pdf1_5 => Some(15),
133            PdfDocumentVersion::Pdf1_6 => Some(16),
134            PdfDocumentVersion::Pdf1_7 => Some(17),
135            PdfDocumentVersion::Pdf2_0 => Some(20),
136            PdfDocumentVersion::Other(value) => Some(*value),
137            PdfDocumentVersion::Unset => None,
138        }
139    }
140}
141
/// An entry point to all the various object collections contained in a single PDF file.
/// These collections include:
/// * [PdfDocument::attachments()], an immutable collection of all the [PdfAttachments] in the document.
/// * [PdfDocument::attachments_mut()], a mutable collection of all the [PdfAttachments] in the document.
/// * [PdfDocument::bookmarks()], an immutable collection of all the [PdfBookmarks] in the document.
/// * [PdfDocument::fonts()], an immutable collection of all the [PdfFonts] in the document.
/// * [PdfDocument::fonts_mut()], a mutable collection of all the [PdfFonts] in the document.
/// * [PdfDocument::form()], an immutable reference to the [PdfForm] embedded in the document, if any.
/// * [PdfDocument::metadata()], an immutable collection of all the [PdfMetadata] tags in the document.
/// * [PdfDocument::pages()], an immutable collection of all the [PdfPages] in the document.
/// * [PdfDocument::pages_mut()], a mutable collection of all the [PdfPages] in the document.
/// * [PdfDocument::permissions()], settings relating to security handlers and document permissions
///   for the document.
/// * [PdfDocument::signatures()], an immutable collection of all the [PdfSignatures] in the document.
///
/// The underlying document handle is closed when the [PdfDocument] is dropped.
pub struct PdfDocument<'a> {
    /// The raw Pdfium handle for this document; passed to `FPDF_CloseDocument()` on drop.
    handle: FPDF_DOCUMENT,
    /// The file version requested via `set_version()`. When `None`, saving uses
    /// `FPDF_SaveAsCopy()` rather than `FPDF_SaveWithVersion()`.
    output_version: Option<PdfDocumentVersion>,
    attachments: PdfAttachments<'a>,
    bookmarks: PdfBookmarks<'a>,
    /// The form embedded in the document, if any. Cleared explicitly during drop,
    /// before the document handle itself is closed.
    form: Option<PdfForm<'a>>,
    fonts: PdfFonts<'a>,
    metadata: PdfMetadata<'a>,
    pages: PdfPages<'a>,
    permissions: PdfPermissions<'a>,
    signatures: PdfSignatures<'a>,
    /// The document's source bytes, when ownership was handed over via
    /// `set_source_byte_buffer()`, held here so they stay available for Pdfium to read.
    source_byte_buffer: Option<Vec<u8>>,

    /// The `FPDF_FILEACCESS` reader bound via `set_file_access_reader()`, held here so it
    /// stays available for Pdfium to read from.
    #[cfg_attr(target_arch = "wasm32", allow(dead_code))]
    // This field is never used when compiling to WASM.
    file_access_reader: Option<Box<FpdfFileAccessExt<'a>>>,

    /// Zero-sized marker tying this struct to the lifetime `'a`.
    lifetime: PhantomData<&'a FPDF_DOCUMENT>,
}
175
176impl<'a> PdfDocument<'a> {
177    #[inline]
178    pub(crate) fn from_pdfium(handle: FPDF_DOCUMENT) -> Self {
179        let form = PdfForm::from_pdfium(handle);
180
181        let pages = PdfPages::from_pdfium(handle, form.as_ref().map(|form| form.handle()));
182
183        PdfDocument {
184            handle,
185            output_version: None,
186            attachments: PdfAttachments::from_pdfium(handle),
187            bookmarks: PdfBookmarks::from_pdfium(handle),
188            form,
189            fonts: PdfFonts::from_pdfium(handle),
190            metadata: PdfMetadata::from_pdfium(handle),
191            pages,
192            permissions: PdfPermissions::from_pdfium(handle),
193            signatures: PdfSignatures::from_pdfium(handle),
194            source_byte_buffer: None,
195            file_access_reader: None,
196            lifetime: PhantomData,
197        }
198    }
199
200    /// Returns the internal `FPDF_DOCUMENT` handle for this [PdfDocument].
201    #[inline]
202    pub(crate) fn handle(&self) -> FPDF_DOCUMENT {
203        self.handle
204    }
205
206    /// Transfers ownership of the byte buffer containing the binary data of this [PdfDocument],
207    /// so that it will always be available for Pdfium to read data from as needed.
208    #[inline]
209    pub(crate) fn set_source_byte_buffer(&mut self, bytes: Vec<u8>) {
210        self.source_byte_buffer = Some(bytes);
211    }
212
213    /// Binds an `FPDF_FILEACCESS` reader to the lifetime of this [PdfDocument], so that
214    /// it will always be available for Pdfium to read data from as needed.
215    #[cfg_attr(target_arch = "wasm32", allow(dead_code))]
216    // This function is never used when compiling to WASM.
217    #[inline]
218    pub(crate) fn set_file_access_reader(&mut self, reader: Box<FpdfFileAccessExt<'a>>) {
219        self.file_access_reader = Some(reader);
220    }
221
222    /// Returns the file version of this [PdfDocument].
223    pub fn version(&self) -> PdfDocumentVersion {
224        let mut version = 0;
225
226        if unsafe {
227            self.bindings()
228                .FPDF_GetFileVersion(self.handle, &mut version)
229        } != 0
230        {
231            PdfDocumentVersion::from_pdfium(version)
232        } else {
233            PdfDocumentVersion::Unset
234        }
235    }
236
237    /// Sets the file version that will be used the next time this [PdfDocument] is saved.
238    pub fn set_version(&mut self, version: PdfDocumentVersion) {
239        self.output_version = Some(version);
240    }
241
242    /// Returns an immutable collection of all the [PdfAttachments] embedded in this [PdfDocument].
243    #[inline]
244    pub fn attachments(&self) -> &PdfAttachments<'_> {
245        &self.attachments
246    }
247
248    /// Returns a mutable collection of all the [PdfAttachments] embedded in this [PdfDocument].
249    #[inline]
250    pub fn attachments_mut(&mut self) -> &mut PdfAttachments<'a> {
251        &mut self.attachments
252    }
253
254    /// Returns an immutable collection of all the [PdfBookmarks] in this [PdfDocument].
255    #[inline]
256    pub fn bookmarks(&self) -> &PdfBookmarks<'_> {
257        &self.bookmarks
258    }
259
260    /// Returns an immutable reference to the [PdfForm] embedded in this [PdfDocument], if any.
261    #[inline]
262    pub fn form(&self) -> Option<&PdfForm<'_>> {
263        self.form.as_ref()
264    }
265
266    /// Returns an immutable collection of all the [PdfFonts] in this [PdfDocument].
267    #[inline]
268    pub fn fonts(&self) -> &PdfFonts<'_> {
269        &self.fonts
270    }
271
272    /// Returns a mutable collection of all the [PdfFonts] in this [PdfDocument].
273    #[inline]
274    pub fn fonts_mut(&mut self) -> &mut PdfFonts<'a> {
275        &mut self.fonts
276    }
277
278    /// Returns an immutable collection of all the [PdfMetadata] tags in this [PdfDocument].
279    #[inline]
280    pub fn metadata(&self) -> &PdfMetadata<'_> {
281        &self.metadata
282    }
283
284    /// Returns an immutable collection of all the [PdfPages] in this [PdfDocument].
285    #[inline]
286    pub fn pages(&self) -> &PdfPages<'a> {
287        &self.pages
288    }
289
290    /// Returns a mutable collection of all the [PdfPages] in this [PdfDocument].
291    #[inline]
292    pub fn pages_mut(&mut self) -> &mut PdfPages<'a> {
293        &mut self.pages
294    }
295
296    /// Returns an immutable collection of all the [PdfPermissions] applied to this [PdfDocument].
297    #[inline]
298    pub fn permissions(&self) -> &PdfPermissions<'_> {
299        &self.permissions
300    }
301
302    /// Returns an immutable collection of all the [PdfSignatures] attached to this [PdfDocument].
303    #[inline]
304    pub fn signatures(&self) -> &PdfSignatures<'_> {
305        &self.signatures
306    }
307
308    /// Writes this [PdfDocument] to the given writer.
309    pub fn save_to_writer<W: Write + 'static>(&self, writer: &mut W) -> Result<(), PdfiumError> {
310        // TODO: AJRC - 25/5/22 - investigate supporting the FPDF_INCREMENTAL, FPDF_NO_INCREMENTAL,
311        // and FPDF_REMOVE_SECURITY flags defined in fpdf_save.h. There's not a lot of information
312        // on what they actually do, however.
313        // Some small info at https://forum.patagames.com/posts/t155-PDF-SaveFlags.
314
315        let flags = 0;
316
317        let mut pdfium_file_writer = get_pdfium_file_writer_from_writer(writer);
318
319        let result = match self.output_version {
320            Some(version) => unsafe {
321                self.bindings().FPDF_SaveWithVersion(
322                    self.handle,
323                    pdfium_file_writer.as_fpdf_file_write_mut_ptr(),
324                    flags,
325                    version.as_pdfium().unwrap_or_else(|| {
326                        PdfDocumentVersion::DEFAULT_VERSION.as_pdfium().unwrap()
327                    }),
328                )
329            },
330            None => unsafe {
331                self.bindings().FPDF_SaveAsCopy(
332                    self.handle,
333                    pdfium_file_writer.as_fpdf_file_write_mut_ptr(),
334                    flags,
335                )
336            },
337        };
338
339        match self.bindings().is_true(result) {
340            true => {
341                // Pdfium's return value indicated success. Flush the buffer.
342
343                pdfium_file_writer.flush().map_err(PdfiumError::IoError)
344            }
345            false => {
346                // Pdfium's return value indicated failure.
347
348                Err(PdfiumError::PdfiumLibraryInternalError(
349                    PdfiumInternalError::Unknown,
350                ))
351            }
352        }
353    }
354
355    /// Writes this [PdfDocument] to the file at the given path.
356    ///
357    /// This function is not available when compiling to WASM. You have several options for
358    /// saving your PDF document data in WASM:
359    /// * Use either the [PdfDocument::save_to_writer()] or the [PdfDocument::save_to_bytes()] functions,
360    ///   both of which are available when compiling to WASM.
361    /// * Use the [PdfDocument::save_to_blob()] function to save document data directly into a new
362    ///   Javascript `Blob` object. This function is only available when compiling to WASM.
363    #[cfg(not(target_arch = "wasm32"))]
364    pub fn save_to_file(&self, path: &(impl AsRef<Path> + ?Sized)) -> Result<(), PdfiumError> {
365        self.save_to_writer(&mut File::create(path).map_err(PdfiumError::IoError)?)
366    }
367
368    /// Writes this [PdfDocument] to a new byte buffer, returning the byte buffer.
369    pub fn save_to_bytes(&self) -> Result<Vec<u8>, PdfiumError> {
370        let mut cursor = Cursor::new(Vec::new());
371
372        self.save_to_writer(&mut cursor)?;
373
374        Ok(cursor.into_inner())
375    }
376
377    /// Writes this [PdfDocument] to a new `Blob`, returning the `Blob`.
378    ///
379    /// This function is only available when compiling to WASM.
380    #[cfg(any(doc, target_arch = "wasm32"))]
381    pub fn save_to_blob(&self) -> Result<Blob, PdfiumError> {
382        let bytes = self.save_to_bytes()?;
383
384        let array = Uint8Array::new_with_length(bytes.len() as u32);
385
386        array.copy_from(bytes.as_slice());
387
388        let blob =
389            Blob::new_with_u8_array_sequence(&JsValue::from(Array::of1(&JsValue::from(array))))
390                .map_err(|_| PdfiumError::JsSysErrorConstructingBlobFromBytes)?;
391
392        Ok(blob)
393    }
394}
395
396impl<'a> Drop for PdfDocument<'a> {
397    /// Closes this [PdfDocument], releasing held memory and, if the document was loaded
398    /// from a file, the file handle on the document.
399    #[inline]
400    fn drop(&mut self) {
401        // Drop this document's PdfForm, if any, before we close the document itself.
402        // This ensures that FPDFDOC_ExitFormFillEnvironment() is called _before_ FPDF_CloseDocument(),
403        // avoiding a segmentation fault when using Pdfium builds compiled with V8/XFA support.
404
405        self.form = None;
406        unsafe {
407            self.bindings().FPDF_CloseDocument(self.handle);
408        }
409    }
410}
411
412impl<'a> Debug for PdfDocument<'a> {
413    #[inline]
414    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
415        f.debug_struct("PdfDocument")
416            .field("FPDF_DOCUMENT", &format!("{:?}", self.handle))
417            .finish()
418    }
419}
420
// Opts PdfDocument into PdfiumLibraryBindingsAccessor without overriding anything.
// NOTE(review): the `self.bindings()` calls in this file presumably resolve through
// this trait's provided methods — confirm against the trait definition.
impl<'a> PdfiumLibraryBindingsAccessor<'a> for PdfDocument<'a> {}
422
// When the `thread_safe` feature is enabled, PdfDocument is declared safe to move to
// (Send) and to share between (Sync) threads.
// NOTE(review): nothing in this file enforces that guarantee; presumably the
// `thread_safe` feature serializes access to Pdfium elsewhere — confirm before relying on it.
#[cfg(feature = "thread_safe")]
unsafe impl<'a> Send for PdfDocument<'a> {}

#[cfg(feature = "thread_safe")]
unsafe impl<'a> Sync for PdfDocument<'a> {}