pdfium_render/pdf/
document.rs

1//! Defines the [PdfDocument] struct, the entry point to all Pdfium functionality
2//! related to a single PDF file.
3
4pub mod attachment;
5pub mod attachments;
6pub mod bookmark;
7pub mod bookmarks;
8pub mod fonts;
9pub mod form;
10pub mod metadata;
11pub mod page;
12pub mod pages;
13pub mod permissions;
14pub mod signature;
15pub mod signatures;
16
17use crate::bindgen::FPDF_DOCUMENT;
18use crate::bindings::PdfiumLibraryBindings;
19use crate::error::PdfiumError;
20use crate::error::PdfiumInternalError;
21use crate::pdf::document::attachments::PdfAttachments;
22use crate::pdf::document::bookmarks::PdfBookmarks;
23use crate::pdf::document::fonts::PdfFonts;
24use crate::pdf::document::form::PdfForm;
25use crate::pdf::document::metadata::PdfMetadata;
26use crate::pdf::document::pages::PdfPages;
27use crate::pdf::document::permissions::PdfPermissions;
28use crate::pdf::document::signatures::PdfSignatures;
29use crate::utils::files::get_pdfium_file_writer_from_writer;
30use crate::utils::files::FpdfFileAccessExt;
31use std::fmt::{Debug, Formatter};
32use std::io::Cursor;
33use std::io::Write;
34
35#[cfg(not(target_arch = "wasm32"))]
36use std::fs::File;
37
38#[cfg(not(target_arch = "wasm32"))]
39use std::path::Path;
40
41#[cfg(target_arch = "wasm32")]
42use js_sys::{Array, Uint8Array};
43
44#[cfg(target_arch = "wasm32")]
45use wasm_bindgen::JsValue;
46
47#[cfg(target_arch = "wasm32")]
48use web_sys::Blob;
49
// The following dummy declaration is used only when running cargo doc on a
// non-WASM target. It allows documentation of WASM-specific functionality to be
// included in documentation generated on non-WASM targets.
//
// The declaration is excluded on WASM targets, where the real `web_sys::Blob`
// is already imported; declaring a second `Blob` there would define the name twice.

#[cfg(all(doc, not(target_arch = "wasm32")))]
struct Blob;
56
/// The file version of a [PdfDocument].
///
/// A list of PDF file versions is available at <https://en.wikipedia.org/wiki/History_of_PDF>.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum PdfDocumentVersion {
    /// No version information is available. This is the case if the [PdfDocument]
    /// was created via a call to `Pdfium::create_new_pdf()` rather than loaded from a file.
    Unset,

    /// PDF 1.0, first published in 1993, supported by Acrobat Reader Carousel (1.0) onwards.
    Pdf1_0,

    /// PDF 1.1, first published in 1994, supported by Acrobat Reader 2.0 onwards.
    Pdf1_1,

    /// PDF 1.2, first published in 1996, supported by Acrobat Reader 3.0 onwards.
    Pdf1_2,

    /// PDF 1.3, first published in 2000, supported by Acrobat Reader 4.0 onwards.
    Pdf1_3,

    /// PDF 1.4, first published in 2001, supported by Acrobat Reader 5.0 onwards.
    Pdf1_4,

    /// PDF 1.5, first published in 2003, supported by Acrobat Reader 6.0 onwards.
    Pdf1_5,

    /// PDF 1.6, first published in 2004, supported by Acrobat Reader 7.0 onwards.
    Pdf1_6,

    /// PDF 1.7, first published in 2006, supported by Acrobat Reader 8.0 onwards,
    /// adopted as ISO open standard 32000-1 in 2008. Certain proprietary Adobe
    /// extensions to PDF 1.7 are only fully supported in Acrobat Reader X (10.0)
    /// and later.
    Pdf1_7,

    /// PDF 2.0, first published in 2017, ISO open standard 32000-2.
    Pdf2_0,

    /// A two-digit raw file version number. For instance, a value of 21 would indicate
    /// PDF version 2.1, a value of 34 would indicate PDF version 3.4, and so on.
    /// Only used when the file version number is not directly recognized by
    /// pdfium-render.
    Other(i32),
}

impl PdfDocumentVersion {
    /// The default [PdfDocumentVersion] applied to new documents.
    pub const DEFAULT_VERSION: PdfDocumentVersion = PdfDocumentVersion::Pdf1_7;

    /// Converts a raw two-digit version number, as reported by Pdfium, into a
    /// [PdfDocumentVersion]. Numbers that do not correspond to a named variant are
    /// preserved unchanged inside [PdfDocumentVersion::Other]; this function never
    /// returns [PdfDocumentVersion::Unset].
    #[inline]
    pub(crate) fn from_pdfium(version: i32) -> Self {
        match version {
            10 => PdfDocumentVersion::Pdf1_0,
            11 => PdfDocumentVersion::Pdf1_1,
            12 => PdfDocumentVersion::Pdf1_2,
            13 => PdfDocumentVersion::Pdf1_3,
            14 => PdfDocumentVersion::Pdf1_4,
            15 => PdfDocumentVersion::Pdf1_5,
            16 => PdfDocumentVersion::Pdf1_6,
            17 => PdfDocumentVersion::Pdf1_7,
            20 => PdfDocumentVersion::Pdf2_0,
            _ => PdfDocumentVersion::Other(version),
        }
    }

    /// Converts this [PdfDocumentVersion] into the raw two-digit version number
    /// understood by Pdfium, or `None` for [PdfDocumentVersion::Unset], which has no
    /// numeric representation.
    ///
    /// The match is deliberately exhaustive (no wildcard arm) so that adding a new
    /// variant forces this mapping to be updated.
    #[inline]
    pub(crate) fn as_pdfium(&self) -> Option<i32> {
        match self {
            PdfDocumentVersion::Unset => None,
            PdfDocumentVersion::Pdf1_0 => Some(10),
            PdfDocumentVersion::Pdf1_1 => Some(11),
            PdfDocumentVersion::Pdf1_2 => Some(12),
            PdfDocumentVersion::Pdf1_3 => Some(13),
            PdfDocumentVersion::Pdf1_4 => Some(14),
            PdfDocumentVersion::Pdf1_5 => Some(15),
            PdfDocumentVersion::Pdf1_6 => Some(16),
            PdfDocumentVersion::Pdf1_7 => Some(17),
            PdfDocumentVersion::Pdf2_0 => Some(20),
            PdfDocumentVersion::Other(value) => Some(*value),
        }
    }
}
140
/// An entry point to all the various object collections contained in a single PDF file.
/// These collections include:
/// * [PdfDocument::attachments()], an immutable collection of all the [PdfAttachments] in the document.
/// * [PdfDocument::attachments_mut()], a mutable collection of all the [PdfAttachments] in the document.
/// * [PdfDocument::bookmarks()], an immutable collection of all the [PdfBookmarks] in the document.
/// * [PdfDocument::fonts()], an immutable collection of all the [PdfFonts] in the document.
/// * [PdfDocument::fonts_mut()], a mutable collection of all the [PdfFonts] in the document.
/// * [PdfDocument::form()], an immutable reference to the [PdfForm] embedded in the document, if any.
/// * [PdfDocument::metadata()], an immutable collection of all the [PdfMetadata] tags in the document.
/// * [PdfDocument::pages()], an immutable collection of all the [PdfPages] in the document.
/// * [PdfDocument::pages_mut()], a mutable collection of all the [PdfPages] in the document.
/// * [PdfDocument::permissions()], settings relating to security handlers and document permissions
///   for the document.
/// * [PdfDocument::signatures()], an immutable collection of all the [PdfSignatures] in the document.
///
/// The document is closed via `FPDF_CloseDocument()` when this value is dropped.
pub struct PdfDocument<'a> {
    // The raw Pdfium document handle that every call into the bindings is made against.
    handle: FPDF_DOCUMENT,
    // The file version requested via `set_version()`, if any. `None` means the document
    // is saved without an explicit version being passed to Pdfium.
    output_version: Option<PdfDocumentVersion>,
    // Per-document collections, each constructed from the same handle and bindings.
    attachments: PdfAttachments<'a>,
    bookmarks: PdfBookmarks<'a>,
    // Held as an `Option` so that the form can be released explicitly, ahead of the
    // document handle, when the document is dropped.
    form: Option<PdfForm<'a>>,
    fonts: PdfFonts<'a>,
    metadata: PdfMetadata<'a>,
    pages: PdfPages<'a>,
    permissions: PdfPermissions<'a>,
    signatures: PdfSignatures<'a>,
    // The Pdfium bindings used for every library call made on behalf of this document.
    bindings: &'a dyn PdfiumLibraryBindings,
    // Owned copy of the document's source bytes, if one was handed over via
    // `set_source_byte_buffer()`; kept alive for as long as the document exists.
    source_byte_buffer: Option<Vec<u8>>,

    #[cfg_attr(target_arch = "wasm32", allow(dead_code))]
    // This field is never used when compiling to WASM.
    // Owned `FPDF_FILEACCESS` reader, if one was bound via `set_file_access_reader()`;
    // kept alive for as long as the document exists.
    file_access_reader: Option<Box<FpdfFileAccessExt<'a>>>,
}
173
174impl<'a> PdfDocument<'a> {
175    #[inline]
176    pub(crate) fn from_pdfium(
177        handle: FPDF_DOCUMENT,
178        bindings: &'a dyn PdfiumLibraryBindings,
179    ) -> Self {
180        let form = PdfForm::from_pdfium(handle, bindings);
181
182        let pages =
183            PdfPages::from_pdfium(handle, form.as_ref().map(|form| form.handle()), bindings);
184
185        PdfDocument {
186            handle,
187            output_version: None,
188            attachments: PdfAttachments::from_pdfium(handle, bindings),
189            bookmarks: PdfBookmarks::from_pdfium(handle, bindings),
190            form,
191            fonts: PdfFonts::from_pdfium(handle, bindings),
192            metadata: PdfMetadata::from_pdfium(handle, bindings),
193            pages,
194            permissions: PdfPermissions::from_pdfium(handle, bindings),
195            signatures: PdfSignatures::from_pdfium(handle, bindings),
196            bindings,
197            source_byte_buffer: None,
198            file_access_reader: None,
199        }
200    }
201
202    /// Returns the internal `FPDF_DOCUMENT` handle for this [PdfDocument].
203    #[inline]
204    pub(crate) fn handle(&self) -> FPDF_DOCUMENT {
205        self.handle
206    }
207
208    /// Returns the [PdfiumLibraryBindings] used by this [PdfDocument].
209    #[inline]
210    pub fn bindings(&self) -> &'a dyn PdfiumLibraryBindings {
211        self.bindings
212    }
213
214    /// Transfers ownership of the byte buffer containing the binary data of this [PdfDocument],
215    /// so that it will always be available for Pdfium to read data from as needed.
216    #[inline]
217    pub(crate) fn set_source_byte_buffer(&mut self, bytes: Vec<u8>) {
218        self.source_byte_buffer = Some(bytes);
219    }
220
221    /// Binds an `FPDF_FILEACCESS` reader to the lifetime of this [PdfDocument], so that
222    /// it will always be available for Pdfium to read data from as needed.
223    #[cfg_attr(target_arch = "wasm32", allow(dead_code))]
224    // This function is never used when compiling to WASM.
225    #[inline]
226    pub(crate) fn set_file_access_reader(&mut self, reader: Box<FpdfFileAccessExt<'a>>) {
227        self.file_access_reader = Some(reader);
228    }
229
230    /// Returns the file version of this [PdfDocument].
231    pub fn version(&self) -> PdfDocumentVersion {
232        let mut version = 0;
233
234        if self.bindings.FPDF_GetFileVersion(self.handle, &mut version) != 0 {
235            PdfDocumentVersion::from_pdfium(version)
236        } else {
237            PdfDocumentVersion::Unset
238        }
239    }
240
241    /// Sets the file version that will be used the next time this [PdfDocument] is saved.
242    pub fn set_version(&mut self, version: PdfDocumentVersion) {
243        self.output_version = Some(version);
244    }
245
246    /// Returns an immutable collection of all the [PdfAttachments] embedded in this [PdfDocument].
247    #[inline]
248    pub fn attachments(&self) -> &PdfAttachments {
249        &self.attachments
250    }
251
252    /// Returns a mutable collection of all the [PdfAttachments] embedded in this [PdfDocument].
253    #[inline]
254    pub fn attachments_mut(&mut self) -> &mut PdfAttachments<'a> {
255        &mut self.attachments
256    }
257
258    /// Returns an immutable collection of all the [PdfBookmarks] in this [PdfDocument].
259    #[inline]
260    pub fn bookmarks(&self) -> &PdfBookmarks {
261        &self.bookmarks
262    }
263
264    /// Returns an immutable reference to the [PdfForm] embedded in this [PdfDocument], if any.
265    #[inline]
266    pub fn form(&self) -> Option<&PdfForm> {
267        self.form.as_ref()
268    }
269
270    /// Returns an immutable collection of all the [PdfFonts] in this [PdfDocument].
271    #[inline]
272    pub fn fonts(&self) -> &PdfFonts {
273        &self.fonts
274    }
275
276    /// Returns a mutable collection of all the [PdfFonts] in this [PdfDocument].
277    #[inline]
278    pub fn fonts_mut(&mut self) -> &mut PdfFonts<'a> {
279        &mut self.fonts
280    }
281
282    /// Returns an immutable collection of all the [PdfMetadata] tags in this [PdfDocument].
283    #[inline]
284    pub fn metadata(&self) -> &PdfMetadata {
285        &self.metadata
286    }
287
288    /// Returns an immutable collection of all the [PdfPages] in this [PdfDocument].
289    #[inline]
290    pub fn pages(&self) -> &PdfPages<'a> {
291        &self.pages
292    }
293
294    /// Returns a mutable collection of all the [PdfPages] in this [PdfDocument].
295    #[inline]
296    pub fn pages_mut(&mut self) -> &mut PdfPages<'a> {
297        &mut self.pages
298    }
299
300    /// Returns an immutable collection of all the [PdfPermissions] applied to this [PdfDocument].
301    #[inline]
302    pub fn permissions(&self) -> &PdfPermissions {
303        &self.permissions
304    }
305
306    /// Returns an immutable collection of all the [PdfSignatures] attached to this [PdfDocument].
307    #[inline]
308    pub fn signatures(&self) -> &PdfSignatures {
309        &self.signatures
310    }
311
312    /// Writes this [PdfDocument] to the given writer.
313    pub fn save_to_writer<W: Write + 'static>(&self, writer: &mut W) -> Result<(), PdfiumError> {
314        // TODO: AJRC - 25/5/22 - investigate supporting the FPDF_INCREMENTAL, FPDF_NO_INCREMENTAL,
315        // and FPDF_REMOVE_SECURITY flags defined in fpdf_save.h. There's not a lot of information
316        // on what they actually do, however.
317        // Some small info at https://forum.patagames.com/posts/t155-PDF-SaveFlags.
318
319        let flags = 0;
320
321        let mut pdfium_file_writer = get_pdfium_file_writer_from_writer(writer);
322
323        let result = match self.output_version {
324            Some(version) => self.bindings.FPDF_SaveWithVersion(
325                self.handle,
326                pdfium_file_writer.as_fpdf_file_write_mut_ptr(),
327                flags,
328                version
329                    .as_pdfium()
330                    .unwrap_or_else(|| PdfDocumentVersion::DEFAULT_VERSION.as_pdfium().unwrap()),
331            ),
332            None => self.bindings.FPDF_SaveAsCopy(
333                self.handle,
334                pdfium_file_writer.as_fpdf_file_write_mut_ptr(),
335                flags,
336            ),
337        };
338
339        match self.bindings.is_true(result) {
340            true => {
341                // Pdfium's return value indicated success. Flush the buffer.
342
343                pdfium_file_writer.flush().map_err(PdfiumError::IoError)
344            }
345            false => {
346                // Pdfium's return value indicated failure.
347
348                Err(PdfiumError::PdfiumLibraryInternalError(
349                    PdfiumInternalError::Unknown,
350                ))
351            }
352        }
353    }
354
355    /// Writes this [PdfDocument] to the file at the given path.
356    ///
357    /// This function is not available when compiling to WASM. You have several options for
358    /// saving your PDF document data in WASM:
359    /// * Use either the [PdfDocument::save_to_writer()] or the [PdfDocument::save_to_bytes()] functions,
360    ///   both of which are available when compiling to WASM.
361    /// * Use the [PdfDocument::save_to_blob()] function to save document data directly into a new
362    ///   Javascript `Blob` object. This function is only available when compiling to WASM.
363    #[cfg(not(target_arch = "wasm32"))]
364    pub fn save_to_file(&self, path: &(impl AsRef<Path> + ?Sized)) -> Result<(), PdfiumError> {
365        self.save_to_writer(&mut File::create(path).map_err(PdfiumError::IoError)?)
366    }
367
368    /// Writes this [PdfDocument] to a new byte buffer, returning the byte buffer.
369    pub fn save_to_bytes(&self) -> Result<Vec<u8>, PdfiumError> {
370        let mut cursor = Cursor::new(Vec::new());
371
372        self.save_to_writer(&mut cursor)?;
373
374        Ok(cursor.into_inner())
375    }
376
377    /// Writes this [PdfDocument] to a new `Blob`, returning the `Blob`.
378    ///
379    /// This function is only available when compiling to WASM.
380    #[cfg(any(doc, target_arch = "wasm32"))]
381    pub fn save_to_blob(&self) -> Result<Blob, PdfiumError> {
382        let bytes = self.save_to_bytes()?;
383
384        let array = Uint8Array::new_with_length(bytes.len() as u32);
385
386        array.copy_from(bytes.as_slice());
387
388        let blob =
389            Blob::new_with_u8_array_sequence(&JsValue::from(Array::of1(&JsValue::from(array))))
390                .map_err(|_| PdfiumError::JsSysErrorConstructingBlobFromBytes)?;
391
392        Ok(blob)
393    }
394}
395
396impl<'a> Drop for PdfDocument<'a> {
397    /// Closes this [PdfDocument], releasing held memory and, if the document was loaded
398    /// from a file, the file handle on the document.
399    #[inline]
400    fn drop(&mut self) {
401        // Drop this document's PdfForm, if any, before we close the document itself.
402        // This ensures that FPDFDOC_ExitFormFillEnvironment() is called _before_ FPDF_CloseDocument(),
403        // avoiding a segmentation fault when using Pdfium builds compiled with V8/XFA support.
404
405        self.form = None;
406        self.bindings.FPDF_CloseDocument(self.handle);
407    }
408}
409
410impl<'a> Debug for PdfDocument<'a> {
411    #[inline]
412    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
413        f.debug_struct("PdfDocument")
414            .field("FPDF_DOCUMENT", &format!("{:?}", self.handle))
415            .finish()
416    }
417}
418
// Opt-in (via the `sync` feature) assertion that a `PdfDocument` may be shared
// between threads.
// NOTE(review): nothing visible in this file establishes that concurrent access to the
// wrapped document handle is sound; the guarantee must come from elsewhere -- TODO confirm.
#[cfg(feature = "sync")]
unsafe impl Sync for PdfDocument<'_> {}
421
// Opt-in (via the `sync` feature) assertion that a `PdfDocument` may be moved to
// another thread.
// NOTE(review): nothing visible in this file establishes that using the wrapped document
// handle from a different thread is sound; the guarantee must come from elsewhere -- TODO confirm.
#[cfg(feature = "sync")]
unsafe impl Send for PdfDocument<'_> {}