pdfium_render/
pdfium.rs

1//! Defines the [Pdfium] struct, a high-level idiomatic Rust wrapper around Pdfium.
2
3use crate::bindings::PdfiumLibraryBindings;
4use crate::error::{PdfiumError, PdfiumInternalError};
5use crate::pdf::document::{PdfDocument, PdfDocumentVersion};
6use std::fmt::{Debug, Formatter};
7
8#[cfg(all(not(target_arch = "wasm32"), not(feature = "static")))]
9use {
10    crate::bindings::dynamic::DynamicPdfiumBindings, libloading::Library, std::ffi::OsString,
11    std::path::PathBuf,
12};
13
14#[cfg(all(not(target_arch = "wasm32"), feature = "static"))]
15use crate::bindings::static_bindings::StaticPdfiumBindings;
16
17#[cfg(not(target_arch = "wasm32"))]
18use {
19    crate::utils::files::get_pdfium_file_accessor_from_reader,
20    std::fs::File,
21    std::io::{Read, Seek},
22    std::path::Path,
23};
24
25#[cfg(target_arch = "wasm32")]
26use {
27    crate::bindings::wasm::{PdfiumRenderWasmState, WasmPdfiumBindings},
28    js_sys::{ArrayBuffer, Uint8Array},
29    wasm_bindgen::JsCast,
30    wasm_bindgen_futures::JsFuture,
31    web_sys::{window, Blob, Response},
32};
33
34#[cfg(feature = "thread_safe")]
35use crate::bindings::thread_safe::ThreadSafePdfiumBindings;
36
// Placeholder type that exists only while `cargo doc` is running. It lets the
// signatures of the WASM-only functions in this file be rendered in documentation
// built on non-WASM targets.

#[cfg(doc)]
struct Blob;
43
44/// A high-level idiomatic Rust wrapper around Pdfium, the C++ PDF library used by
45/// the Google Chromium project.
46pub struct Pdfium {
47    bindings: Box<dyn PdfiumLibraryBindings>,
48}
49
50impl Pdfium {
51    /// Binds to a Pdfium library that was statically linked into the currently running
52    /// executable, returning a new [PdfiumLibraryBindings] object that contains bindings to the
53    /// functions exposed by the library. The application will immediately crash if Pdfium
54    /// was not correctly statically linked into the executable at compile time.
55    ///
56    /// This function is only available when this crate's `static` feature is enabled.
57    #[cfg(not(target_arch = "wasm32"))]
58    #[cfg(any(doc, feature = "static"))]
59    #[inline]
60    pub fn bind_to_statically_linked_library() -> Result<Box<dyn PdfiumLibraryBindings>, PdfiumError>
61    {
62        let bindings = StaticPdfiumBindings::new();
63
64        #[cfg(feature = "thread_safe")]
65        let bindings = ThreadSafePdfiumBindings::new(bindings);
66
67        Ok(Box::new(bindings))
68    }
69
70    /// Initializes the external Pdfium library, loading it from the system libraries.
71    /// Returns a new [PdfiumLibraryBindings] object that contains bindings to the functions exposed
72    /// by the library, or an error if the library could not be loaded.
73    #[cfg(not(target_arch = "wasm32"))]
74    #[cfg(not(feature = "static"))]
75    #[inline]
76    pub fn bind_to_system_library() -> Result<Box<dyn PdfiumLibraryBindings>, PdfiumError> {
77        let bindings = DynamicPdfiumBindings::new(
78            unsafe { Library::new(Self::pdfium_platform_library_name()) }
79                .map_err(PdfiumError::LoadLibraryError)?,
80        )?;
81
82        #[cfg(feature = "thread_safe")]
83        let bindings = ThreadSafePdfiumBindings::new(bindings);
84
85        Ok(Box::new(bindings))
86    }
87
88    /// Initializes the external Pdfium library, binding to an external WASM module.
89    /// Returns a new [PdfiumLibraryBindings] object that contains bindings to the functions exposed
90    /// by the library, or an error if the library is not available.
91    ///
92    /// It is essential that the exported `initialize_pdfium_render()` function be called
93    /// from Javascript _before_ calling this function from within your Rust code. For an example, see:
94    /// <https://github.com/ajrcarey/pdfium-render/blob/master/examples/index.html>
95    #[cfg(target_arch = "wasm32")]
96    #[inline]
97    pub fn bind_to_system_library() -> Result<Box<dyn PdfiumLibraryBindings>, PdfiumError> {
98        if PdfiumRenderWasmState::lock().is_ready() {
99            let bindings = WasmPdfiumBindings::new();
100
101            #[cfg(feature = "thread_safe")]
102            let bindings = ThreadSafePdfiumBindings::new(bindings);
103
104            Ok(Box::new(bindings))
105        } else {
106            Err(PdfiumError::PdfiumWASMModuleNotConfigured)
107        }
108    }
109
110    /// Initializes the external pdfium library, loading it from the given path.
111    /// Returns a new [PdfiumLibraryBindings] object that contains bindings to the functions
112    /// exposed by the library, or an error if the library could not be loaded.
113    #[cfg(not(target_arch = "wasm32"))]
114    #[cfg(not(feature = "static"))]
115    #[inline]
116    pub fn bind_to_library(
117        path: impl AsRef<Path>,
118    ) -> Result<Box<dyn PdfiumLibraryBindings>, PdfiumError> {
119        let bindings = DynamicPdfiumBindings::new(
120            unsafe { Library::new(path.as_ref().as_os_str()) }
121                .map_err(PdfiumError::LoadLibraryError)?,
122        )?;
123
124        #[cfg(feature = "thread_safe")]
125        let bindings = ThreadSafePdfiumBindings::new(bindings);
126
127        Ok(Box::new(bindings))
128    }
129
130    /// Returns the name of the external Pdfium library on the currently running platform.
131    /// On Linux and Android, this will be `libpdfium.so` or similar; on Windows, this will
132    /// be `pdfium.dll` or similar; on MacOS, this will be `libpdfium.dylib` or similar.
133    #[cfg(not(target_arch = "wasm32"))]
134    #[cfg(not(feature = "static"))]
135    #[inline]
136    pub fn pdfium_platform_library_name() -> OsString {
137        libloading::library_filename("pdfium")
138    }
139
140    /// Returns the name of the external Pdfium library on the currently running platform,
141    /// prefixed with the given path string.
142    #[cfg(not(target_arch = "wasm32"))]
143    #[cfg(not(feature = "static"))]
144    #[inline]
145    pub fn pdfium_platform_library_name_at_path(path: &(impl AsRef<Path> + ?Sized)) -> PathBuf {
146        path.as_ref().join(Pdfium::pdfium_platform_library_name())
147    }
148
149    /// Creates a new [Pdfium] instance from the given external Pdfium library bindings.
150    #[inline]
151    pub fn new(bindings: Box<dyn PdfiumLibraryBindings>) -> Self {
152        bindings.FPDF_InitLibrary();
153
154        Self { bindings }
155    }
156
157    // TODO: AJRC - 17/9/22 - remove deprecated Pdfium::get_bindings() function in 0.9.0
158    // as part of tracking issue https://github.com/ajrcarey/pdfium-render/issues/36
159    /// Returns the [PdfiumLibraryBindings] wrapped by this instance of [Pdfium].
160    #[deprecated(
161        since = "0.7.18",
162        note = "This function has been renamed. Use the Pdfium::bindings() function instead."
163    )]
164    #[doc(hidden)]
165    #[inline]
166    pub fn get_bindings(&self) -> &dyn PdfiumLibraryBindings {
167        self.bindings.as_ref()
168    }
169
170    /// Returns the [PdfiumLibraryBindings] wrapped by this instance of [Pdfium].
171    #[inline]
172    pub fn bindings(&self) -> &dyn PdfiumLibraryBindings {
173        self.bindings.as_ref()
174    }
175
176    // TODO: AJRC - 18/12/22 - remove deprecated Pdfium::load_pdf_from_bytes() function in 0.9.0
177    // as part of tracking issue https://github.com/ajrcarey/pdfium-render/issues/36
178    /// Returns the [PdfiumLibraryBindings] wrapped by this instance of [Pdfium].
179    #[deprecated(
180        since = "0.7.26",
181        note = "This function has been renamed. Use the Pdfium::load_pdf_from_byte_slice() function instead."
182    )]
183    #[doc(hidden)]
184    #[inline]
185    pub fn load_pdf_from_bytes(
186        &self,
187        bytes: &'static [u8],
188        password: Option<&str>,
189    ) -> Result<PdfDocument<'_>, PdfiumError> {
190        self.load_pdf_from_byte_slice(bytes, password)
191    }
192
193    /// Attempts to open a [PdfDocument] from the given static byte buffer.
194    ///
195    /// If the document is password protected, the given password will be used to unlock it.
196    pub fn load_pdf_from_byte_slice<'a>(
197        &'a self,
198        bytes: &'a [u8],
199        password: Option<&str>,
200    ) -> Result<PdfDocument<'a>, PdfiumError> {
201        Self::pdfium_document_handle_to_result(
202            self.bindings.FPDF_LoadMemDocument64(bytes, password),
203            self.bindings(),
204        )
205    }
206
207    /// Attempts to open a [PdfDocument] from the given owned byte buffer.
208    ///
209    /// If the document is password protected, the given password will be used to unlock it.
210    ///
211    /// `pdfium-render` will take ownership of the given byte buffer, ensuring its lifetime lasts
212    /// as long as the [PdfDocument] opened from it.
213    pub fn load_pdf_from_byte_vec(
214        &self,
215        bytes: Vec<u8>,
216        password: Option<&str>,
217    ) -> Result<PdfDocument<'_>, PdfiumError> {
218        Self::pdfium_document_handle_to_result(
219            self.bindings
220                .FPDF_LoadMemDocument64(bytes.as_slice(), password),
221            self.bindings(),
222        )
223        .map(|mut document| {
224            // Give the newly-created document ownership of the byte buffer, so that Pdfium can continue
225            // to read from it on an as-needed basis throughout the lifetime of the document.
226
227            document.set_source_byte_buffer(bytes);
228
229            document
230        })
231    }
232
233    /// Attempts to open a [PdfDocument] from the given file path.
234    ///
235    /// If the document is password protected, the given password will be used
236    /// to unlock it.
237    ///
238    /// This function is not available when compiling to WASM. You have several options for
239    /// loading your PDF document data in WASM:
240    /// * Use the [Pdfium::load_pdf_from_fetch()] function to download document data from a
241    ///   URL using the browser's built-in `fetch` API. This function is only available when
242    ///   compiling to WASM.
243    /// * Use the [Pdfium::load_pdf_from_blob()] function to load document data from a
244    ///   Javascript `File` or `Blob` object (such as a `File` object returned from an HTML
245    ///   `<input type="file">` element). This function is only available when compiling to WASM.
246    /// * Use another method to retrieve the bytes of the target document over the network,
247    ///   then load those bytes into Pdfium using either the [Pdfium::load_pdf_from_byte_slice()]
248    ///   function or the [Pdfium::load_pdf_from_byte_vec()] function.
249    /// * Embed the bytes of the target document directly into the compiled WASM module
250    ///   using the `include_bytes!` macro.
251    #[cfg(not(target_arch = "wasm32"))]
252    pub fn load_pdf_from_file<'a>(
253        &'a self,
254        path: &(impl AsRef<Path> + ?Sized),
255        password: Option<&'a str>,
256    ) -> Result<PdfDocument<'a>, PdfiumError> {
257        self.load_pdf_from_reader(File::open(path).map_err(PdfiumError::IoError)?, password)
258    }
259
260    /// Attempts to open a [PdfDocument] from the given reader.
261    ///
262    /// Pdfium will only load the portions of the document it actually needs into memory.
263    /// This is more efficient than loading the entire document into memory, especially when
264    /// working with large documents, and allows for working with documents larger than the
265    /// amount of available memory.
266    ///
267    /// Because Pdfium must know the total content length in advance prior to loading
268    /// any portion of it, the given reader must implement the [Seek] trait as well as
269    /// the [Read] trait.
270    ///
271    /// If the document is password protected, the given password will be used
272    /// to unlock it.
273    ///
274    /// This function is not available when compiling to WASM. You have several options for
275    /// loading your PDF document data in WASM:
276    /// * Use the [Pdfium::load_pdf_from_fetch()] function to download document data from a
277    ///   URL using the browser's built-in `fetch` API. This function is only available when
278    ///   compiling to WASM.
279    /// * Use the [Pdfium::load_pdf_from_blob()] function to load document data from a
280    ///   Javascript `File` or `Blob` object (such as a `File` object returned from an HTML
281    ///   `<input type="file">` element). This function is only available when compiling to WASM.
282    /// * Use another method to retrieve the bytes of the target document over the network,
283    ///   then load those bytes into Pdfium using either the [Pdfium::load_pdf_from_byte_slice()]
284    ///   function or the [Pdfium::load_pdf_from_byte_vec()] function.
285    /// * Embed the bytes of the target document directly into the compiled WASM module
286    ///   using the `include_bytes!` macro.
287    #[cfg(not(target_arch = "wasm32"))]
288    pub fn load_pdf_from_reader<'a, R: Read + Seek + 'a>(
289        &'a self,
290        reader: R,
291        password: Option<&'a str>,
292    ) -> Result<PdfDocument<'a>, PdfiumError> {
293        let mut reader = get_pdfium_file_accessor_from_reader(reader);
294
295        Pdfium::pdfium_document_handle_to_result(
296            self.bindings
297                .FPDF_LoadCustomDocument(reader.as_fpdf_file_access_mut_ptr(), password),
298            self.bindings(),
299        )
300        .map(|mut document| {
301            // Give the newly-created document ownership of the reader, so that Pdfium can continue
302            // to read from it on an as-needed basis throughout the lifetime of the document.
303
304            document.set_file_access_reader(reader);
305
306            document
307        })
308    }
309
310    /// Attempts to open a [PdfDocument] by loading document data from the given URL.
311    /// The Javascript `fetch` API is used to download data over the network.
312    ///
313    /// If the document is password protected, the given password will be used to unlock it.
314    ///
315    /// This function is only available when compiling to WASM.
316    #[cfg(any(doc, target_arch = "wasm32"))]
317    pub async fn load_pdf_from_fetch<'a>(
318        &'a self,
319        url: impl ToString,
320        password: Option<&str>,
321    ) -> Result<PdfDocument<'a>, PdfiumError> {
322        if let Some(window) = window() {
323            let fetch_result = JsFuture::from(window.fetch_with_str(url.to_string().as_str()))
324                .await
325                .map_err(PdfiumError::WebSysFetchError)?;
326
327            debug_assert!(fetch_result.is_instance_of::<Response>());
328
329            let response: Response = fetch_result
330                .dyn_into()
331                .map_err(|_| PdfiumError::WebSysInvalidResponseError)?;
332
333            let blob: Blob =
334                JsFuture::from(response.blob().map_err(PdfiumError::WebSysFetchError)?)
335                    .await
336                    .map_err(PdfiumError::WebSysFetchError)?
337                    .into();
338
339            self.load_pdf_from_blob(blob, password).await
340        } else {
341            Err(PdfiumError::WebSysWindowObjectNotAvailable)
342        }
343    }
344
345    /// Attempts to open a [PdfDocument] by loading document data from the given `Blob`.
346    /// A `File` object returned from a `FileList` is a suitable `Blob`:
347    ///
348    /// ```text
349    /// <input id="filePicker" type="file">
350    ///
351    /// const file = document.getElementById('filePicker').files[0];
352    /// ```
353    ///
354    /// If the document is password protected, the given password will be used to unlock it.
355    ///
356    /// This function is only available when compiling to WASM.
357    #[cfg(any(doc, target_arch = "wasm32"))]
358    pub async fn load_pdf_from_blob<'a>(
359        &'a self,
360        blob: Blob,
361        password: Option<&str>,
362    ) -> Result<PdfDocument<'a>, PdfiumError> {
363        let array_buffer: ArrayBuffer = JsFuture::from(blob.array_buffer())
364            .await
365            .map_err(PdfiumError::WebSysFetchError)?
366            .into();
367
368        let u8_array: Uint8Array = Uint8Array::new(&array_buffer);
369
370        let bytes: Vec<u8> = u8_array.to_vec();
371
372        self.load_pdf_from_byte_vec(bytes, password)
373    }
374
375    /// Creates a new, empty [PdfDocument] in memory.
376    pub fn create_new_pdf(&self) -> Result<PdfDocument<'_>, PdfiumError> {
377        Self::pdfium_document_handle_to_result(
378            self.bindings.FPDF_CreateNewDocument(),
379            self.bindings(),
380        )
381        .map(|mut document| {
382            document.set_version(PdfDocumentVersion::DEFAULT_VERSION);
383
384            document
385        })
386    }
387
388    /// Returns a [PdfDocument] from the given `FPDF_DOCUMENT` handle, if possible.
389    pub(crate) fn pdfium_document_handle_to_result(
390        handle: crate::bindgen::FPDF_DOCUMENT,
391        bindings: &dyn PdfiumLibraryBindings,
392    ) -> Result<PdfDocument<'_>, PdfiumError> {
393        if handle.is_null() {
394            // Retrieve the error code of the last error recorded by Pdfium.
395
396            if let Some(error) = match bindings.FPDF_GetLastError() as u32 {
397                crate::bindgen::FPDF_ERR_SUCCESS => None,
398                crate::bindgen::FPDF_ERR_UNKNOWN => Some(PdfiumInternalError::Unknown),
399                crate::bindgen::FPDF_ERR_FILE => Some(PdfiumInternalError::FileError),
400                crate::bindgen::FPDF_ERR_FORMAT => Some(PdfiumInternalError::FormatError),
401                crate::bindgen::FPDF_ERR_PASSWORD => Some(PdfiumInternalError::PasswordError),
402                crate::bindgen::FPDF_ERR_SECURITY => Some(PdfiumInternalError::SecurityError),
403                crate::bindgen::FPDF_ERR_PAGE => Some(PdfiumInternalError::PageError),
404                // The Pdfium documentation says "... if the previous SDK call succeeded, [then] the
405                // return value of this function is not defined". On Linux, at least, a return value
406                // of FPDF_ERR_SUCCESS seems to be consistently returned; on Windows, however, the
407                // return values are indeed unpredictable. See https://github.com/ajrcarey/pdfium-render/issues/24.
408                // Therefore, if the return value does not match one of the FPDF_ERR_* constants, we must
409                // assume success.
410                _ => None,
411            } {
412                Err(PdfiumError::PdfiumLibraryInternalError(error))
413            } else {
414                // This would be an unusual situation; a null handle indicating failure,
415                // yet Pdfium's error code indicates success.
416
417                Err(PdfiumError::PdfiumLibraryInternalError(
418                    PdfiumInternalError::Unknown,
419                ))
420            }
421        } else {
422            Ok(PdfDocument::from_pdfium(handle, bindings))
423        }
424    }
425}
426
427impl Drop for Pdfium {
428    /// Closes the external Pdfium library, releasing held memory.
429    #[inline]
430    fn drop(&mut self) {
431        self.bindings.FPDF_DestroyLibrary();
432    }
433}
434
435impl Default for Pdfium {
436    /// Binds to a Pdfium library that was statically linked into the currently running
437    /// executable by calling [Pdfium::bind_to_statically_linked_library]. This function
438    /// will panic if no statically linked Pdfium functions can be located.
439    #[cfg(feature = "static")]
440    #[inline]
441    fn default() -> Self {
442        Pdfium::new(Pdfium::bind_to_statically_linked_library().unwrap())
443    }
444
445    /// Binds to an external Pdfium library by first attempting to bind to a Pdfium library
446    /// in the current working directory; if that fails, then a system-provided library
447    /// will be used as a fall back.
448    ///
449    /// This function will panic if no suitable Pdfium library can be loaded.
450    #[cfg(not(feature = "static"))]
451    #[cfg(not(target_arch = "wasm32"))]
452    #[inline]
453    fn default() -> Self {
454        let bindings = match Pdfium::bind_to_library(
455            // Attempt to bind to a Pdfium library in the current working directory...
456            Pdfium::pdfium_platform_library_name_at_path("./"),
457        ) {
458            Ok(bindings) => Ok(bindings),
459            Err(PdfiumError::LoadLibraryError(err)) => {
460                match err {
461                    libloading::Error::DlOpen { .. } => {
462                        // For DlOpen errors specifically, indicating the Pdfium library in the
463                        // current working directory does not exist or is corrupted, we attempt
464                        // to fall back to a system-provided library.
465
466                        Pdfium::bind_to_system_library()
467                    }
468                    _ => Err(PdfiumError::LoadLibraryError(err)),
469                }
470            }
471            Err(err) => Err(err),
472        };
473
474        Pdfium::new(bindings.unwrap())
475    }
476
477    /// Binds to an external Pdfium library by attempting to a system-provided library.
478    ///
479    /// This function will panic if no suitable Pdfium library can be loaded.
480    #[cfg(target_arch = "wasm32")]
481    fn default() -> Self {
482        Pdfium::new(Pdfium::bind_to_system_library().unwrap())
483    }
484}
485
486impl Debug for Pdfium {
487    #[inline]
488    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
489        f.debug_struct("Pdfium").finish()
490    }
491}
492
// Opt-in markers, compiled only when the `sync` feature is enabled, that allow a
// `Pdfium` instance to be shared between and moved across threads.
//
// NOTE(review): the soundness of these impls depends on the wrapped bindings being
// safe to use from multiple threads. Nothing in this file establishes that on its
// own, so confirm it against the bindings implementation in use.

#[cfg(feature = "sync")]
unsafe impl Sync for Pdfium {}

#[cfg(feature = "sync")]
unsafe impl Send for Pdfium {}