Skip to main content

pdfium_render/
pdfium.rs

1//! Defines the [Pdfium] struct, a high-level idiomatic Rust wrapper around Pdfium.
2
3use crate::bindgen::{
4    FPDF_DOCUMENT, FPDF_ERR_FILE, FPDF_ERR_FORMAT, FPDF_ERR_PAGE, FPDF_ERR_PASSWORD,
5    FPDF_ERR_SECURITY, FPDF_ERR_SUCCESS, FPDF_ERR_UNKNOWN,
6};
7use crate::bindings::PdfiumLibraryBindings;
8use crate::config::PdfiumLibraryConfig;
9use crate::error::{PdfiumError, PdfiumInternalError};
10use crate::pdf::document::{PdfDocument, PdfDocumentVersion};
11use once_cell::sync::OnceCell;
12use std::fmt::{Debug, Formatter};
13
14#[cfg(all(not(target_arch = "wasm32"), not(feature = "static")))]
15use {
16    crate::bindings::dynamic_bindings::DynamicPdfiumBindings, libloading::Library,
17    std::ffi::OsString, std::path::PathBuf,
18};
19
20#[cfg(all(not(target_arch = "wasm32"), feature = "static"))]
21use crate::bindings::static_bindings::StaticPdfiumBindings;
22
23#[cfg(not(target_arch = "wasm32"))]
24use {
25    crate::utils::files::get_pdfium_file_accessor_from_reader,
26    std::fs::File,
27    std::io::{Read, Seek},
28    std::path::Path,
29};
30
31#[cfg(target_arch = "wasm32")]
32use {
33    crate::bindings::wasm_bindings::{PdfiumRenderWasmState, WasmPdfiumBindings},
34    js_sys::{ArrayBuffer, Uint8Array},
35    wasm_bindgen::JsCast,
36    wasm_bindgen_futures::JsFuture,
37    web_sys::{window, Blob, Response},
38};
39
// Placeholder type that is compiled only when running cargo doc. The real `Blob` type
// is imported from `web_sys` solely on WASM targets, yet the WASM-specific
// `load_pdf_from_blob()` function is also compiled under `cfg(doc)`. This dummy
// declaration gives that function's signature a `Blob` type to name, so that
// WASM-specific functionality can be included in documentation generated on
// non-WASM targets.
#[cfg(doc)]
struct Blob;
45
// The first instantiation of a Pdfium object will promote a concrete PdfiumLibraryBindings
// trait implementation into a global static OnceCell. This allows for thread-safe,
// lifetime-free access to that PdfiumLibraryBindings instance from any object that
// implements the PdfiumLibraryBindingsAccessor trait.
//
// The cell is written exactly once, by Pdfium::new() or Pdfium::new_with_config(); the
// bind_to_*() functions only read it, to refuse creating a second set of bindings once
// one has already been installed.
static BINDINGS: OnceCell<Box<dyn PdfiumLibraryBindings>> = OnceCell::new();
51
/// Gives implementors access to the process-wide [PdfiumLibraryBindings] instance.
///
/// This is the variant compiled when the `thread_safe` feature is enabled; implementors
/// must be both [Send] and [Sync].
#[cfg(feature = "thread_safe")]
pub(crate) trait PdfiumLibraryBindingsAccessor<'a>: Send + Sync {
    /// Returns the global bindings, blocking the calling thread until they have been
    /// installed by the first [Pdfium] instance.
    fn bindings(&self) -> &'a dyn PdfiumLibraryBindings {
        // Reborrow through the `Box` to obtain a plain trait object reference.
        &**BINDINGS.wait()
    }
}
58
59#[cfg(not(feature = "thread_safe"))]
60pub(crate) trait PdfiumLibraryBindingsAccessor<'a> {
61    fn bindings(&self) -> &'a dyn PdfiumLibraryBindings {
62        BINDINGS.get().unwrap().as_ref()
63    }
64}
65
/// A high-level idiomatic Rust wrapper around Pdfium, the C++ PDF library used by
/// the Google Chromium project.
///
/// A [Pdfium] value does not hold the library bindings itself; the bindings live in a
/// process-wide static and are reached through the `PdfiumLibraryBindingsAccessor` trait.
/// Cloning a [Pdfium] therefore only clones the optional configuration.
#[derive(Clone)]
pub struct Pdfium {
    // The configuration passed to `Pdfium::new_with_config()`, or `None` when the
    // library was initialized without a custom configuration. The field is stored but
    // never read within this file, hence the `dead_code` allowance.
    #[allow(dead_code)]
    pub(crate) config: Option<PdfiumLibraryConfig>,
}
73
74impl Pdfium {
75    #[cfg(not(target_arch = "wasm32"))]
76    #[cfg(any(doc, feature = "static"))]
77    /// Binds to a Pdfium library that was statically linked into the currently running
78    /// executable, returning a new [PdfiumLibraryBindings] object that contains bindings to the
79    /// functions exposed by the library. The application will immediately crash if Pdfium
80    /// was not correctly statically linked into the executable at compile time.
81    ///
82    /// This function is only available when this crate's `static` feature is enabled.
83    #[inline]
84    pub fn bind_to_statically_linked_library() -> Result<Box<dyn PdfiumLibraryBindings>, PdfiumError>
85    {
86        if BINDINGS.get().is_none() {
87            let bindings = StaticPdfiumBindings::new();
88
89            Ok(Box::new(bindings))
90        } else {
91            Err(PdfiumError::PdfiumLibraryBindingsAlreadyInitialized)
92        }
93    }
94
95    #[cfg(not(target_arch = "wasm32"))]
96    #[cfg(not(feature = "static"))]
97    /// Initializes the external Pdfium library, loading it from the system libraries.
98    /// Returns a new [PdfiumLibraryBindings] object that contains bindings to the functions exposed
99    /// by the library, or an error if the library could not be loaded.
100    #[inline]
101    pub fn bind_to_system_library() -> Result<Box<dyn PdfiumLibraryBindings>, PdfiumError> {
102        if BINDINGS.get().is_none() {
103            let bindings = DynamicPdfiumBindings::new(
104                unsafe { Library::new(Self::pdfium_platform_library_name()) }
105                    .map_err(PdfiumError::LoadLibraryError)?,
106            )?;
107
108            Ok(Box::new(bindings))
109        } else {
110            Err(PdfiumError::PdfiumLibraryBindingsAlreadyInitialized)
111        }
112    }
113
114    #[cfg(target_arch = "wasm32")]
115    /// Initializes the external Pdfium library, binding to an external WASM module.
116    /// Returns a new [PdfiumLibraryBindings] object that contains bindings to the functions exposed
117    /// by the library, or an error if the library is not available.
118    ///
119    /// It is essential that the exported `initialize_pdfium_render()` function be called
120    /// from Javascript _before_ calling this function from within your Rust code. For an example, see:
121    /// <https://github.com/ajrcarey/pdfium-render/blob/master/examples/index.html>
122    #[inline]
123    pub fn bind_to_system_library() -> Result<Box<dyn PdfiumLibraryBindings>, PdfiumError> {
124        if BINDINGS.get().is_none() {
125            if PdfiumRenderWasmState::lock().is_ready() {
126                let bindings = WasmPdfiumBindings::new();
127
128                Ok(Box::new(bindings))
129            } else {
130                Err(PdfiumError::PdfiumWasmModuleNotInitialized)
131            }
132        } else {
133            Err(PdfiumError::PdfiumLibraryBindingsAlreadyInitialized)
134        }
135    }
136
137    #[cfg(not(target_arch = "wasm32"))]
138    #[cfg(not(feature = "static"))]
139    /// Initializes the external pdfium library, loading it from the given path.
140    /// Returns a new [PdfiumLibraryBindings] object that contains bindings to the functions
141    /// exposed by the library, or an error if the library could not be loaded.
142    #[inline]
143    pub fn bind_to_library(
144        path: impl AsRef<Path>,
145    ) -> Result<Box<dyn PdfiumLibraryBindings>, PdfiumError> {
146        if BINDINGS.get().is_none() {
147            let bindings = DynamicPdfiumBindings::new(
148                unsafe { Library::new(path.as_ref().as_os_str()) }
149                    .map_err(PdfiumError::LoadLibraryError)?,
150            )?;
151
152            Ok(Box::new(bindings))
153        } else {
154            Err(PdfiumError::PdfiumLibraryBindingsAlreadyInitialized)
155        }
156    }
157
    #[cfg(not(target_arch = "wasm32"))]
    #[cfg(not(feature = "static"))]
    /// Returns the name of the external Pdfium library on the currently running platform.
    /// On Linux and Android, this will be `libpdfium.so` or similar; on Windows, this will
    /// be `pdfium.dll` or similar; on MacOS, this will be `libpdfium.dylib` or similar.
    ///
    /// The returned value is a bare file name with no directory component; use
    /// [Pdfium::pdfium_platform_library_name_at_path()] to prefix it with a directory.
    #[inline]
    pub fn pdfium_platform_library_name() -> OsString {
        // Delegate the platform-specific file name decoration to libloading.
        libloading::library_filename("pdfium")
    }
167
168    #[cfg(not(target_arch = "wasm32"))]
169    #[cfg(not(feature = "static"))]
170    /// Returns the name of the external Pdfium library on the currently running platform,
171    /// prefixed with the given path string.
172    #[inline]
173    pub fn pdfium_platform_library_name_at_path(path: &(impl AsRef<Path> + ?Sized)) -> PathBuf {
174        path.as_ref().join(Pdfium::pdfium_platform_library_name())
175    }
176
    /// Creates a new [Pdfium] instance from the given external Pdfium library bindings.
    ///
    /// The Pdfium library is initialized through the given bindings, and the bindings are
    /// then promoted into the process-wide static used by every object in this crate.
    ///
    /// # Panics
    ///
    /// Panics if bindings have already been installed by an earlier call to
    /// [Pdfium::new()] or [Pdfium::new_with_config()]. The `bind_to_*()` functions return
    /// [PdfiumError::PdfiumLibraryBindingsAlreadyInitialized] in that situation, so
    /// bindings obtained from them normally cannot trigger this panic.
    #[inline]
    pub fn new(bindings: Box<dyn PdfiumLibraryBindings>) -> Self {
        // Refuse to initialize the library a second time.
        assert!(BINDINGS.get().is_none());
        unsafe {
            bindings.FPDF_InitLibrary();
        }
        // Installing can only fail if the cell was filled in the meantime.
        assert!(BINDINGS.set(bindings).is_ok());

        Self { config: None }
    }
188
    /// Creates a new [Pdfium] instance from the given external Pdfium library bindings,
    /// using the custom library configuration in the given [PdfiumLibraryConfig].
    ///
    /// The Pdfium library is initialized through the given bindings, and the bindings are
    /// then promoted into the process-wide static used by every object in this crate.
    /// The configuration is retained by the returned [Pdfium] instance.
    ///
    /// # Panics
    ///
    /// Panics if bindings have already been installed by an earlier call to
    /// [Pdfium::new()] or [Pdfium::new_with_config()]. The `bind_to_*()` functions return
    /// [PdfiumError::PdfiumLibraryBindingsAlreadyInitialized] in that situation, so
    /// bindings obtained from them normally cannot trigger this panic.
    #[inline]
    pub fn new_with_config(
        bindings: Box<dyn PdfiumLibraryBindings>,
        config: PdfiumLibraryConfig,
    ) -> Self {
        // Refuse to initialize the library a second time.
        assert!(BINDINGS.get().is_none());
        unsafe {
            // The converted configuration is a temporary that lives only for the
            // duration of this call.
            bindings.FPDF_InitLibraryWithConfig(&config.as_pdfium());
        }
        // Installing can only fail if the cell was filled in the meantime.
        assert!(BINDINGS.set(bindings).is_ok());

        Self {
            config: Some(config),
        }
    }
206
207    /// Attempts to open a [PdfDocument] from the given static byte buffer.
208    ///
209    /// If the document is password protected, the given password will be used to unlock it.
210    pub fn load_pdf_from_byte_slice<'a>(
211        &'a self,
212        bytes: &'a [u8],
213        password: Option<&str>,
214    ) -> Result<PdfDocument<'a>, PdfiumError> {
215        Self::pdfium_document_handle_to_result(
216            unsafe { self.bindings().FPDF_LoadMemDocument64(bytes, password) },
217            self.bindings(),
218        )
219    }
220
221    /// Attempts to open a [PdfDocument] from the given owned byte buffer.
222    ///
223    /// If the document is password protected, the given password will be used to unlock it.
224    ///
225    /// `pdfium-render` will take ownership of the given byte buffer, ensuring its lifetime lasts
226    /// as long as the [PdfDocument] opened from it.
227    pub fn load_pdf_from_byte_vec(
228        &self,
229        bytes: Vec<u8>,
230        password: Option<&str>,
231    ) -> Result<PdfDocument<'_>, PdfiumError> {
232        Self::pdfium_document_handle_to_result(
233            unsafe {
234                self.bindings()
235                    .FPDF_LoadMemDocument64(bytes.as_slice(), password)
236            },
237            self.bindings(),
238        )
239        .map(|mut document| {
240            // Give the newly-created document ownership of the byte buffer, so that Pdfium can continue
241            // to read from it on an as-needed basis throughout the lifetime of the document.
242
243            document.set_source_byte_buffer(bytes);
244
245            document
246        })
247    }
248
249    #[cfg(not(target_arch = "wasm32"))]
250    /// Attempts to open a [PdfDocument] from the given file path.
251    ///
252    /// If the document is password protected, the given password will be used
253    /// to unlock it.
254    ///
255    /// This function is not available when compiling to WASM. You have several options for
256    /// loading your PDF document data in WASM:
257    /// * Use the [Pdfium::load_pdf_from_fetch()] function to download document data from a
258    ///   URL using the browser's built-in `fetch` API. This function is only available when
259    ///   compiling to WASM.
260    /// * Use the [Pdfium::load_pdf_from_blob()] function to load document data from a
261    ///   Javascript `File` or `Blob` object (such as a `File` object returned from an HTML
262    ///   `<input type="file">` element). This function is only available when compiling to WASM.
263    /// * Use another method to retrieve the bytes of the target document over the network,
264    ///   then load those bytes into Pdfium using either the [Pdfium::load_pdf_from_byte_slice()]
265    ///   function or the [Pdfium::load_pdf_from_byte_vec()] function.
266    /// * Embed the bytes of the target document directly into the compiled WASM module
267    ///   using the `include_bytes!` macro.
268    pub fn load_pdf_from_file<'a>(
269        &'a self,
270        path: &(impl AsRef<Path> + ?Sized),
271        password: Option<&str>,
272    ) -> Result<PdfDocument<'a>, PdfiumError> {
273        self.load_pdf_from_reader(File::open(path).map_err(PdfiumError::IoError)?, password)
274    }
275
276    #[cfg(not(target_arch = "wasm32"))]
277    /// Attempts to open a [PdfDocument] from the given reader.
278    ///
279    /// Pdfium will only load the portions of the document it actually needs into memory.
280    /// This is more efficient than loading the entire document into memory, especially when
281    /// working with large documents, and allows for working with documents larger than the
282    /// amount of available memory.
283    ///
284    /// Because Pdfium must know the total content length in advance prior to loading
285    /// any portion of it, the given reader must implement the [Seek] trait as well as
286    /// the [Read] trait.
287    ///
288    /// If the document is password protected, the given password will be used
289    /// to unlock it.
290    ///
291    /// This function is not available when compiling to WASM. You have several options for
292    /// loading your PDF document data in WASM:
293    /// * Use the [Pdfium::load_pdf_from_fetch()] function to download document data from a
294    ///   URL using the browser's built-in `fetch` API. This function is only available when
295    ///   compiling to WASM.
296    /// * Use the [Pdfium::load_pdf_from_blob()] function to load document data from a
297    ///   Javascript `File` or `Blob` object (such as a `File` object returned from an HTML
298    ///   `<input type="file">` element). This function is only available when compiling to WASM.
299    /// * Use another method to retrieve the bytes of the target document over the network,
300    ///   then load those bytes into Pdfium using either the [Pdfium::load_pdf_from_byte_slice()]
301    ///   function or the [Pdfium::load_pdf_from_byte_vec()] function.
302    /// * Embed the bytes of the target document directly into the compiled WASM module
303    ///   using the `include_bytes!` macro.
304    pub fn load_pdf_from_reader<'a, R: Read + Seek + 'a>(
305        &'a self,
306        reader: R,
307        password: Option<&str>,
308    ) -> Result<PdfDocument<'a>, PdfiumError> {
309        let mut reader = get_pdfium_file_accessor_from_reader(reader);
310
311        Pdfium::pdfium_document_handle_to_result(
312            unsafe {
313                self.bindings()
314                    .FPDF_LoadCustomDocument(reader.as_fpdf_file_access_mut_ptr(), password)
315            },
316            self.bindings(),
317        )
318        .map(|mut document| {
319            // Give the newly-created document ownership of the reader, so that Pdfium can continue
320            // to read from it on an as-needed basis throughout the lifetime of the document.
321
322            document.set_file_access_reader(reader);
323
324            document
325        })
326    }
327
328    #[cfg(any(doc, target_arch = "wasm32"))]
329    /// Attempts to open a [PdfDocument] by loading document data from the given URL.
330    /// The Javascript `fetch` API is used to download data over the network.
331    ///
332    /// If the document is password protected, the given password will be used to unlock it.
333    ///
334    /// This function is only available when compiling to WASM.
335    pub async fn load_pdf_from_fetch<'a>(
336        &'a self,
337        url: impl ToString,
338        password: Option<&str>,
339    ) -> Result<PdfDocument<'a>, PdfiumError> {
340        if let Some(window) = window() {
341            let fetch_result = JsFuture::from(window.fetch_with_str(url.to_string().as_str()))
342                .await
343                .map_err(PdfiumError::WebSysFetchError)?;
344
345            debug_assert!(fetch_result.is_instance_of::<Response>());
346
347            let response: Response = fetch_result
348                .dyn_into()
349                .map_err(|_| PdfiumError::WebSysInvalidResponseError)?;
350
351            let blob: Blob =
352                JsFuture::from(response.blob().map_err(PdfiumError::WebSysFetchError)?)
353                    .await
354                    .map_err(PdfiumError::WebSysFetchError)?
355                    .into();
356
357            self.load_pdf_from_blob(blob, password).await
358        } else {
359            Err(PdfiumError::WebSysWindowObjectNotAvailable)
360        }
361    }
362
363    #[cfg(any(doc, target_arch = "wasm32"))]
364    /// Attempts to open a [PdfDocument] by loading document data from the given `Blob`.
365    /// A `File` object returned from a `FileList` is a suitable `Blob`:
366    ///
367    /// ```text
368    /// <input id="filePicker" type="file">
369    ///
370    /// const file = document.getElementById('filePicker').files[0];
371    /// ```
372    ///
373    /// If the document is password protected, the given password will be used to unlock it.
374    ///
375    /// This function is only available when compiling to WASM.
376    pub async fn load_pdf_from_blob<'a>(
377        &'a self,
378        blob: Blob,
379        password: Option<&str>,
380    ) -> Result<PdfDocument<'a>, PdfiumError> {
381        let array_buffer: ArrayBuffer = JsFuture::from(blob.array_buffer())
382            .await
383            .map_err(PdfiumError::WebSysFetchError)?
384            .into();
385
386        let u8_array: Uint8Array = Uint8Array::new(&array_buffer);
387
388        let bytes: Vec<u8> = u8_array.to_vec();
389
390        self.load_pdf_from_byte_vec(bytes, password)
391    }
392
393    /// Creates a new, empty [PdfDocument] in memory.
394    pub fn create_new_pdf<'a>(&'a self) -> Result<PdfDocument<'a>, PdfiumError> {
395        Self::pdfium_document_handle_to_result(
396            unsafe { self.bindings().FPDF_CreateNewDocument() },
397            self.bindings(),
398        )
399        .map(|mut document| {
400            document.set_version(PdfDocumentVersion::DEFAULT_VERSION);
401
402            document
403        })
404    }
405
406    /// Returns a [PdfDocument] from the given `FPDF_DOCUMENT` handle, if possible.
407    pub(crate) fn pdfium_document_handle_to_result(
408        handle: FPDF_DOCUMENT,
409        bindings: &dyn PdfiumLibraryBindings,
410    ) -> Result<PdfDocument<'_>, PdfiumError> {
411        if handle.is_null() {
412            // Retrieve the error code of the last error recorded by Pdfium.
413
414            if let Some(error) = match unsafe { bindings.FPDF_GetLastError() } as u32 {
415                FPDF_ERR_SUCCESS => None,
416                FPDF_ERR_UNKNOWN => Some(PdfiumInternalError::Unknown),
417                FPDF_ERR_FILE => Some(PdfiumInternalError::FileError),
418                FPDF_ERR_FORMAT => Some(PdfiumInternalError::FormatError),
419                FPDF_ERR_PASSWORD => Some(PdfiumInternalError::PasswordError),
420                FPDF_ERR_SECURITY => Some(PdfiumInternalError::SecurityError),
421                FPDF_ERR_PAGE => Some(PdfiumInternalError::PageError),
422                // The Pdfium documentation says "... if the previous SDK call succeeded, [then] the
423                // return value of this function is not defined". On Linux, at least, a return value
424                // of FPDF_ERR_SUCCESS seems to be consistently returned; on Windows, however, the
425                // return values are indeed unpredictable. See https://github.com/ajrcarey/pdfium-render/issues/24.
426                // Therefore, if the return value does not match one of the FPDF_ERR_* constants, we must
427                // assume success.
428                _ => None,
429            } {
430                Err(PdfiumError::PdfiumLibraryInternalError(error))
431            } else {
432                // This would be an unusual situation; a null handle indicating failure,
433                // yet Pdfium's error code indicates success.
434
435                Err(PdfiumError::PdfiumLibraryInternalError(
436                    PdfiumInternalError::Unknown,
437                ))
438            }
439        } else {
440            Ok(PdfDocument::from_pdfium(handle))
441        }
442    }
443}
444
445impl Default for Pdfium {
446    #[cfg(feature = "static")]
447    /// Binds to a Pdfium library that was statically linked into the currently running
448    /// executable by calling [Pdfium::bind_to_statically_linked_library]. This function
449    /// will panic if no statically linked Pdfium functions can be located.
450    #[inline]
451    fn default() -> Self {
452        Pdfium::new(Pdfium::bind_to_statically_linked_library().unwrap())
453    }
454
455    #[cfg(not(feature = "static"))]
456    #[cfg(not(target_arch = "wasm32"))]
457    /// Binds to an external Pdfium library by first attempting to bind to a Pdfium library
458    /// in the current working directory; if that fails, then a system-provided library
459    /// will be used as a fall back.
460    ///
461    /// This function will panic if no suitable Pdfium library can be loaded.
462    #[inline]
463    fn default() -> Self {
464        // Attempt to bind to a Pdfium library in the current working directory.
465
466        match Pdfium::bind_to_library(Pdfium::pdfium_platform_library_name_at_path("./")) {
467            Ok(bindings) => Pdfium::new(bindings), // Create new bindings
468            Err(PdfiumError::PdfiumLibraryBindingsAlreadyInitialized) => Pdfium { config: None }, // Re-use the existing bindings
469            Err(PdfiumError::LoadLibraryError(err)) => {
470                match err {
471                    libloading::Error::DlOpen { .. } => {
472                        // For DlOpen errors specifically, indicating the Pdfium library in the
473                        // current working directory does not exist or is corrupted, we attempt
474                        // to fall back to a system-provided library.
475
476                        Pdfium::new(Pdfium::bind_to_system_library().unwrap())
477                    }
478                    _ => Err(PdfiumError::LoadLibraryError(err)).unwrap(), // Explicitly re-throw the error
479                }
480            }
481            Err(err) => Err(err).unwrap(), // Explicitly re-throw the error
482        }
483    }
484
485    #[cfg(target_arch = "wasm32")]
486    /// Binds to an external Pdfium library by attempting to a system-provided library.
487    ///
488    /// This function will panic if no suitable Pdfium library can be loaded.
489    fn default() -> Self {
490        Pdfium::new(Pdfium::bind_to_system_library().unwrap())
491    }
492}
493
494impl Debug for Pdfium {
495    #[inline]
496    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
497        f.debug_struct("Pdfium").finish()
498    }
499}
500
// Pdfium relies entirely on the trait's default `bindings()` implementation, which reads
// the global bindings rather than any state held by the Pdfium value itself.
impl PdfiumLibraryBindingsAccessor<'_> for Pdfium {}

// With the `thread_safe` feature enabled, PdfiumLibraryBindingsAccessor requires its
// implementors to be Send + Sync, so both markers must be provided for the impl above
// to compile.
// NOTE(review): these are unsafe assertions made by hand; presumably the stored
// configuration is what prevents the compiler from deriving them automatically --
// confirm against PdfiumLibraryConfig before relying on them.
#[cfg(feature = "thread_safe")]
unsafe impl Sync for Pdfium {}

#[cfg(feature = "thread_safe")]
unsafe impl Send for Pdfium {}