pdfium_render/pdfium.rs
1//! Defines the [Pdfium] struct, a high-level idiomatic Rust wrapper around Pdfium.
2
3use crate::bindgen::{
4 FPDF_DOCUMENT, FPDF_ERR_FILE, FPDF_ERR_FORMAT, FPDF_ERR_PAGE, FPDF_ERR_PASSWORD,
5 FPDF_ERR_SECURITY, FPDF_ERR_SUCCESS, FPDF_ERR_UNKNOWN,
6};
7use crate::bindings::PdfiumLibraryBindings;
8use crate::config::PdfiumLibraryConfig;
9use crate::error::{PdfiumError, PdfiumInternalError};
10use crate::pdf::document::{PdfDocument, PdfDocumentVersion};
11use once_cell::sync::OnceCell;
12use std::fmt::{Debug, Formatter};
13
14#[cfg(all(not(target_arch = "wasm32"), not(feature = "static")))]
15use {
16 crate::bindings::dynamic_bindings::DynamicPdfiumBindings, libloading::Library,
17 std::ffi::OsString, std::path::PathBuf,
18};
19
20#[cfg(all(not(target_arch = "wasm32"), feature = "static"))]
21use crate::bindings::static_bindings::StaticPdfiumBindings;
22
23#[cfg(not(target_arch = "wasm32"))]
24use {
25 crate::utils::files::get_pdfium_file_accessor_from_reader,
26 std::fs::File,
27 std::io::{Read, Seek},
28 std::path::Path,
29};
30
31#[cfg(target_arch = "wasm32")]
32use {
33 crate::bindings::wasm_bindings::{PdfiumRenderWasmState, WasmPdfiumBindings},
34 js_sys::{ArrayBuffer, Uint8Array},
35 wasm_bindgen::JsCast,
36 wasm_bindgen_futures::JsFuture,
37 web_sys::{window, Blob, Response},
38};
39
// The following dummy declaration is used only when running `cargo doc`.
// It allows the signatures of WASM-specific functions that mention `Blob`
// (such as `Pdfium::load_pdf_from_blob()`) to be included in documentation
// generated on non-WASM targets, where the `web_sys` import is compiled out.
#[cfg(doc)]
struct Blob;
45
// The first instantiation of a Pdfium object (via `Pdfium::new()` or
// `Pdfium::new_with_config()`) promotes a concrete PdfiumLibraryBindings trait
// implementation into this global static OnceCell. This allows for thread-safe,
// lifetime-free access to that PdfiumLibraryBindings instance from any object that
// implements the PdfiumLibraryBindingsAccessor trait. The `bind_to_*` functions consult
// this cell and refuse to create further bindings once it has been populated.
static BINDINGS: OnceCell<Box<dyn PdfiumLibraryBindings>> = OnceCell::new();
51
/// Gives implementors access to the globally registered [PdfiumLibraryBindings] instance.
///
/// This variant of the trait is compiled when the `thread_safe` feature is enabled;
/// implementors must be both [Send] and [Sync].
#[cfg(feature = "thread_safe")]
pub(crate) trait PdfiumLibraryBindingsAccessor<'a>: Send + Sync {
    /// Returns the bindings held in the global cell, blocking the calling thread
    /// until the cell has been populated.
    fn bindings(&self) -> &'a dyn PdfiumLibraryBindings {
        let registered = BINDINGS.wait();

        &**registered
    }
}
58
59#[cfg(not(feature = "thread_safe"))]
60pub(crate) trait PdfiumLibraryBindingsAccessor<'a> {
61 fn bindings(&self) -> &'a dyn PdfiumLibraryBindings {
62 BINDINGS.get().unwrap().as_ref()
63 }
64}
65
/// A high-level idiomatic Rust wrapper around Pdfium, the C++ PDF library used by
/// the Google Chromium project.
///
/// The Pdfium library bindings themselves are not stored in this struct; they are held
/// in a module-level static cell and reached through the `bindings()` accessor.
#[derive(Clone)]
pub struct Pdfium {
    // The custom library configuration given to `Pdfium::new_with_config()`, or `None`
    // when the instance was created without one. The field is only ever written in the
    // code visible in this module, hence the `dead_code` allowance.
    #[allow(dead_code)]
    pub(crate) config: Option<PdfiumLibraryConfig>,
}
73
74impl Pdfium {
75 #[cfg(not(target_arch = "wasm32"))]
76 #[cfg(any(doc, feature = "static"))]
77 /// Binds to a Pdfium library that was statically linked into the currently running
78 /// executable, returning a new [PdfiumLibraryBindings] object that contains bindings to the
79 /// functions exposed by the library. The application will immediately crash if Pdfium
80 /// was not correctly statically linked into the executable at compile time.
81 ///
82 /// This function is only available when this crate's `static` feature is enabled.
83 #[inline]
84 pub fn bind_to_statically_linked_library() -> Result<Box<dyn PdfiumLibraryBindings>, PdfiumError>
85 {
86 if BINDINGS.get().is_none() {
87 let bindings = StaticPdfiumBindings::new();
88
89 Ok(Box::new(bindings))
90 } else {
91 Err(PdfiumError::PdfiumLibraryBindingsAlreadyInitialized)
92 }
93 }
94
95 #[cfg(not(target_arch = "wasm32"))]
96 #[cfg(not(feature = "static"))]
97 /// Initializes the external Pdfium library, loading it from the system libraries.
98 /// Returns a new [PdfiumLibraryBindings] object that contains bindings to the functions exposed
99 /// by the library, or an error if the library could not be loaded.
100 #[inline]
101 pub fn bind_to_system_library() -> Result<Box<dyn PdfiumLibraryBindings>, PdfiumError> {
102 if BINDINGS.get().is_none() {
103 let bindings = DynamicPdfiumBindings::new(
104 unsafe { Library::new(Self::pdfium_platform_library_name()) }
105 .map_err(PdfiumError::LoadLibraryError)?,
106 )?;
107
108 Ok(Box::new(bindings))
109 } else {
110 Err(PdfiumError::PdfiumLibraryBindingsAlreadyInitialized)
111 }
112 }
113
114 #[cfg(target_arch = "wasm32")]
115 /// Initializes the external Pdfium library, binding to an external WASM module.
116 /// Returns a new [PdfiumLibraryBindings] object that contains bindings to the functions exposed
117 /// by the library, or an error if the library is not available.
118 ///
119 /// It is essential that the exported `initialize_pdfium_render()` function be called
120 /// from Javascript _before_ calling this function from within your Rust code. For an example, see:
121 /// <https://github.com/ajrcarey/pdfium-render/blob/master/examples/index.html>
122 #[inline]
123 pub fn bind_to_system_library() -> Result<Box<dyn PdfiumLibraryBindings>, PdfiumError> {
124 if BINDINGS.get().is_none() {
125 if PdfiumRenderWasmState::lock().is_ready() {
126 let bindings = WasmPdfiumBindings::new();
127
128 Ok(Box::new(bindings))
129 } else {
130 Err(PdfiumError::PdfiumWasmModuleNotInitialized)
131 }
132 } else {
133 Err(PdfiumError::PdfiumLibraryBindingsAlreadyInitialized)
134 }
135 }
136
137 #[cfg(not(target_arch = "wasm32"))]
138 #[cfg(not(feature = "static"))]
139 /// Initializes the external pdfium library, loading it from the given path.
140 /// Returns a new [PdfiumLibraryBindings] object that contains bindings to the functions
141 /// exposed by the library, or an error if the library could not be loaded.
142 #[inline]
143 pub fn bind_to_library(
144 path: impl AsRef<Path>,
145 ) -> Result<Box<dyn PdfiumLibraryBindings>, PdfiumError> {
146 if BINDINGS.get().is_none() {
147 let bindings = DynamicPdfiumBindings::new(
148 unsafe { Library::new(path.as_ref().as_os_str()) }
149 .map_err(PdfiumError::LoadLibraryError)?,
150 )?;
151
152 Ok(Box::new(bindings))
153 } else {
154 Err(PdfiumError::PdfiumLibraryBindingsAlreadyInitialized)
155 }
156 }
157
158 #[cfg(not(target_arch = "wasm32"))]
159 #[cfg(not(feature = "static"))]
160 /// Returns the name of the external Pdfium library on the currently running platform.
161 /// On Linux and Android, this will be `libpdfium.so` or similar; on Windows, this will
162 /// be `pdfium.dll` or similar; on MacOS, this will be `libpdfium.dylib` or similar.
163 #[inline]
164 pub fn pdfium_platform_library_name() -> OsString {
165 libloading::library_filename("pdfium")
166 }
167
168 #[cfg(not(target_arch = "wasm32"))]
169 #[cfg(not(feature = "static"))]
170 /// Returns the name of the external Pdfium library on the currently running platform,
171 /// prefixed with the given path string.
172 #[inline]
173 pub fn pdfium_platform_library_name_at_path(path: &(impl AsRef<Path> + ?Sized)) -> PathBuf {
174 path.as_ref().join(Pdfium::pdfium_platform_library_name())
175 }
176
177 /// Creates a new [Pdfium] instance from the given external Pdfium library bindings.
178 #[inline]
179 pub fn new(bindings: Box<dyn PdfiumLibraryBindings>) -> Self {
180 assert!(BINDINGS.get().is_none());
181 unsafe {
182 bindings.FPDF_InitLibrary();
183 }
184 assert!(BINDINGS.set(bindings).is_ok());
185
186 Self { config: None }
187 }
188
189 /// Creates a new [Pdfium] instance from the given external Pdfium library bindings,
190 /// using the custom library configuration in the given [PdfiumLibraryConfig].
191 #[inline]
192 pub fn new_with_config(
193 bindings: Box<dyn PdfiumLibraryBindings>,
194 config: PdfiumLibraryConfig,
195 ) -> Self {
196 assert!(BINDINGS.get().is_none());
197 unsafe {
198 bindings.FPDF_InitLibraryWithConfig(&config.as_pdfium());
199 }
200 assert!(BINDINGS.set(bindings).is_ok());
201
202 Self {
203 config: Some(config),
204 }
205 }
206
207 /// Attempts to open a [PdfDocument] from the given static byte buffer.
208 ///
209 /// If the document is password protected, the given password will be used to unlock it.
210 pub fn load_pdf_from_byte_slice<'a>(
211 &'a self,
212 bytes: &'a [u8],
213 password: Option<&str>,
214 ) -> Result<PdfDocument<'a>, PdfiumError> {
215 Self::pdfium_document_handle_to_result(
216 unsafe { self.bindings().FPDF_LoadMemDocument64(bytes, password) },
217 self.bindings(),
218 )
219 }
220
221 /// Attempts to open a [PdfDocument] from the given owned byte buffer.
222 ///
223 /// If the document is password protected, the given password will be used to unlock it.
224 ///
225 /// `pdfium-render` will take ownership of the given byte buffer, ensuring its lifetime lasts
226 /// as long as the [PdfDocument] opened from it.
227 pub fn load_pdf_from_byte_vec(
228 &self,
229 bytes: Vec<u8>,
230 password: Option<&str>,
231 ) -> Result<PdfDocument<'_>, PdfiumError> {
232 Self::pdfium_document_handle_to_result(
233 unsafe {
234 self.bindings()
235 .FPDF_LoadMemDocument64(bytes.as_slice(), password)
236 },
237 self.bindings(),
238 )
239 .map(|mut document| {
240 // Give the newly-created document ownership of the byte buffer, so that Pdfium can continue
241 // to read from it on an as-needed basis throughout the lifetime of the document.
242
243 document.set_source_byte_buffer(bytes);
244
245 document
246 })
247 }
248
249 #[cfg(not(target_arch = "wasm32"))]
250 /// Attempts to open a [PdfDocument] from the given file path.
251 ///
252 /// If the document is password protected, the given password will be used
253 /// to unlock it.
254 ///
255 /// This function is not available when compiling to WASM. You have several options for
256 /// loading your PDF document data in WASM:
257 /// * Use the [Pdfium::load_pdf_from_fetch()] function to download document data from a
258 /// URL using the browser's built-in `fetch` API. This function is only available when
259 /// compiling to WASM.
260 /// * Use the [Pdfium::load_pdf_from_blob()] function to load document data from a
261 /// Javascript `File` or `Blob` object (such as a `File` object returned from an HTML
262 /// `<input type="file">` element). This function is only available when compiling to WASM.
263 /// * Use another method to retrieve the bytes of the target document over the network,
264 /// then load those bytes into Pdfium using either the [Pdfium::load_pdf_from_byte_slice()]
265 /// function or the [Pdfium::load_pdf_from_byte_vec()] function.
266 /// * Embed the bytes of the target document directly into the compiled WASM module
267 /// using the `include_bytes!` macro.
268 pub fn load_pdf_from_file<'a>(
269 &'a self,
270 path: &(impl AsRef<Path> + ?Sized),
271 password: Option<&str>,
272 ) -> Result<PdfDocument<'a>, PdfiumError> {
273 self.load_pdf_from_reader(File::open(path).map_err(PdfiumError::IoError)?, password)
274 }
275
276 #[cfg(not(target_arch = "wasm32"))]
277 /// Attempts to open a [PdfDocument] from the given reader.
278 ///
279 /// Pdfium will only load the portions of the document it actually needs into memory.
280 /// This is more efficient than loading the entire document into memory, especially when
281 /// working with large documents, and allows for working with documents larger than the
282 /// amount of available memory.
283 ///
284 /// Because Pdfium must know the total content length in advance prior to loading
285 /// any portion of it, the given reader must implement the [Seek] trait as well as
286 /// the [Read] trait.
287 ///
288 /// If the document is password protected, the given password will be used
289 /// to unlock it.
290 ///
291 /// This function is not available when compiling to WASM. You have several options for
292 /// loading your PDF document data in WASM:
293 /// * Use the [Pdfium::load_pdf_from_fetch()] function to download document data from a
294 /// URL using the browser's built-in `fetch` API. This function is only available when
295 /// compiling to WASM.
296 /// * Use the [Pdfium::load_pdf_from_blob()] function to load document data from a
297 /// Javascript `File` or `Blob` object (such as a `File` object returned from an HTML
298 /// `<input type="file">` element). This function is only available when compiling to WASM.
299 /// * Use another method to retrieve the bytes of the target document over the network,
300 /// then load those bytes into Pdfium using either the [Pdfium::load_pdf_from_byte_slice()]
301 /// function or the [Pdfium::load_pdf_from_byte_vec()] function.
302 /// * Embed the bytes of the target document directly into the compiled WASM module
303 /// using the `include_bytes!` macro.
304 pub fn load_pdf_from_reader<'a, R: Read + Seek + 'a>(
305 &'a self,
306 reader: R,
307 password: Option<&str>,
308 ) -> Result<PdfDocument<'a>, PdfiumError> {
309 let mut reader = get_pdfium_file_accessor_from_reader(reader);
310
311 Pdfium::pdfium_document_handle_to_result(
312 unsafe {
313 self.bindings()
314 .FPDF_LoadCustomDocument(reader.as_fpdf_file_access_mut_ptr(), password)
315 },
316 self.bindings(),
317 )
318 .map(|mut document| {
319 // Give the newly-created document ownership of the reader, so that Pdfium can continue
320 // to read from it on an as-needed basis throughout the lifetime of the document.
321
322 document.set_file_access_reader(reader);
323
324 document
325 })
326 }
327
328 #[cfg(any(doc, target_arch = "wasm32"))]
329 /// Attempts to open a [PdfDocument] by loading document data from the given URL.
330 /// The Javascript `fetch` API is used to download data over the network.
331 ///
332 /// If the document is password protected, the given password will be used to unlock it.
333 ///
334 /// This function is only available when compiling to WASM.
335 pub async fn load_pdf_from_fetch<'a>(
336 &'a self,
337 url: impl ToString,
338 password: Option<&str>,
339 ) -> Result<PdfDocument<'a>, PdfiumError> {
340 if let Some(window) = window() {
341 let fetch_result = JsFuture::from(window.fetch_with_str(url.to_string().as_str()))
342 .await
343 .map_err(PdfiumError::WebSysFetchError)?;
344
345 debug_assert!(fetch_result.is_instance_of::<Response>());
346
347 let response: Response = fetch_result
348 .dyn_into()
349 .map_err(|_| PdfiumError::WebSysInvalidResponseError)?;
350
351 let blob: Blob =
352 JsFuture::from(response.blob().map_err(PdfiumError::WebSysFetchError)?)
353 .await
354 .map_err(PdfiumError::WebSysFetchError)?
355 .into();
356
357 self.load_pdf_from_blob(blob, password).await
358 } else {
359 Err(PdfiumError::WebSysWindowObjectNotAvailable)
360 }
361 }
362
363 #[cfg(any(doc, target_arch = "wasm32"))]
364 /// Attempts to open a [PdfDocument] by loading document data from the given `Blob`.
365 /// A `File` object returned from a `FileList` is a suitable `Blob`:
366 ///
367 /// ```text
368 /// <input id="filePicker" type="file">
369 ///
370 /// const file = document.getElementById('filePicker').files[0];
371 /// ```
372 ///
373 /// If the document is password protected, the given password will be used to unlock it.
374 ///
375 /// This function is only available when compiling to WASM.
376 pub async fn load_pdf_from_blob<'a>(
377 &'a self,
378 blob: Blob,
379 password: Option<&str>,
380 ) -> Result<PdfDocument<'a>, PdfiumError> {
381 let array_buffer: ArrayBuffer = JsFuture::from(blob.array_buffer())
382 .await
383 .map_err(PdfiumError::WebSysFetchError)?
384 .into();
385
386 let u8_array: Uint8Array = Uint8Array::new(&array_buffer);
387
388 let bytes: Vec<u8> = u8_array.to_vec();
389
390 self.load_pdf_from_byte_vec(bytes, password)
391 }
392
393 /// Creates a new, empty [PdfDocument] in memory.
394 pub fn create_new_pdf<'a>(&'a self) -> Result<PdfDocument<'a>, PdfiumError> {
395 Self::pdfium_document_handle_to_result(
396 unsafe { self.bindings().FPDF_CreateNewDocument() },
397 self.bindings(),
398 )
399 .map(|mut document| {
400 document.set_version(PdfDocumentVersion::DEFAULT_VERSION);
401
402 document
403 })
404 }
405
406 /// Returns a [PdfDocument] from the given `FPDF_DOCUMENT` handle, if possible.
407 pub(crate) fn pdfium_document_handle_to_result(
408 handle: FPDF_DOCUMENT,
409 bindings: &dyn PdfiumLibraryBindings,
410 ) -> Result<PdfDocument<'_>, PdfiumError> {
411 if handle.is_null() {
412 // Retrieve the error code of the last error recorded by Pdfium.
413
414 if let Some(error) = match unsafe { bindings.FPDF_GetLastError() } as u32 {
415 FPDF_ERR_SUCCESS => None,
416 FPDF_ERR_UNKNOWN => Some(PdfiumInternalError::Unknown),
417 FPDF_ERR_FILE => Some(PdfiumInternalError::FileError),
418 FPDF_ERR_FORMAT => Some(PdfiumInternalError::FormatError),
419 FPDF_ERR_PASSWORD => Some(PdfiumInternalError::PasswordError),
420 FPDF_ERR_SECURITY => Some(PdfiumInternalError::SecurityError),
421 FPDF_ERR_PAGE => Some(PdfiumInternalError::PageError),
422 // The Pdfium documentation says "... if the previous SDK call succeeded, [then] the
423 // return value of this function is not defined". On Linux, at least, a return value
424 // of FPDF_ERR_SUCCESS seems to be consistently returned; on Windows, however, the
425 // return values are indeed unpredictable. See https://github.com/ajrcarey/pdfium-render/issues/24.
426 // Therefore, if the return value does not match one of the FPDF_ERR_* constants, we must
427 // assume success.
428 _ => None,
429 } {
430 Err(PdfiumError::PdfiumLibraryInternalError(error))
431 } else {
432 // This would be an unusual situation; a null handle indicating failure,
433 // yet Pdfium's error code indicates success.
434
435 Err(PdfiumError::PdfiumLibraryInternalError(
436 PdfiumInternalError::Unknown,
437 ))
438 }
439 } else {
440 Ok(PdfDocument::from_pdfium(handle))
441 }
442 }
443}
444
445impl Default for Pdfium {
446 #[cfg(feature = "static")]
447 /// Binds to a Pdfium library that was statically linked into the currently running
448 /// executable by calling [Pdfium::bind_to_statically_linked_library]. This function
449 /// will panic if no statically linked Pdfium functions can be located.
450 #[inline]
451 fn default() -> Self {
452 Pdfium::new(Pdfium::bind_to_statically_linked_library().unwrap())
453 }
454
455 #[cfg(not(feature = "static"))]
456 #[cfg(not(target_arch = "wasm32"))]
457 /// Binds to an external Pdfium library by first attempting to bind to a Pdfium library
458 /// in the current working directory; if that fails, then a system-provided library
459 /// will be used as a fall back.
460 ///
461 /// This function will panic if no suitable Pdfium library can be loaded.
462 #[inline]
463 fn default() -> Self {
464 // Attempt to bind to a Pdfium library in the current working directory.
465
466 match Pdfium::bind_to_library(Pdfium::pdfium_platform_library_name_at_path("./")) {
467 Ok(bindings) => Pdfium::new(bindings), // Create new bindings
468 Err(PdfiumError::PdfiumLibraryBindingsAlreadyInitialized) => Pdfium { config: None }, // Re-use the existing bindings
469 Err(PdfiumError::LoadLibraryError(err)) => {
470 match err {
471 libloading::Error::DlOpen { .. } => {
472 // For DlOpen errors specifically, indicating the Pdfium library in the
473 // current working directory does not exist or is corrupted, we attempt
474 // to fall back to a system-provided library.
475
476 Pdfium::new(Pdfium::bind_to_system_library().unwrap())
477 }
478 _ => Err(PdfiumError::LoadLibraryError(err)).unwrap(), // Explicitly re-throw the error
479 }
480 }
481 Err(err) => Err(err).unwrap(), // Explicitly re-throw the error
482 }
483 }
484
485 #[cfg(target_arch = "wasm32")]
486 /// Binds to an external Pdfium library by attempting to a system-provided library.
487 ///
488 /// This function will panic if no suitable Pdfium library can be loaded.
489 fn default() -> Self {
490 Pdfium::new(Pdfium::bind_to_system_library().unwrap())
491 }
492}
493
494impl Debug for Pdfium {
495 #[inline]
496 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
497 f.debug_struct("Pdfium").finish()
498 }
499}
500
// Pdfium relies entirely on the default `bindings()` implementation provided by the
// trait, so no method bodies are needed here.
impl PdfiumLibraryBindingsAccessor<'_> for Pdfium {}
502
// When the `thread_safe` feature is enabled, the PdfiumLibraryBindingsAccessor trait
// requires its implementors to be both Send and Sync; the manual implementations below
// provide that for Pdfium.
// NOTE(review): nothing visible in this module establishes why sharing or sending the
// `config` field across threads is sound — confirm against PdfiumLibraryConfig.
#[cfg(feature = "thread_safe")]
unsafe impl Sync for Pdfium {}

#[cfg(feature = "thread_safe")]
unsafe impl Send for Pdfium {}