pdfium_render/pdfium.rs
1//! Defines the [Pdfium] struct, a high-level idiomatic Rust wrapper around Pdfium.
2
3use crate::bindings::PdfiumLibraryBindings;
4use crate::error::{PdfiumError, PdfiumInternalError};
5use crate::pdf::document::{PdfDocument, PdfDocumentVersion};
6use std::fmt::{Debug, Formatter};
7
8#[cfg(all(not(target_arch = "wasm32"), not(feature = "static")))]
9use {
10 crate::bindings::dynamic::DynamicPdfiumBindings, libloading::Library, std::ffi::OsString,
11 std::path::PathBuf,
12};
13
14#[cfg(all(not(target_arch = "wasm32"), feature = "static"))]
15use crate::bindings::static_bindings::StaticPdfiumBindings;
16
17#[cfg(not(target_arch = "wasm32"))]
18use {
19 crate::utils::files::get_pdfium_file_accessor_from_reader,
20 std::fs::File,
21 std::io::{Read, Seek},
22 std::path::Path,
23};
24
25#[cfg(target_arch = "wasm32")]
26use {
27 crate::bindings::wasm::{PdfiumRenderWasmState, WasmPdfiumBindings},
28 js_sys::{ArrayBuffer, Uint8Array},
29 wasm_bindgen::JsCast,
30 wasm_bindgen_futures::JsFuture,
31 web_sys::{window, Blob, Response},
32};
33
34#[cfg(feature = "thread_safe")]
35use crate::bindings::thread_safe::ThreadSafePdfiumBindings;
36
37// The following dummy declaration is used only when running cargo doc.
38// It allows documentation of WASM-specific functionality to be included
39// in documentation generated on non-WASM targets.
40
/// Documentation-only placeholder for the `Blob` type named in the signatures of the
/// WASM-specific loading functions. It is compiled only under `cfg(doc)`, so that those
/// signatures can be rendered when documentation is generated on a non-WASM target.
#[cfg(doc)]
struct Blob;
43
/// A high-level idiomatic Rust wrapper around Pdfium, the C++ PDF library used by
/// the Google Chromium project.
pub struct Pdfium {
    // The library bindings wrapped by this instance. The methods on this type make their
    // calls into Pdfium through this trait object.
    bindings: Box<dyn PdfiumLibraryBindings>,
}
49
50impl Pdfium {
51 /// Binds to a Pdfium library that was statically linked into the currently running
52 /// executable, returning a new [PdfiumLibraryBindings] object that contains bindings to the
53 /// functions exposed by the library. The application will immediately crash if Pdfium
54 /// was not correctly statically linked into the executable at compile time.
55 ///
56 /// This function is only available when this crate's `static` feature is enabled.
57 #[cfg(not(target_arch = "wasm32"))]
58 #[cfg(any(doc, feature = "static"))]
59 #[inline]
60 pub fn bind_to_statically_linked_library() -> Result<Box<dyn PdfiumLibraryBindings>, PdfiumError>
61 {
62 let bindings = StaticPdfiumBindings::new();
63
64 #[cfg(feature = "thread_safe")]
65 let bindings = ThreadSafePdfiumBindings::new(bindings);
66
67 Ok(Box::new(bindings))
68 }
69
70 /// Initializes the external Pdfium library, loading it from the system libraries.
71 /// Returns a new [PdfiumLibraryBindings] object that contains bindings to the functions exposed
72 /// by the library, or an error if the library could not be loaded.
73 #[cfg(not(target_arch = "wasm32"))]
74 #[cfg(not(feature = "static"))]
75 #[inline]
76 pub fn bind_to_system_library() -> Result<Box<dyn PdfiumLibraryBindings>, PdfiumError> {
77 let bindings = DynamicPdfiumBindings::new(
78 unsafe { Library::new(Self::pdfium_platform_library_name()) }
79 .map_err(PdfiumError::LoadLibraryError)?,
80 )?;
81
82 #[cfg(feature = "thread_safe")]
83 let bindings = ThreadSafePdfiumBindings::new(bindings);
84
85 Ok(Box::new(bindings))
86 }
87
88 /// Initializes the external Pdfium library, binding to an external WASM module.
89 /// Returns a new [PdfiumLibraryBindings] object that contains bindings to the functions exposed
90 /// by the library, or an error if the library is not available.
91 ///
92 /// It is essential that the exported `initialize_pdfium_render()` function be called
93 /// from Javascript _before_ calling this function from within your Rust code. For an example, see:
94 /// <https://github.com/ajrcarey/pdfium-render/blob/master/examples/index.html>
95 #[cfg(target_arch = "wasm32")]
96 #[inline]
97 pub fn bind_to_system_library() -> Result<Box<dyn PdfiumLibraryBindings>, PdfiumError> {
98 if PdfiumRenderWasmState::lock().is_ready() {
99 let bindings = WasmPdfiumBindings::new();
100
101 #[cfg(feature = "thread_safe")]
102 let bindings = ThreadSafePdfiumBindings::new(bindings);
103
104 Ok(Box::new(bindings))
105 } else {
106 Err(PdfiumError::PdfiumWASMModuleNotConfigured)
107 }
108 }
109
110 /// Initializes the external pdfium library, loading it from the given path.
111 /// Returns a new [PdfiumLibraryBindings] object that contains bindings to the functions
112 /// exposed by the library, or an error if the library could not be loaded.
113 #[cfg(not(target_arch = "wasm32"))]
114 #[cfg(not(feature = "static"))]
115 #[inline]
116 pub fn bind_to_library(
117 path: impl AsRef<Path>,
118 ) -> Result<Box<dyn PdfiumLibraryBindings>, PdfiumError> {
119 let bindings = DynamicPdfiumBindings::new(
120 unsafe { Library::new(path.as_ref().as_os_str()) }
121 .map_err(PdfiumError::LoadLibraryError)?,
122 )?;
123
124 #[cfg(feature = "thread_safe")]
125 let bindings = ThreadSafePdfiumBindings::new(bindings);
126
127 Ok(Box::new(bindings))
128 }
129
130 /// Returns the name of the external Pdfium library on the currently running platform.
131 /// On Linux and Android, this will be `libpdfium.so` or similar; on Windows, this will
132 /// be `pdfium.dll` or similar; on MacOS, this will be `libpdfium.dylib` or similar.
133 #[cfg(not(target_arch = "wasm32"))]
134 #[cfg(not(feature = "static"))]
135 #[inline]
136 pub fn pdfium_platform_library_name() -> OsString {
137 libloading::library_filename("pdfium")
138 }
139
140 /// Returns the name of the external Pdfium library on the currently running platform,
141 /// prefixed with the given path string.
142 #[cfg(not(target_arch = "wasm32"))]
143 #[cfg(not(feature = "static"))]
144 #[inline]
145 pub fn pdfium_platform_library_name_at_path(path: &(impl AsRef<Path> + ?Sized)) -> PathBuf {
146 path.as_ref().join(Pdfium::pdfium_platform_library_name())
147 }
148
149 /// Creates a new [Pdfium] instance from the given external Pdfium library bindings.
150 #[inline]
151 pub fn new(bindings: Box<dyn PdfiumLibraryBindings>) -> Self {
152 bindings.FPDF_InitLibrary();
153
154 Self { bindings }
155 }
156
157 // TODO: AJRC - 17/9/22 - remove deprecated Pdfium::get_bindings() function in 0.9.0
158 // as part of tracking issue https://github.com/ajrcarey/pdfium-render/issues/36
159 /// Returns the [PdfiumLibraryBindings] wrapped by this instance of [Pdfium].
160 #[deprecated(
161 since = "0.7.18",
162 note = "This function has been renamed. Use the Pdfium::bindings() function instead."
163 )]
164 #[doc(hidden)]
165 #[inline]
166 pub fn get_bindings(&self) -> &dyn PdfiumLibraryBindings {
167 self.bindings.as_ref()
168 }
169
170 /// Returns the [PdfiumLibraryBindings] wrapped by this instance of [Pdfium].
171 #[inline]
172 pub fn bindings(&self) -> &dyn PdfiumLibraryBindings {
173 self.bindings.as_ref()
174 }
175
176 // TODO: AJRC - 18/12/22 - remove deprecated Pdfium::load_pdf_from_bytes() function in 0.9.0
177 // as part of tracking issue https://github.com/ajrcarey/pdfium-render/issues/36
178 /// Returns the [PdfiumLibraryBindings] wrapped by this instance of [Pdfium].
179 #[deprecated(
180 since = "0.7.26",
181 note = "This function has been renamed. Use the Pdfium::load_pdf_from_byte_slice() function instead."
182 )]
183 #[doc(hidden)]
184 #[inline]
185 pub fn load_pdf_from_bytes(
186 &self,
187 bytes: &'static [u8],
188 password: Option<&str>,
189 ) -> Result<PdfDocument<'_>, PdfiumError> {
190 self.load_pdf_from_byte_slice(bytes, password)
191 }
192
193 /// Attempts to open a [PdfDocument] from the given static byte buffer.
194 ///
195 /// If the document is password protected, the given password will be used to unlock it.
196 pub fn load_pdf_from_byte_slice<'a>(
197 &'a self,
198 bytes: &'a [u8],
199 password: Option<&str>,
200 ) -> Result<PdfDocument<'a>, PdfiumError> {
201 Self::pdfium_document_handle_to_result(
202 self.bindings.FPDF_LoadMemDocument64(bytes, password),
203 self.bindings(),
204 )
205 }
206
207 /// Attempts to open a [PdfDocument] from the given owned byte buffer.
208 ///
209 /// If the document is password protected, the given password will be used to unlock it.
210 ///
211 /// `pdfium-render` will take ownership of the given byte buffer, ensuring its lifetime lasts
212 /// as long as the [PdfDocument] opened from it.
213 pub fn load_pdf_from_byte_vec(
214 &self,
215 bytes: Vec<u8>,
216 password: Option<&str>,
217 ) -> Result<PdfDocument<'_>, PdfiumError> {
218 Self::pdfium_document_handle_to_result(
219 self.bindings
220 .FPDF_LoadMemDocument64(bytes.as_slice(), password),
221 self.bindings(),
222 )
223 .map(|mut document| {
224 // Give the newly-created document ownership of the byte buffer, so that Pdfium can continue
225 // to read from it on an as-needed basis throughout the lifetime of the document.
226
227 document.set_source_byte_buffer(bytes);
228
229 document
230 })
231 }
232
233 /// Attempts to open a [PdfDocument] from the given file path.
234 ///
235 /// If the document is password protected, the given password will be used
236 /// to unlock it.
237 ///
238 /// This function is not available when compiling to WASM. You have several options for
239 /// loading your PDF document data in WASM:
240 /// * Use the [Pdfium::load_pdf_from_fetch()] function to download document data from a
241 /// URL using the browser's built-in `fetch` API. This function is only available when
242 /// compiling to WASM.
243 /// * Use the [Pdfium::load_pdf_from_blob()] function to load document data from a
244 /// Javascript `File` or `Blob` object (such as a `File` object returned from an HTML
245 /// `<input type="file">` element). This function is only available when compiling to WASM.
246 /// * Use another method to retrieve the bytes of the target document over the network,
247 /// then load those bytes into Pdfium using either the [Pdfium::load_pdf_from_byte_slice()]
248 /// function or the [Pdfium::load_pdf_from_byte_vec()] function.
249 /// * Embed the bytes of the target document directly into the compiled WASM module
250 /// using the `include_bytes!` macro.
251 #[cfg(not(target_arch = "wasm32"))]
252 pub fn load_pdf_from_file<'a>(
253 &'a self,
254 path: &(impl AsRef<Path> + ?Sized),
255 password: Option<&'a str>,
256 ) -> Result<PdfDocument<'a>, PdfiumError> {
257 self.load_pdf_from_reader(File::open(path).map_err(PdfiumError::IoError)?, password)
258 }
259
260 /// Attempts to open a [PdfDocument] from the given reader.
261 ///
262 /// Pdfium will only load the portions of the document it actually needs into memory.
263 /// This is more efficient than loading the entire document into memory, especially when
264 /// working with large documents, and allows for working with documents larger than the
265 /// amount of available memory.
266 ///
267 /// Because Pdfium must know the total content length in advance prior to loading
268 /// any portion of it, the given reader must implement the [Seek] trait as well as
269 /// the [Read] trait.
270 ///
271 /// If the document is password protected, the given password will be used
272 /// to unlock it.
273 ///
274 /// This function is not available when compiling to WASM. You have several options for
275 /// loading your PDF document data in WASM:
276 /// * Use the [Pdfium::load_pdf_from_fetch()] function to download document data from a
277 /// URL using the browser's built-in `fetch` API. This function is only available when
278 /// compiling to WASM.
279 /// * Use the [Pdfium::load_pdf_from_blob()] function to load document data from a
280 /// Javascript `File` or `Blob` object (such as a `File` object returned from an HTML
281 /// `<input type="file">` element). This function is only available when compiling to WASM.
282 /// * Use another method to retrieve the bytes of the target document over the network,
283 /// then load those bytes into Pdfium using either the [Pdfium::load_pdf_from_byte_slice()]
284 /// function or the [Pdfium::load_pdf_from_byte_vec()] function.
285 /// * Embed the bytes of the target document directly into the compiled WASM module
286 /// using the `include_bytes!` macro.
287 #[cfg(not(target_arch = "wasm32"))]
288 pub fn load_pdf_from_reader<'a, R: Read + Seek + 'a>(
289 &'a self,
290 reader: R,
291 password: Option<&'a str>,
292 ) -> Result<PdfDocument<'a>, PdfiumError> {
293 let mut reader = get_pdfium_file_accessor_from_reader(reader);
294
295 Pdfium::pdfium_document_handle_to_result(
296 self.bindings
297 .FPDF_LoadCustomDocument(reader.as_fpdf_file_access_mut_ptr(), password),
298 self.bindings(),
299 )
300 .map(|mut document| {
301 // Give the newly-created document ownership of the reader, so that Pdfium can continue
302 // to read from it on an as-needed basis throughout the lifetime of the document.
303
304 document.set_file_access_reader(reader);
305
306 document
307 })
308 }
309
310 /// Attempts to open a [PdfDocument] by loading document data from the given URL.
311 /// The Javascript `fetch` API is used to download data over the network.
312 ///
313 /// If the document is password protected, the given password will be used to unlock it.
314 ///
315 /// This function is only available when compiling to WASM.
316 #[cfg(any(doc, target_arch = "wasm32"))]
317 pub async fn load_pdf_from_fetch<'a>(
318 &'a self,
319 url: impl ToString,
320 password: Option<&str>,
321 ) -> Result<PdfDocument<'a>, PdfiumError> {
322 if let Some(window) = window() {
323 let fetch_result = JsFuture::from(window.fetch_with_str(url.to_string().as_str()))
324 .await
325 .map_err(PdfiumError::WebSysFetchError)?;
326
327 debug_assert!(fetch_result.is_instance_of::<Response>());
328
329 let response: Response = fetch_result
330 .dyn_into()
331 .map_err(|_| PdfiumError::WebSysInvalidResponseError)?;
332
333 let blob: Blob =
334 JsFuture::from(response.blob().map_err(PdfiumError::WebSysFetchError)?)
335 .await
336 .map_err(PdfiumError::WebSysFetchError)?
337 .into();
338
339 self.load_pdf_from_blob(blob, password).await
340 } else {
341 Err(PdfiumError::WebSysWindowObjectNotAvailable)
342 }
343 }
344
345 /// Attempts to open a [PdfDocument] by loading document data from the given `Blob`.
346 /// A `File` object returned from a `FileList` is a suitable `Blob`:
347 ///
348 /// ```text
349 /// <input id="filePicker" type="file">
350 ///
351 /// const file = document.getElementById('filePicker').files[0];
352 /// ```
353 ///
354 /// If the document is password protected, the given password will be used to unlock it.
355 ///
356 /// This function is only available when compiling to WASM.
357 #[cfg(any(doc, target_arch = "wasm32"))]
358 pub async fn load_pdf_from_blob<'a>(
359 &'a self,
360 blob: Blob,
361 password: Option<&str>,
362 ) -> Result<PdfDocument<'a>, PdfiumError> {
363 let array_buffer: ArrayBuffer = JsFuture::from(blob.array_buffer())
364 .await
365 .map_err(PdfiumError::WebSysFetchError)?
366 .into();
367
368 let u8_array: Uint8Array = Uint8Array::new(&array_buffer);
369
370 let bytes: Vec<u8> = u8_array.to_vec();
371
372 self.load_pdf_from_byte_vec(bytes, password)
373 }
374
375 /// Creates a new, empty [PdfDocument] in memory.
376 pub fn create_new_pdf(&self) -> Result<PdfDocument<'_>, PdfiumError> {
377 Self::pdfium_document_handle_to_result(
378 self.bindings.FPDF_CreateNewDocument(),
379 self.bindings(),
380 )
381 .map(|mut document| {
382 document.set_version(PdfDocumentVersion::DEFAULT_VERSION);
383
384 document
385 })
386 }
387
388 /// Returns a [PdfDocument] from the given `FPDF_DOCUMENT` handle, if possible.
389 pub(crate) fn pdfium_document_handle_to_result(
390 handle: crate::bindgen::FPDF_DOCUMENT,
391 bindings: &dyn PdfiumLibraryBindings,
392 ) -> Result<PdfDocument<'_>, PdfiumError> {
393 if handle.is_null() {
394 // Retrieve the error code of the last error recorded by Pdfium.
395
396 if let Some(error) = match bindings.FPDF_GetLastError() as u32 {
397 crate::bindgen::FPDF_ERR_SUCCESS => None,
398 crate::bindgen::FPDF_ERR_UNKNOWN => Some(PdfiumInternalError::Unknown),
399 crate::bindgen::FPDF_ERR_FILE => Some(PdfiumInternalError::FileError),
400 crate::bindgen::FPDF_ERR_FORMAT => Some(PdfiumInternalError::FormatError),
401 crate::bindgen::FPDF_ERR_PASSWORD => Some(PdfiumInternalError::PasswordError),
402 crate::bindgen::FPDF_ERR_SECURITY => Some(PdfiumInternalError::SecurityError),
403 crate::bindgen::FPDF_ERR_PAGE => Some(PdfiumInternalError::PageError),
404 // The Pdfium documentation says "... if the previous SDK call succeeded, [then] the
405 // return value of this function is not defined". On Linux, at least, a return value
406 // of FPDF_ERR_SUCCESS seems to be consistently returned; on Windows, however, the
407 // return values are indeed unpredictable. See https://github.com/ajrcarey/pdfium-render/issues/24.
408 // Therefore, if the return value does not match one of the FPDF_ERR_* constants, we must
409 // assume success.
410 _ => None,
411 } {
412 Err(PdfiumError::PdfiumLibraryInternalError(error))
413 } else {
414 // This would be an unusual situation; a null handle indicating failure,
415 // yet Pdfium's error code indicates success.
416
417 Err(PdfiumError::PdfiumLibraryInternalError(
418 PdfiumInternalError::Unknown,
419 ))
420 }
421 } else {
422 Ok(PdfDocument::from_pdfium(handle, bindings))
423 }
424 }
425}
426
427impl Drop for Pdfium {
428 /// Closes the external Pdfium library, releasing held memory.
429 #[inline]
430 fn drop(&mut self) {
431 self.bindings.FPDF_DestroyLibrary();
432 }
433}
434
435impl Default for Pdfium {
436 /// Binds to a Pdfium library that was statically linked into the currently running
437 /// executable by calling [Pdfium::bind_to_statically_linked_library]. This function
438 /// will panic if no statically linked Pdfium functions can be located.
439 #[cfg(feature = "static")]
440 #[inline]
441 fn default() -> Self {
442 Pdfium::new(Pdfium::bind_to_statically_linked_library().unwrap())
443 }
444
445 /// Binds to an external Pdfium library by first attempting to bind to a Pdfium library
446 /// in the current working directory; if that fails, then a system-provided library
447 /// will be used as a fall back.
448 ///
449 /// This function will panic if no suitable Pdfium library can be loaded.
450 #[cfg(not(feature = "static"))]
451 #[cfg(not(target_arch = "wasm32"))]
452 #[inline]
453 fn default() -> Self {
454 let bindings = match Pdfium::bind_to_library(
455 // Attempt to bind to a Pdfium library in the current working directory...
456 Pdfium::pdfium_platform_library_name_at_path("./"),
457 ) {
458 Ok(bindings) => Ok(bindings),
459 Err(PdfiumError::LoadLibraryError(err)) => {
460 match err {
461 libloading::Error::DlOpen { .. } => {
462 // For DlOpen errors specifically, indicating the Pdfium library in the
463 // current working directory does not exist or is corrupted, we attempt
464 // to fall back to a system-provided library.
465
466 Pdfium::bind_to_system_library()
467 }
468 _ => Err(PdfiumError::LoadLibraryError(err)),
469 }
470 }
471 Err(err) => Err(err),
472 };
473
474 Pdfium::new(bindings.unwrap())
475 }
476
477 /// Binds to an external Pdfium library by attempting to a system-provided library.
478 ///
479 /// This function will panic if no suitable Pdfium library can be loaded.
480 #[cfg(target_arch = "wasm32")]
481 fn default() -> Self {
482 Pdfium::new(Pdfium::bind_to_system_library().unwrap())
483 }
484}
485
486impl Debug for Pdfium {
487 #[inline]
488 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
489 f.debug_struct("Pdfium").finish()
490 }
491}
492
// Marks `Pdfium` as shareable between threads when the `sync` feature is enabled.
// NOTE(review): nothing in this file establishes that the boxed bindings are safe to
// access from several threads at once; soundness presumably rests on how the `sync`
// feature is meant to be used — confirm against the crate's feature documentation.
#[cfg(feature = "sync")]
unsafe impl Sync for Pdfium {}
495
// Marks `Pdfium` as transferable to another thread when the `sync` feature is enabled.
// NOTE(review): nothing in this file establishes that the boxed bindings may be moved
// to, and dropped on, a different thread; soundness presumably rests on how the `sync`
// feature is meant to be used — confirm against the crate's feature documentation.
#[cfg(feature = "sync")]
unsafe impl Send for Pdfium {}