Skip to main content

PdfDocument

Struct PdfDocument 

Source
pub struct PdfDocument { /* private fields */ }
Expand description

High-level PDF document handle.

Implementations§

Source§

impl PdfDocument

Source

pub fn open(data: impl Into<PdfData>) -> Result<Self>

Open a PDF from bytes.

Source

pub fn open_with_processing_limits( data: impl Into<PdfData>, limits: ProcessingLimits, ) -> Result<Self>

Open a PDF from bytes with processing limits.

Source

pub fn open_with_password( data: impl Into<PdfData>, password: &str, ) -> Result<Self>

Open a password-protected PDF.

Source

pub fn open_with_password_and_processing_limits( data: impl Into<PdfData>, password: &str, limits: ProcessingLimits, ) -> Result<Self>

Open a password-protected PDF with processing limits.

Source

pub fn pdf(&self) -> &Pdf

Access the underlying parsed PDF.

Source

pub fn set_settings(&mut self, settings: InterpreterSettings)

Set interpreter settings (font resolver, cmap resolver, etc.).

Source

pub fn page_count(&self) -> usize

Number of pages.

Source

pub fn page_geometry(&self, index: usize) -> Result<PageGeometry>

Get the geometry of a page.

Source

pub fn render_page( &self, index: usize, options: &RenderOptions, ) -> Result<RenderedPage>

Render a single page.

If the document contains an XFA template, it is automatically flattened to static PDF content before rendering. Without flattening, a conventional renderer would output only the “Please wait” placeholder page that XFA PDFs carry for viewers without XFA support (Adobe Reader replaces that page with the real form). If flattening fails, rendering falls back to the original document as a best-effort path.

Source

pub fn render_page_with_config( &self, index: usize, config: &RenderConfig, ) -> Result<RenderedPage>

Render a single page using the high-level render config.

XFA documents are auto-flattened before rendering (same as render_page). If flattening fails, rendering falls back to the original document.

Source

pub fn render_page_cmyk(&self, index: usize, dpi: u32) -> Result<RenderedPage>

Render a single page to a CMYK buffer.

Source

pub fn render_all(&self, options: &RenderOptions) -> Vec<RenderedPage>

Render all pages, in parallel when the parallel feature is enabled.

Source

pub fn render_all_with_config(&self, config: &RenderConfig) -> Vec<RenderedPage>

Render all pages using the high-level render config.

Source

pub fn thumbnail( &self, index: usize, options: &ThumbnailOptions, ) -> Result<RenderedPage>

Generate a thumbnail for a single page.

Source

pub fn thumbnails_all(&self, options: &ThumbnailOptions) -> Vec<RenderedPage>

Generate thumbnails for all pages, in parallel when the parallel feature is enabled.

Source

pub fn extract_text(&self, index: usize) -> Result<String>

Extract text from a page as a single string.

Source

pub fn extract_text_blocks(&self, index: usize) -> Result<Vec<TextBlock>>

Extract structured text blocks from a page.

Source

pub fn extract_all_text_blocks(&self) -> Vec<Vec<TextBlock>>

Extract structured text blocks from all pages, reusing interpreter settings.

Source

pub fn extract_acroform_text(&self) -> String

Extract text values from AcroForm fields, including push-button captions.

Returns a single string concatenating all non-empty field values separated by newlines. Useful when the document stores its readable content in form field values rather than (or in addition to) page content streams.

Source

pub fn extract_all_text(&self) -> String

Extract all text from the document: page content streams plus AcroForm field values. Mirrors pdftotext behaviour.

Source

pub fn search_text(&self, query: &str) -> Vec<usize>

Simple text search: returns page indices containing the query string.

Source

pub fn info(&self) -> DocumentInfo

Extract document metadata.

Source

pub fn bookmarks(&self) -> Vec<BookmarkItem>

Extract document outline / bookmarks.

Source

pub fn ocr_page( &self, index: usize, backend: &dyn OcrBackend, dpi: f64, ) -> Result<OcrResult>

Run OCR on a page and return the recognized text and word positions.

The page is rendered at the given dpi before recognition; the parameter is required (there is no implicit default), and 150 is the value used in the example below. Pass any OcrBackend implementation; use OcrsBackend::try_default to load the pure-Rust ocrs engine from the standard model paths.

§Example
use pdf_engine::{PdfDocument, OcrsBackend, RenderOptions};

let doc = PdfDocument::open(std::fs::read("scan.pdf").unwrap()).unwrap();
let backend = OcrsBackend::try_default().unwrap();
let result = doc.ocr_page(0, &backend, 150.0_f64).unwrap();
println!("{}", result.text);

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoEither for T

Source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

impl<T> Pointable for T

Source§

const ALIGN: usize

The alignment of the pointer.
Source§

type Init = T

The type for initializers.
Source§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes an object with the given initializer. Read more
Source§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
Source§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
Source§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
Source§

impl<T> Same for T

Source§

type Output = T

Should always be Self.
Source§

impl<T, S> SimdFrom<T, S> for T
where S: Simd,

Source§

fn simd_from(value: T, _simd: S) -> T

Source§

impl<F, T, S> SimdInto<T, S> for F
where T: SimdFrom<F, S>, S: Simd,

Source§

fn simd_into(self, simd: S) -> T

Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V

Source§

impl<T> ErasedDestructor for T
where T: 'static,