Skip to main content

PdfDocument

Struct PdfDocument 

Source
pub struct PdfDocument { /* private fields */ }
Expand description

High-level PDF document handle.

Implementations§

Source§

impl PdfDocument

Source

pub fn open(data: impl Into<PdfData>) -> Result<Self>

Open a PDF from bytes.

Source

pub fn open_with_processing_limits( data: impl Into<PdfData>, limits: ProcessingLimits, ) -> Result<Self>

Open a PDF from bytes with processing limits.

Source

pub fn open_with_password( data: impl Into<PdfData>, password: &str, ) -> Result<Self>

Open a password-protected PDF.

Source

pub fn open_with_password_and_processing_limits( data: impl Into<PdfData>, password: &str, limits: ProcessingLimits, ) -> Result<Self>

Open a password-protected PDF with processing limits.

Source

pub fn pdf(&self) -> &Pdf

Access the underlying parsed PDF.

Source

pub fn set_settings(&mut self, settings: InterpreterSettings)

Set interpreter settings (font resolver, cmap resolver, etc.).

Source

pub fn page_count(&self) -> usize

Number of pages.

Source

pub fn page_geometry(&self, index: usize) -> Result<PageGeometry>

Get the geometry of a page.

Source

pub fn render_page( &self, index: usize, options: &RenderOptions, ) -> Result<RenderedPage>

Render a single page.

If the document contains an XFA template, it is automatically flattened to static PDF content before rendering. This prevents the “Please wait” placeholder page that a conventional (non-XFA-aware) renderer would otherwise show in place of the form content that Adobe Reader displays. If flattening fails, rendering falls back to the original document as a best-effort path.

Source

pub fn render_page_with_config( &self, index: usize, config: &RenderConfig, ) -> Result<RenderedPage>

Render a single page using the high-level render config.

XFA documents are auto-flattened before rendering (same as render_page). If flattening fails, rendering falls back to the original document.

Source

pub fn render_page_cmyk(&self, index: usize, dpi: u32) -> Result<RenderedPage>

Render a single page to a CMYK buffer.

Source

pub fn render_all(&self, options: &RenderOptions) -> Vec<RenderedPage>

Render all pages, in parallel when the parallel feature is enabled.

Source

pub fn render_all_with_config(&self, config: &RenderConfig) -> Vec<RenderedPage>

Render all pages using the high-level render config.

Source

pub fn thumbnail( &self, index: usize, options: &ThumbnailOptions, ) -> Result<RenderedPage>

Generate a thumbnail for a single page.

Source

pub fn thumbnails_all(&self, options: &ThumbnailOptions) -> Vec<RenderedPage>

Generate thumbnails for all pages, in parallel when the parallel feature is enabled.

Source

pub fn extract_text(&self, index: usize) -> Result<String>

Extract text from a page as a single string.

Source

pub fn extract_text_blocks(&self, index: usize) -> Result<Vec<TextBlock>>

Extract structured text blocks from a page.

Source

pub fn extract_all_text_blocks(&self) -> Vec<Vec<TextBlock>>

Extract structured text blocks from all pages, reusing interpreter settings.

Source

pub fn extract_acroform_text(&self) -> String

Extract text values from AcroForm fields, including push-button captions.

Returns a single string concatenating all non-empty field values separated by newlines. Useful when the document stores its readable content in form field values rather than (or in addition to) page content streams.

Source

pub fn extract_all_text(&self) -> String

Extract all text from the document: page content streams plus AcroForm field values. Mirrors pdftotext behaviour.

When the xfa feature is enabled and the document is an XFA form, the raw page content stream typically contains only an Adobe-Reader placeholder ("The document you are trying to load requires Adobe Reader 8 or higher…"). For those cases we transparently re-extract from the flattened representation so callers get the rendered form content. The raw path is preferred whenever it produces non-trivially more text than the flattened path, which preserves existing behaviour for non-XFA PDFs and for XFA PDFs that already carry their content as ordinary text operators.

Source

pub fn search_text(&self, query: &str) -> Vec<usize>

Simple text search: returns page indices containing the query string.

Source

pub fn info(&self) -> DocumentInfo

Extract document metadata.

Source

pub fn bookmarks(&self) -> Vec<BookmarkItem>

Extract document outline / bookmarks.

Source

pub fn ocr_page( &self, index: usize, backend: &dyn OcrBackend, dpi: f64, ) -> Result<OcrResult>

Run OCR on a page and return the recognized text and word positions.

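The page is rendered at `dpi` dots per inch before recognition; 150 is the suggested value, but the parameter has no default and must always be passed explicitly. Pass any `OcrBackend` implementation; use `OcrsBackend::try_default` to load the pure-Rust ocrs engine from the standard model paths.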

§Example
use pdf_engine::{PdfDocument, OcrsBackend, RenderOptions};

let doc = PdfDocument::open(std::fs::read("scan.pdf").unwrap()).unwrap();
let backend = OcrsBackend::try_default().unwrap();
let result = doc.ocr_page(0, &backend, 150.0_f64).unwrap();
println!("{}", result.text);

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T> Instrument for T

Source§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more
Source§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoEither for T

Source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

impl<T> Pointable for T

Source§

const ALIGN: usize

The alignment of the pointer.
Source§

type Init = T

The type for initializers.
Source§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes a value with the given initializer. Read more
Source§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
Source§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
Source§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
Source§

impl<T> PolicyExt for T
where T: ?Sized,

Source§

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns Action::Follow only if self and other return Action::Follow. Read more
Source§

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns Action::Follow if either self or other returns Action::Follow. Read more
Source§

impl<T> Same for T

Source§

type Output = T

Should always be Self
Source§

impl<T, S> SimdFrom<T, S> for T
where S: Simd,

Source§

fn simd_from(value: T, _simd: S) -> T

Source§

impl<F, T, S> SimdInto<T, S> for F
where T: SimdFrom<F, S>, S: Simd,

Source§

fn simd_into(self, simd: S) -> T

Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V

Source§

impl<T> WithSubscriber for T

Source§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more
Source§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more
Source§

impl<T> ErasedDestructor for T
where T: 'static,