pub trait PdfBackend {
type Document;
type Page;
type Error: Error + Into<PdfError>;
Show 21 methods
// Required methods
fn open(bytes: &[u8]) -> Result<Self::Document, Self::Error>;
fn open_with_password(
bytes: &[u8],
password: &[u8],
) -> Result<Self::Document, Self::Error>;
fn page_count(doc: &Self::Document) -> usize;
fn get_page(
doc: &Self::Document,
index: usize,
) -> Result<Self::Page, Self::Error>;
fn page_media_box(
doc: &Self::Document,
page: &Self::Page,
) -> Result<BBox, Self::Error>;
fn page_crop_box(
doc: &Self::Document,
page: &Self::Page,
) -> Result<Option<BBox>, Self::Error>;
fn page_trim_box(
doc: &Self::Document,
page: &Self::Page,
) -> Result<Option<BBox>, Self::Error>;
fn page_bleed_box(
doc: &Self::Document,
page: &Self::Page,
) -> Result<Option<BBox>, Self::Error>;
fn page_art_box(
doc: &Self::Document,
page: &Self::Page,
) -> Result<Option<BBox>, Self::Error>;
fn page_rotate(
doc: &Self::Document,
page: &Self::Page,
) -> Result<i32, Self::Error>;
fn document_metadata(
doc: &Self::Document,
) -> Result<DocumentMetadata, Self::Error>;
fn document_bookmarks(
doc: &Self::Document,
) -> Result<Vec<Bookmark>, Self::Error>;
fn page_annotations(
doc: &Self::Document,
page: &Self::Page,
) -> Result<Vec<Annotation>, Self::Error>;
fn page_hyperlinks(
doc: &Self::Document,
page: &Self::Page,
) -> Result<Vec<Hyperlink>, Self::Error>;
fn interpret_page(
doc: &Self::Document,
page: &Self::Page,
handler: &mut dyn ContentHandler,
options: &ExtractOptions,
) -> Result<(), Self::Error>;
fn document_form_fields(
doc: &Self::Document,
) -> Result<Vec<FormField>, Self::Error>;
fn document_structure_tree(
doc: &Self::Document,
) -> Result<Vec<StructElement>, Self::Error>;
fn extract_image_content(
doc: &Self::Document,
page: &Self::Page,
image_name: &str,
) -> Result<ImageContent, Self::Error>;
// Provided methods
fn validate(
doc: &Self::Document,
) -> Result<Vec<ValidationIssue>, Self::Error> { ... }
fn document_signatures(
doc: &Self::Document,
) -> Result<Vec<SignatureInfo>, Self::Error> { ... }
fn repair(
bytes: &[u8],
options: &RepairOptions,
) -> Result<(Vec<u8>, RepairResult), Self::Error> { ... }
}
Trait abstracting PDF parsing operations.
A backend provides methods to open PDF documents, access pages,
extract page properties (MediaBox, CropBox, Rotate), and interpret
page content streams via a ContentHandler callback.
§Associated Types
- Document: The parsed PDF document representation.
- Page: A reference to a single page within a document.
- Error: Backend-specific error type, convertible to PdfError.
§Usage
let doc = MyBackend::open(pdf_bytes)?;
let page_count = MyBackend::page_count(&doc);
let page = MyBackend::get_page(&doc, 0)?;
let media_box = MyBackend::page_media_box(&doc, &page)?;
MyBackend::interpret_page(&doc, &page, &mut handler, &options)?;
Required Associated Types§
Required Methods§
Sourcefn open(bytes: &[u8]) -> Result<Self::Document, Self::Error>
fn open(bytes: &[u8]) -> Result<Self::Document, Self::Error>
Parse PDF bytes into a document.
§Errors
Returns an error if the bytes do not represent a valid PDF document.
If the document is encrypted, returns PdfError::PasswordRequired.
Sourcefn open_with_password(
bytes: &[u8],
password: &[u8],
) -> Result<Self::Document, Self::Error>
fn open_with_password( bytes: &[u8], password: &[u8], ) -> Result<Self::Document, Self::Error>
Parse PDF bytes into a document, decrypting with the given password.
Supports both user and owner passwords. If the PDF is not encrypted, the password is ignored and the document opens normally.
§Errors
Returns PdfError::InvalidPassword if the password is incorrect.
Returns other errors if the bytes are not a valid PDF document.
Sourcefn page_count(doc: &Self::Document) -> usize
fn page_count(doc: &Self::Document) -> usize
Return the number of pages in the document.
Sourcefn get_page(
doc: &Self::Document,
index: usize,
) -> Result<Self::Page, Self::Error>
fn get_page( doc: &Self::Document, index: usize, ) -> Result<Self::Page, Self::Error>
Access a page by 0-based index.
§Errors
Returns an error if the index is out of range or the page cannot be loaded.
Sourcefn page_media_box(
doc: &Self::Document,
page: &Self::Page,
) -> Result<BBox, Self::Error>
fn page_media_box( doc: &Self::Document, page: &Self::Page, ) -> Result<BBox, Self::Error>
Get the MediaBox for a page.
MediaBox is required by the PDF specification and defines the boundaries
of the physical page. The returned BBox uses the library’s top-left
origin coordinate system.
§Errors
Returns an error if the MediaBox cannot be resolved (e.g., missing from both the page and its parent page tree).
Sourcefn page_crop_box(
doc: &Self::Document,
page: &Self::Page,
) -> Result<Option<BBox>, Self::Error>
fn page_crop_box( doc: &Self::Document, page: &Self::Page, ) -> Result<Option<BBox>, Self::Error>
Get the CropBox for a page, if explicitly set.
CropBox defines the visible region of the page. Returns None if
not explicitly set (in which case MediaBox serves as the CropBox).
§Errors
Returns an error if the CropBox entry exists but is malformed.
Sourcefn page_trim_box(
doc: &Self::Document,
page: &Self::Page,
) -> Result<Option<BBox>, Self::Error>
fn page_trim_box( doc: &Self::Document, page: &Self::Page, ) -> Result<Option<BBox>, Self::Error>
Get the TrimBox for a page, if explicitly set.
TrimBox defines the intended dimensions of the finished page after
trimming. Returns None if not set. Supports inheritance from
parent page tree nodes.
§Errors
Returns an error if the TrimBox entry exists but is malformed.
Sourcefn page_bleed_box(
doc: &Self::Document,
page: &Self::Page,
) -> Result<Option<BBox>, Self::Error>
fn page_bleed_box( doc: &Self::Document, page: &Self::Page, ) -> Result<Option<BBox>, Self::Error>
Get the BleedBox for a page, if explicitly set.
BleedBox defines the region to which page contents should be clipped
when output in a production environment. Returns None if not set.
Supports inheritance from parent page tree nodes.
§Errors
Returns an error if the BleedBox entry exists but is malformed.
Sourcefn page_art_box(
doc: &Self::Document,
page: &Self::Page,
) -> Result<Option<BBox>, Self::Error>
fn page_art_box( doc: &Self::Document, page: &Self::Page, ) -> Result<Option<BBox>, Self::Error>
Get the ArtBox for a page, if explicitly set.
ArtBox defines the extent of the page’s meaningful content as intended
by the page’s creator. Returns None if not set. Supports inheritance
from parent page tree nodes.
§Errors
Returns an error if the ArtBox entry exists but is malformed.
Sourcefn page_rotate(
doc: &Self::Document,
page: &Self::Page,
) -> Result<i32, Self::Error>
fn page_rotate( doc: &Self::Document, page: &Self::Page, ) -> Result<i32, Self::Error>
Get the page rotation angle in degrees.
Returns one of: 0, 90, 180, or 270. Defaults to 0 if not specified.
§Errors
Returns an error if the Rotate entry exists but is malformed.
Sourcefn document_metadata(
doc: &Self::Document,
) -> Result<DocumentMetadata, Self::Error>
fn document_metadata( doc: &Self::Document, ) -> Result<DocumentMetadata, Self::Error>
Extract document-level metadata from the PDF /Info dictionary.
Returns a DocumentMetadata containing title, author, subject,
keywords, creator, producer, creation date, and modification date.
Fields not present in the PDF are returned as None.
§Errors
Returns an error if the /Info dictionary exists but is malformed.
Sourcefn document_bookmarks(
doc: &Self::Document,
) -> Result<Vec<Bookmark>, Self::Error>
fn document_bookmarks( doc: &Self::Document, ) -> Result<Vec<Bookmark>, Self::Error>
Extract the document outline (bookmarks / table of contents).
Returns a flat list of Bookmarks representing the outline tree,
with each bookmark’s level indicating its depth. Returns an empty
Vec if the document has no /Outlines dictionary.
§Errors
Returns an error if the /Outlines dictionary exists but is malformed.
Sourcefn page_annotations(
doc: &Self::Document,
page: &Self::Page,
) -> Result<Vec<Annotation>, Self::Error>
fn page_annotations( doc: &Self::Document, page: &Self::Page, ) -> Result<Vec<Annotation>, Self::Error>
Extract annotations from a page.
Returns a list of Annotations found in the page’s /Annots array.
Returns an empty Vec if the page has no annotations.
§Errors
Returns an error if the /Annots array exists but is malformed.
Sourcefn page_hyperlinks(
doc: &Self::Document,
page: &Self::Page,
) -> Result<Vec<Hyperlink>, Self::Error>
fn page_hyperlinks( doc: &Self::Document, page: &Self::Page, ) -> Result<Vec<Hyperlink>, Self::Error>
Sourcefn interpret_page(
doc: &Self::Document,
page: &Self::Page,
handler: &mut dyn ContentHandler,
options: &ExtractOptions,
) -> Result<(), Self::Error>
fn interpret_page( doc: &Self::Document, page: &Self::Page, handler: &mut dyn ContentHandler, options: &ExtractOptions, ) -> Result<(), Self::Error>
Interpret the page’s content stream, calling back into the handler.
The interpreter processes PDF content stream operators (text, path,
image) and notifies the handler of extracted content via
ContentHandler callbacks. Resource limits from options are
enforced during interpretation.
§Errors
Returns an error if content stream parsing fails or a resource limit is exceeded.
Sourcefn document_form_fields(
doc: &Self::Document,
) -> Result<Vec<FormField>, Self::Error>
fn document_form_fields( doc: &Self::Document, ) -> Result<Vec<FormField>, Self::Error>
Extract form fields from the document’s AcroForm dictionary.
Returns a list of FormFields from the /AcroForm dictionary in
the document catalog. Walks the field tree recursively, handling
/Kids for hierarchical fields. Returns an empty Vec if the document
has no AcroForm.
§Errors
Returns an error if the AcroForm exists but is malformed.
Sourcefn document_structure_tree(
doc: &Self::Document,
) -> Result<Vec<StructElement>, Self::Error>
fn document_structure_tree( doc: &Self::Document, ) -> Result<Vec<StructElement>, Self::Error>
Extract the document’s structure tree from /StructTreeRoot.
Returns the structure tree elements for tagged PDFs. Each element has a type (e.g., “H1”, “P”, “Table”), MCIDs linking to page content, and child elements forming a tree. Returns an empty Vec if the document has no structure tree (untagged PDF).
§Errors
Returns an error if the structure tree exists but is malformed.
Sourcefn extract_image_content(
doc: &Self::Document,
page: &Self::Page,
image_name: &str,
) -> Result<ImageContent, Self::Error>
fn extract_image_content( doc: &Self::Document, page: &Self::Page, image_name: &str, ) -> Result<ImageContent, Self::Error>
Extract image content (raw bytes) from a named image XObject on a page.
Locates the image XObject by name in the page’s /Resources/XObject
dictionary and extracts its stream data. For DCTDecode (JPEG) images,
returns the raw JPEG bytes. For FlateDecode images, decompresses and
returns raw pixel data. Handles chained filters.
§Errors
Returns an error if the image XObject is not found or stream decoding fails.
Provided Methods§
Sourcefn validate(doc: &Self::Document) -> Result<Vec<ValidationIssue>, Self::Error>
fn validate(doc: &Self::Document) -> Result<Vec<ValidationIssue>, Self::Error>
Validate the PDF document and report specification violations.
Checks for common PDF specification issues such as missing required
keys, broken object references, invalid page tree structure, and
missing fonts. Returns a list of ValidationIssues describing
any problems found.
An empty result indicates no issues were detected.
§Errors
Returns an error if the document structure is too corrupted to perform validation.
Sourcefn document_signatures(
doc: &Self::Document,
) -> Result<Vec<SignatureInfo>, Self::Error>
fn document_signatures( doc: &Self::Document, ) -> Result<Vec<SignatureInfo>, Self::Error>
Extract digital signature information from the document.
Returns a list of SignatureInfos for each signature field
(/FT /Sig) found in the /AcroForm dictionary. Both signed
and unsigned signature fields are included.
Returns an empty Vec if the document has no signature fields.
§Errors
Returns an error if the AcroForm exists but is malformed.
Sourcefn repair(
bytes: &[u8],
options: &RepairOptions,
) -> Result<(Vec<u8>, RepairResult), Self::Error>
fn repair( bytes: &[u8], options: &RepairOptions, ) -> Result<(Vec<u8>, RepairResult), Self::Error>
Attempt to repair common PDF issues in the raw bytes.
Takes the original PDF bytes and repair options, applies best-effort fixes, and returns the repaired bytes along with a log of what was fixed. The caller can then open the repaired bytes normally.
§Errors
Returns an error if the PDF is too corrupted to attempt repair.
Dyn Compatibility§
This trait is not dyn compatible.
In older versions of Rust, dyn compatibility was called "object safety", so this trait is not object safe.