pdf_engine/lib.rs
1#![warn(missing_docs)]
2//! Unified PDF rendering engine.
3//!
4//! `pdf-engine` is the main public-facing API for reading and rendering PDF
5//! documents. It wraps the lower-level `pdf-syntax` / `pdf-interpret` /
6//! `pdf-render` stack and exposes a single [`PdfDocument`] handle for all
7//! common operations: page rendering, text extraction, thumbnails, metadata,
8//! bookmarks, and full-text search.
9//!
10//! # Quick Start
11//!
12//! ```no_run
13//! use std::sync::Arc;
14//! use pdf_engine::{PdfDocument, RenderOptions};
15//!
16//! // Load from bytes (accepts Arc<Vec<u8>>, Vec<u8>, or any Into<PdfData>).
17//! let data = Arc::new(std::fs::read("invoice.pdf").unwrap());
18//! let doc = PdfDocument::open(data).unwrap();
19//!
20//! println!("{} pages — {:?}", doc.page_count(), doc.info().title);
21//!
22//! // Render page 0 at 150 DPI → raw RGBA pixel data.
23//! let opts = RenderOptions { dpi: 150.0, ..Default::default() };
24//! let rendered = doc.render_page(0, &opts).unwrap();
25//! println!("{}×{} px", rendered.width, rendered.height);
26//!
27//! // Plain-text extraction.
28//! let text = doc.extract_text(0).unwrap();
29//! println!("{text}");
30//!
31//! // Structured text with per-span positions.
32//! for block in doc.extract_text_blocks(0).unwrap() {
33//!     for span in &block.spans {
34//!         println!(" [{:.0}, {:.0}] {}", span.x, span.y, span.text);
35//!     }
36//! }
37//!
38//! // Full-text search — returns 0-based page indices.
39//! let hits = doc.search_text("total");
40//! println!("'total' found on {} page(s)", hits.len());
41//! ```
42//!
43//! # Key Types
44//!
45//! | Type | Description |
46//! |---|---|
47//! | [`BatchConfig`] / [`BatchResult`] | Worker-pool processing for many PDFs |
48//! | [`PdfDocument`] | Main document handle |
49//! | [`RenderConfig`] / [`RenderOptions`] | DPI, color mode, background color, optional forced width/height |
50//! | [`RenderedPage`] | RGBA or CMYK pixel data (row-major, 4 bytes per pixel) |
51//! | [`PageGeometry`] | MediaBox, CropBox, TrimBox, BleedBox, rotation |
52//! | [`PageBox`] | A rectangle in PDF user-space points |
53//! | [`DocumentInfo`] | Title, author, subject, creator, producer |
54//! | [`TextBlock`] / [`TextSpan`] | Structured text with position and font size |
55//! | [`BookmarkItem`] | Outline node — title, target page, nested children |
56//! | [`ThumbnailOptions`] | Max-dimension constraint for thumbnail rendering |
57
// Public submodules of the crate. Selected items from several of them are
// re-exported at the crate root further down, so those items can also be
// imported directly as `pdf_engine::<Item>`.
58pub mod api;
59/// Public engine error type and stable error-code mapping. See
60/// [`api_error::PdfEngineError`].
61pub mod api_error;
62pub mod batch;
63pub mod color;
64pub mod document;
65pub mod error;
66pub mod geometry;
67pub mod limits;
68pub mod ocr;
69pub mod render;
70pub mod text;
71pub mod thumbnail;
// Compiled only when the `xfa` Cargo feature is enabled.
72#[cfg(feature = "xfa")]
73pub mod xfa;
74
// Crate-root re-exports: make the listed items of each submodule available
// directly as `pdf_engine::<Item>` (as used in the Quick Start example).
75pub use batch::{process_batch, BatchConfig, BatchResult, ErrorStrategy, PdfBatch};
76pub use color::preserve_device_cmyk;
77pub use document::{BookmarkItem, DocumentInfo, PdfDocument};
78pub use error::{EngineError, Result};
79pub use geometry::{PageBox, PageGeometry, PageRotation};
80pub use limits::{LimitError, ProcessingLimits};
81pub use ocr::{OcrBackend, OcrError, OcrResult, OcrWord};
82pub use render::{ColorMode, PixelFormat, RenderConfig, RenderOptions, RenderedPage};
83pub use text::{TextBlock, TextSpan};
84pub use thumbnail::ThumbnailOptions;
85
// `best_available_backend` is not re-exported when targeting wasm32.
86#[cfg(not(target_arch = "wasm32"))]
87pub use ocr::best_available_backend;
// Re-exported only with the `ocr` feature enabled on a non-wasm32 target.
88#[cfg(all(feature = "ocr", not(target_arch = "wasm32")))]
89pub use ocr::ocr_page_default;
// Optional OCR backends: each re-export below exists only when its own
// Cargo feature is enabled.
90#[cfg(feature = "ocr-aws")]
91pub use ocr::AwsTextractBackend;
92#[cfg(feature = "ocr-azure")]
93pub use ocr::AzureDocIntelBackend;
94#[cfg(feature = "ocr-google")]
95pub use ocr::GoogleVisionBackend;
96#[cfg(feature = "ocr-mistral")]
97pub use ocr::MistralOcrBackend;
98#[cfg(feature = "ocr")]
99pub use ocr::OcrsBackend;
100#[cfg(feature = "ocr-onnx")]
101pub use ocr::PaddleOnnxBackend;