Skip to main content

pdf_engine/
lib.rs

1#![allow(missing_docs)]
2//! Unified PDF rendering engine.
3//!
4//! `pdf-engine` is the main public-facing API for reading and rendering PDF
5//! documents. It wraps the lower-level `pdf-syntax` / `pdf-interpret` /
6//! `pdf-render` stack and exposes a single [`PdfDocument`] handle for all
7//! common operations: page rendering, text extraction, thumbnails, metadata,
8//! bookmarks, and full-text search.
9//!
10//! # Quick Start
11//!
12//! ```no_run
13//! use std::sync::Arc;
14//! use pdf_engine::{PdfDocument, RenderOptions};
15//!
16//! // Load from bytes (accepts Arc<Vec<u8>>, Vec<u8>, or any Into<PdfData>).
17//! let data = Arc::new(std::fs::read("invoice.pdf").unwrap());
18//! let doc = PdfDocument::open(data).unwrap();
19//!
20//! println!("{} pages — {:?}", doc.page_count(), doc.info().title);
21//!
22//! // Render page 0 at 150 DPI → raw RGBA pixel data.
23//! let opts = RenderOptions { dpi: 150.0, ..Default::default() };
24//! let rendered = doc.render_page(0, &opts).unwrap();
25//! println!("{}×{} px", rendered.width, rendered.height);
26//!
27//! // Plain-text extraction.
28//! let text = doc.extract_text(0).unwrap();
29//! println!("{text}");
30//!
31//! // Structured text with per-span positions.
32//! for block in doc.extract_text_blocks(0).unwrap() {
33//!     for span in &block.spans {
34//!         println!("  [{:.0}, {:.0}] {}", span.x, span.y, span.text);
35//!     }
36//! }
37//!
38//! // Full-text search — returns 0-based page indices.
39//! let hits = doc.search_text("total");
40//! println!("'total' found on {} page(s)", hits.len());
41//! ```
42//!
43//! # Key Types
44//!
45//! | Type | Description |
46//! |---|---|
47//! | [`BatchConfig`] / [`BatchResult`] | Worker-pool processing for many PDFs |
48//! | [`PdfDocument`] | Main document handle |
49//! | [`RenderConfig`] / [`RenderOptions`] | DPI, color mode, background color, optional forced width/height |
50//! | [`RenderedPage`] | RGBA or CMYK pixel data (row-major, 4 bytes per pixel) |
51//! | [`PageGeometry`] | MediaBox, CropBox, TrimBox, BleedBox, rotation |
52//! | [`PageBox`] | A rectangle in PDF user-space points |
53//! | [`DocumentInfo`] | Title, author, subject, creator, producer |
54//! | [`TextBlock`] / [`TextSpan`] | Structured text with position and font size |
55//! | [`BookmarkItem`] | Outline node — title, target page, nested children |
56//! | [`ThumbnailOptions`] | Max-dimension constraint for thumbnail rendering |
57
58pub mod api;
59pub mod api_error;
60pub mod batch;
61pub mod color;
62pub mod document;
63pub mod error;
64pub mod geometry;
65pub mod limits;
66pub mod ocr;
67pub mod render;
68pub mod text;
69pub mod thumbnail;
70#[cfg(feature = "xfa")]
71pub mod xfa;
72
73pub use batch::{process_batch, BatchConfig, BatchResult, ErrorStrategy, PdfBatch};
74pub use color::preserve_device_cmyk;
75pub use document::{BookmarkItem, DocumentInfo, PdfDocument};
76pub use error::{EngineError, Result};
77pub use geometry::{PageBox, PageGeometry, PageRotation};
78pub use limits::{LimitError, ProcessingLimits};
79pub use ocr::{OcrBackend, OcrError, OcrResult, OcrWord};
80pub use render::{ColorMode, PixelFormat, RenderConfig, RenderOptions, RenderedPage};
81pub use text::{TextBlock, TextSpan};
82pub use thumbnail::ThumbnailOptions;
83
84#[cfg(not(target_arch = "wasm32"))]
85pub use ocr::best_available_backend;
86#[cfg(all(feature = "ocr", not(target_arch = "wasm32")))]
87pub use ocr::ocr_page_default;
88#[cfg(feature = "ocr-aws")]
89pub use ocr::AwsTextractBackend;
90#[cfg(feature = "ocr-azure")]
91pub use ocr::AzureDocIntelBackend;
92#[cfg(feature = "ocr-google")]
93pub use ocr::GoogleVisionBackend;
94#[cfg(feature = "ocr-mistral")]
95pub use ocr::MistralOcrBackend;
96#[cfg(feature = "ocr")]
97pub use ocr::OcrsBackend;
98#[cfg(feature = "ocr-onnx")]
99pub use ocr::PaddleOnnxBackend;