#![forbid(unsafe_code)]
#![doc = "rpdfium — a faithful Rust port of Google's PDFium PDF rendering engine."]
pub mod arc;
pub use arc::{ArcDocument, ArcLibrary, ArcPage};
mod image_decode;
use std::sync::{Arc, OnceLock};
use rpdfium_core::Name;
use rpdfium_font::{DashMapFontCache, FontCache as _, FontRef, ResolvedFont};
use rpdfium_page::display::{DisplayTree, walk};
use rpdfium_page::resource::ResourceDict;
use rpdfium_page::{InterpreterContext, collect_page_ids, interpret, resolve_resources};
use rpdfium_parser::{ObjectStore, tokenize_content_stream};
pub use rpdfium_core::error::{ObjectId, ParseError, PdfError, PdfResult};
pub use rpdfium_core::{PdfString, PdfStringEncoding};
pub use rpdfium_parser::object::{Object, StreamData};
pub use rpdfium_render::{
RenderError, RgbaColor, compute_page_transform, render, render_with_images,
};
pub use rpdfium_font::{
FolderFontScanner, FontMapper, FontMatch, FontRequest, FontWeight, GlyphUsageTracker,
base14_substitute, subset_truetype_font,
};
pub use rpdfium_doc::{
Action, Annotation, AnnotationBorder, AnnotationFlags, AnnotationSubtypeData, AnnotationType,
Bookmark, BorderStyle, Destination, DocError, DocMdpPermission, DocResult, DocumentMetadata,
FdfData, FieldValue, InteractiveForm, NameTree, NumberTree, PageLabel, PageLabelStyle,
SignatureObject, collect_signatures, export_fdf, format_label, import_fdf, parse_annotations,
parse_bookmarks, parse_destination, parse_metadata, parse_page_labels,
};
pub use rpdfium_text::{
CharOrigin, CharRect, CharType, Link, LinkKind, SearchOptions, SearchResult, TextCharacter,
TextExtractor, TextPage, TextPageFind, extract_links, search, search_case_insensitive,
search_consecutive, search_normalized, search_normalized_case_insensitive, search_whole_word,
search_whole_word_case_insensitive, segment_lines, segment_words,
};
pub use rpdfium_graphics::{Bitmap, BitmapFormat, Color};
pub use rpdfium_page::{DisplayNode, DisplayVisitor, OCContext, PageError, UsageType};
pub use rpdfium_core::{Matrix, OpenOptions, ParsingMode, Point, Rect, Size};
pub use rpdfium_render::RenderConfig;
/// Unified error type for the `rpdfium` facade crate.
///
/// Wraps the error types of the underlying crates (each convertible via `?`
/// thanks to the `#[from]` conversions) and adds one facade-level variant
/// for invalid page indices.
#[derive(Debug, thiserror::Error)]
pub enum Error {
/// An error reported as [`PdfError`]; its message is forwarded unchanged.
#[error(transparent)]
Parse(#[from] PdfError),
/// An error reported as [`PageError`]; its message is forwarded unchanged.
#[error(transparent)]
Page(#[from] PageError),
/// An error reported as [`RenderError`]; its message is forwarded unchanged.
#[error(transparent)]
Render(#[from] RenderError),
/// An error reported as [`DocError`]; its message is forwarded unchanged.
#[error(transparent)]
Doc(#[from] DocError),
/// A page was requested with an index that is not smaller than the
/// document's page count.
#[error("page index out of range: {index} (document has {count} pages)")]
PageOutOfRange {
/// The index that was requested.
index: u32,
/// The number of pages in the document.
count: u32,
},
}
/// Convenience alias for results whose error type is this crate's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
/// Adapter that lets a [`DashMapFontCache`] be used where the page layer
/// expects an `rpdfium_page::FontCache`.
///
/// Font lookups arrive keyed by resource name; the bridge translates the name
/// to an object id through `resources.fonts` and then asks the font cache to
/// load (or return) the font from `store`.
pub(crate) struct FontCacheBridge<'a> {
/// Cache consulted (and populated) for every font lookup.
pub(crate) font_cache: &'a DashMapFontCache,
/// Object store handed to the cache when a font has to be loaded.
pub(crate) store: &'a ObjectStore<Arc<[u8]>>,
/// Resource dictionary whose `fonts` map resolves font names to object ids.
pub(crate) resources: &'a ResourceDict,
}
impl rpdfium_page::FontCache for FontCacheBridge<'_> {
    /// Returns the width of `char_code` in the font registered under
    /// `font_name`, or `None` when the name is unknown or the font fails to load.
    fn glyph_width(&self, font_name: &Name, char_code: u16) -> Option<f32> {
        // Fully qualified so the call does not depend on which `FontCache`
        // traits happen to be in scope at this point of the file.
        <Self as rpdfium_page::FontCache>::get_resolved_font(self, font_name)
            .map(|font| font.char_width(char_code) as f32)
    }

    /// Looks up `font_name` in the resource dictionary and returns the cached
    /// (or freshly loaded) font. Load errors are mapped to `None`.
    fn get_resolved_font(&self, font_name: &Name) -> Option<Arc<ResolvedFont>> {
        let font_id = *self.resources.fonts.get(font_name)?;
        self.font_cache
            .get_or_load(&FontRef::new(font_id), self.store)
            .ok()
    }
}
/// Library-level handle from which documents are opened.
///
/// Holds the optional font mapper; a [`Document`] borrows the `Library` it was
/// opened with.
pub struct Library {
// Zero-sized private field; presumably kept so the struct can only be built
// through its constructors — TODO confirm intent.
_private: (),
/// Font mapper exposed through `Library::font_mapper`, if any.
font_mapper: Option<Box<dyn FontMapper>>,
}
impl Library {
pub fn new() -> Self {
Self {
_private: (),
font_mapper: Some(Box::new(FolderFontScanner::new())),
}
}
pub fn with_font_mapper(mapper: Box<dyn FontMapper>) -> Self {
Self {
_private: (),
font_mapper: Some(mapper),
}
}
pub fn font_mapper(&self) -> Option<&dyn FontMapper> {
self.font_mapper.as_deref()
}
}
impl Default for Library {
fn default() -> Self {
Self::new()
}
}
/// An opened PDF document.
///
/// Owns the parsed object store, a font cache shared by all pages, the list
/// of page object ids, and the options the document was opened with.
pub struct Document<'lib> {
// Stored but not read by the code in this file, hence the lint allowance;
// it ties the document's lifetime to the library it was opened with.
#[allow(dead_code)]
library: &'lib Library,
/// Object store built from the document bytes.
store: ObjectStore<Arc<[u8]>>,
/// Font cache lent to every [`Page`] created from this document.
font_cache: DashMapFontCache,
/// Page object ids as returned by `collect_page_ids`; indexed by page index.
page_ids: Vec<ObjectId>,
/// Object id of the catalog, taken from the trailer's `root` entry.
catalog_id: ObjectId,
/// Copy of the options passed to `open`.
options: OpenOptions,
/// Optional-content context built from the catalog, if any.
oc_context: Option<rpdfium_page::OCContext>,
}
impl<'lib> Document<'lib> {
    /// Opens a document from an in-memory byte buffer.
    ///
    /// The bytes are moved into an `Arc<[u8]>` and parsed with the parsing
    /// mode and optional password taken from `options`. The page list and the
    /// optional-content context are collected eagerly.
    ///
    /// # Errors
    ///
    /// Returns an error when the object store cannot be opened or the page
    /// ids cannot be collected.
    pub fn open(
        library: &'lib Library,
        data: Vec<u8>,
        options: &OpenOptions,
    ) -> Result<Document<'lib>> {
        let arc_data: Arc<[u8]> = Arc::from(data);
        let store = ObjectStore::open_with_password(
            arc_data,
            options.parsing_mode,
            options.password.as_deref(),
        )?;
        let page_ids = collect_page_ids(&store)?;
        let catalog_id = store.trailer().root;
        let font_cache = DashMapFontCache::new();
        let oc_context = rpdfium_page::OCContext::from_catalog(&store, catalog_id);
        Ok(Document {
            library,
            store,
            font_cache,
            page_ids,
            catalog_id,
            options: options.clone(),
            oc_context,
        })
    }

    /// Reads the file at `path` fully into memory and opens it like
    /// [`Document::open`].
    ///
    /// # Errors
    ///
    /// I/O failures are reported as `PdfError::Io` (wrapped in
    /// [`Error::Parse`]); everything else is as for [`Document::open`].
    pub fn open_file(
        library: &'lib Library,
        path: impl AsRef<std::path::Path>,
        options: &OpenOptions,
    ) -> Result<Document<'lib>> {
        let data = std::fs::read(path).map_err(PdfError::Io)?;
        Self::open(library, data, options)
    }

    /// Returns the number of pages in the document.
    ///
    /// The count saturates at `u32::MAX` rather than silently wrapping if the
    /// page list is ever longer than a `u32` can express.
    pub fn page_count(&self) -> u32 {
        // Clamp first so the narrowing cast below can never truncate.
        self.page_ids.len().min(u32::MAX as usize) as u32
    }

    /// Returns a handle to the page at the zero-based `index`.
    ///
    /// The media box is taken from the page dictionary, then from an
    /// inherited `/MediaBox` entry, and finally defaults to 612 x 792
    /// (US Letter in points) when neither yields a valid rectangle.
    ///
    /// # Errors
    ///
    /// Returns [`Error::PageOutOfRange`] when `index` does not address a
    /// page, and a parse error when the page object cannot be resolved or is
    /// not a dictionary.
    pub fn page(&self, index: u32) -> Result<Page<'_>> {
        let count = self.page_count();
        // Checked lookup: the bounds test and the access are one operation,
        // so there is no panicking index path.
        let Some(&page_dict_id) = self.page_ids.get(index as usize) else {
            return Err(Error::PageOutOfRange { index, count });
        };
        let page_obj = self.store.resolve(page_dict_id)?;
        let page_dict = page_obj
            .as_dict()
            .ok_or(PdfError::UnknownObject(page_dict_id))?;
        let media_box = parse_rect(page_dict, &Name::media_box(), &self.store)
            .or_else(|| {
                let inherited =
                    rpdfium_page::find_inherited_entry(&self.store, page_dict, &Name::media_box())
                        .ok()??;
                parse_rect_from_obj(&inherited)
            })
            .unwrap_or_else(|| Rect::new(0.0, 0.0, 612.0, 792.0));
        Ok(Page {
            store: &self.store,
            font_cache: &self.font_cache,
            page_index: index,
            page_dict_id,
            media_box,
            display_tree: OnceLock::new(),
            options: &self.options,
            oc_context: self.oc_context.as_ref(),
        })
    }

    /// Returns the document metadata parsed from the trailer's `info` object,
    /// or `Ok(None)` when the trailer has no such entry.
    ///
    /// # Errors
    ///
    /// Returns an error when the info object cannot be resolved or parsed.
    pub fn metadata(&self) -> Result<Option<DocumentMetadata>> {
        let Some(info_id) = self.store.trailer().info else {
            return Ok(None);
        };
        let info_obj = self.store.resolve(info_id)?;
        let meta = parse_metadata(info_obj, &self.store)?;
        Ok(Some(meta))
    }

    /// Returns the bookmarks parsed from the document catalog.
    ///
    /// # Errors
    ///
    /// Returns an error when the catalog cannot be resolved or the bookmarks
    /// cannot be parsed.
    pub fn bookmarks(&self) -> Result<Vec<Bookmark>> {
        let catalog_obj = self.store.resolve(self.catalog_id)?;
        Ok(parse_bookmarks(catalog_obj, &self.store)?)
    }

    /// Returns the signature objects collected from the document catalog.
    ///
    /// # Errors
    ///
    /// Returns an error when the catalog cannot be resolved or signature
    /// collection fails.
    pub fn signatures(&self) -> Result<Vec<SignatureObject>> {
        let catalog = self.store.resolve(self.catalog_id)?;
        Ok(collect_signatures(catalog, &self.store)?)
    }

    /// Gives direct read access to the underlying object store.
    pub fn store(&self) -> &ObjectStore<Arc<[u8]>> {
        &self.store
    }
}
/// A handle to a single page of a [`Document`].
///
/// Borrows the document's object store, font cache, options and
/// optional-content context, and lazily caches the interpreted display tree.
pub struct Page<'doc> {
/// Object store of the owning document.
store: &'doc ObjectStore<Arc<[u8]>>,
/// Font cache of the owning document.
font_cache: &'doc DashMapFontCache,
/// Zero-based index this page was requested with.
page_index: u32,
/// Object id of the page dictionary.
page_dict_id: ObjectId,
/// Media box computed when the handle was created.
media_box: Rect,
/// Display tree, filled in by the first successful `interpret` call.
display_tree: OnceLock<DisplayTree>,
/// Options the owning document was opened with.
options: &'doc OpenOptions,
/// Optional-content context of the owning document, if any.
oc_context: Option<&'doc rpdfium_page::OCContext>,
}
impl<'doc> Page<'doc> {
pub fn media_box(&self) -> Rect {
self.media_box
}
pub fn crop_box(&self) -> Result<Option<Rect>> {
let page_obj = self.store.resolve(self.page_dict_id)?;
let page_dict = page_obj
.as_dict()
.ok_or(PdfError::UnknownObject(self.page_dict_id))?;
Ok(parse_rect(page_dict, &Name::crop_box(), self.store))
}
pub fn rotation(&self) -> Result<u32> {
let page_obj = self.store.resolve(self.page_dict_id)?;
let page_dict = page_obj
.as_dict()
.ok_or(PdfError::UnknownObject(self.page_dict_id))?;
let rotation = page_dict
.get(&Name::rotate())
.and_then(|obj| self.store.deep_resolve(obj).ok().and_then(|o| o.as_i64()))
.unwrap_or(0);
Ok(rotation.rem_euclid(360) as u32)
}
pub fn interpret(&self) -> Result<&DisplayTree> {
if let Some(tree) = self.display_tree.get() {
return Ok(tree);
}
let tree = self.interpret_inner()?;
let _ = self.display_tree.set(tree);
Ok(self.display_tree.get().unwrap())
}
fn interpret_inner(&self) -> Result<DisplayTree> {
let page_obj = self.store.resolve(self.page_dict_id)?;
let page_dict = page_obj
.as_dict()
.ok_or(PdfError::UnknownObject(self.page_dict_id))?;
let content_bytes = decode_page_contents(page_dict, self.store)?;
let operators = tokenize_content_stream(&content_bytes)?;
let resources = resolve_resources(self.store, page_dict)?;
let bridge = FontCacheBridge {
font_cache: self.font_cache,
store: self.store,
resources: &resources,
};
let ctx = InterpreterContext {
store: self.store,
font_cache: &bridge,
mode: self.options.parsing_mode,
oc_context: self.oc_context,
};
let tree = interpret(
&operators,
&ctx,
&resources,
self.options.max_operators_per_page,
)?;
Ok(tree)
}
pub fn render(&self, config: &RenderConfig) -> Result<rpdfium_graphics::Bitmap> {
let tree = self.interpret()?;
let decoder = image_decode::PdfImageDecoder::new(self.store);
let bitmap = rpdfium_render::render_with_images(tree, config, &decoder)?;
Ok(bitmap)
}
pub fn text(&self) -> Result<TextPage> {
let tree = self.interpret()?;
let mut extractor = TextExtractor::new();
walk(tree, &mut extractor);
let (characters, run_ids) = extractor.into_characters();
Ok(TextPage::new_with_run_ids(characters, run_ids, false))
}
pub fn annotations(&self) -> Result<Vec<Annotation>> {
let page_obj = self.store.resolve(self.page_dict_id)?;
let page_dict = page_obj
.as_dict()
.ok_or(PdfError::UnknownObject(self.page_dict_id))?;
match page_dict.get(&Name::annots()) {
Some(annots_obj) => {
let annots = parse_annotations(annots_obj, self.store)?;
Ok(annots)
}
None => Ok(Vec::new()),
}
}
pub fn index(&self) -> u32 {
self.page_index
}
}
pub(crate) fn parse_rect(
dict: &std::collections::HashMap<Name, Object>,
key: &Name,
store: &ObjectStore<Arc<[u8]>>,
) -> Option<Rect> {
let obj = dict.get(key)?;
let resolved = store.deep_resolve(obj).ok()?;
parse_rect_from_obj(resolved)
}
/// Builds a [`Rect`] from the first four elements of an array object.
///
/// Returns `None` when `obj` is not an array, has fewer than four elements,
/// or any of the first four elements is not numeric. Elements past the fourth
/// are ignored.
pub(crate) fn parse_rect_from_obj(obj: &Object) -> Option<Rect> {
    let mut coords = obj.as_array()?.iter().map(|item| item.as_f64());
    // The first `?` fails on a missing element, the second on a non-number.
    let c0 = coords.next()??;
    let c1 = coords.next()??;
    let c2 = coords.next()??;
    let c3 = coords.next()??;
    Some(Rect::new(c0, c1, c2, c3))
}
/// Decodes a page's `/Contents` entry into one contiguous byte buffer.
///
/// * No `/Contents` entry, or an entry of an unexpected kind: empty buffer.
/// * A single stream: its decoded bytes.
/// * An array: the decoded bytes of every element that is a reference to a
///   stream, joined by a single space; other elements are skipped.
///
/// # Errors
///
/// Returns a [`PdfError`] when resolving an object or decoding a stream fails.
pub(crate) fn decode_page_contents(
    page_dict: &std::collections::HashMap<Name, Object>,
    store: &ObjectStore<Arc<[u8]>>,
) -> std::result::Result<Vec<u8>, PdfError> {
    let Some(raw_contents) = page_dict.get(&Name::contents()) else {
        return Ok(Vec::new());
    };
    let contents = store.deep_resolve(raw_contents)?;
    match contents {
        Object::Stream { .. } => store.decode_stream(contents),
        Object::Array(parts) => {
            let mut joined = Vec::new();
            for part in parts {
                let Some(part_id) = part.as_reference() else {
                    continue;
                };
                let target = store.resolve(part_id)?;
                if !matches!(target, Object::Stream { .. }) {
                    continue;
                }
                let chunk = store.decode_stream(target)?;
                // Separate consecutive streams so tokens cannot fuse across
                // the boundary.
                if !joined.is_empty() {
                    joined.push(b' ');
                }
                joined.extend_from_slice(&chunk);
            }
            Ok(joined)
        }
        Object::Reference(id) => {
            let target = store.resolve(*id)?;
            if matches!(target, Object::Stream { .. }) {
                store.decode_stream(target)
            } else {
                Ok(Vec::new())
            }
        }
        _ => Ok(Vec::new()),
    }
}