#![forbid(unsafe_code)]
#![doc = "rpdfium — a faithful Rust port of Google's PDFium PDF rendering engine."]
pub mod arc;
pub use arc::{ArcDocument, ArcLibrary, ArcPage};
#[cfg(feature = "edit")]
pub mod edit;
mod image_decode;
use image_decode::{convert_to_rgba, get_dict_int, read_decode_array, resolve_image_color_space};
use std::sync::{Arc, OnceLock};
use rpdfium_font::{DashMapFontCache, FontCache as _, FontRef, ResolvedFont};
use rpdfium_page::display::{DisplayTree, walk};
use rpdfium_page::resource::ResourceDict;
use rpdfium_page::{InterpreterContext, collect_page_ids, interpret, resolve_resources};
use rpdfium_parser::{ObjectStore, tokenize_content_stream};
pub use rpdfium_core::error::{ObjectId, ParseError, PdfError, PdfResult};
pub use rpdfium_core::{Name, PdfString, PdfStringEncoding};
pub use rpdfium_parser::object::{Object, StreamData};
pub use rpdfium_render::{
RenderError, RgbaColor, compute_page_transform, render, render_with_images,
};
pub use rpdfium_font::{
FolderFontScanner, FontMapper, FontMatch, FontRequest, FontWeight, GlyphUsageTracker,
base14_substitute, subset_truetype_font,
};
pub use rpdfium_doc::{
Action, ActionType, Annotation, AnnotationBorder, AnnotationFlags, AnnotationSubtypeData,
AnnotationType, AttributeValue, Bookmark, BorderStyle, Destination, DocError, DocMdpPermission,
DocResult, DocumentMetadata, DuplexMode, ElementsForPage, FdfData, FieldValue, FileSpec,
FormFieldFlags, HitTestResult, InteractiveForm, JavaScriptAction, LinkObject, McidMapping,
NameTree, NumberTree, PageFit, PageLabel, PageLabelStyle, PageMode, PageStructure, PdfFormType,
ReadingDirection, SignatureObject, StructAttribute, StructElement, StructTree,
ViewerPreferences, collect_attachments, collect_javascript_actions, collect_links,
collect_named_destinations, collect_signatures, export_fdf, find_bookmark,
find_link_at_position, format_label, import_fdf, is_tagged, link_at_point, link_enumerate,
link_get_link_at_point, next_sibling_bookmark, page_mode, parse_annotations, parse_bookmarks,
parse_destination, parse_metadata, parse_page_labels,
};
#[allow(deprecated)]
pub use rpdfium_doc::{enumerate, enumerate_links, get_bookmark_by_title, get_link_at_point};
pub use rpdfium_parser::PdfVersion;
pub use rpdfium_text::{
CharOrigin, CharRect, CharType, Link, LinkKind, SearchOptions, SearchResult, TextCharacter,
TextExtractor, TextPage, TextPageFind, extract_links, search, search_case_insensitive,
search_consecutive, search_normalized, search_normalized_case_insensitive, search_whole_word,
search_whole_word_case_insensitive, segment_lines, segment_words,
};
pub use rpdfium_graphics::{Bitmap, BitmapFormat, Color};
pub use rpdfium_page::{DisplayNode, DisplayVisitor, OCContext, PageError, UsageType};
pub use rpdfium_core::{Matrix, OpenOptions, ParsingMode, Point, Rect, Size};
pub use rpdfium_render::{ColorScheme, RenderConfig};
/// Crate-level error type aggregating the errors of the underlying layers.
///
/// Each wrapped variant is `transparent`, so its message is the message of
/// the inner error; `#[from]` lets `?` convert the inner error automatically.
#[derive(Debug, thiserror::Error)]
pub enum Error {
/// A [`PdfError`], e.g. from resolving objects in the object store or from I/O.
#[error(transparent)]
Parse(#[from] PdfError),
/// A [`PageError`] from the page layer.
#[error(transparent)]
Page(#[from] PageError),
/// A [`RenderError`] from rendering.
#[error(transparent)]
Render(#[from] RenderError),
/// A [`DocError`] from the document-level feature layer.
#[error(transparent)]
Doc(#[from] DocError),
/// A page index that is not smaller than the document's page count.
#[error("page index out of range: {index} (document has {count} pages)")]
PageOutOfRange {
/// The index that was requested.
index: u32,
/// The number of pages in the document.
count: u32,
},
}
/// Result alias using the crate-level [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
/// Selects which entry of the trailer's file identifier to return.
///
/// The discriminant is used directly as the index into the trailer `id`
/// value by `Document::file_identifier`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(usize)]
pub enum FileIdentifierType {
/// The entry at index 0.
Permanent = 0,
/// The entry at index 1.
Changing = 1,
}
/// Adapter that exposes a [`DashMapFontCache`] through the
/// `rpdfium_page::FontCache` trait.
///
/// Font names are looked up in a resource dictionary; the object id found
/// there is then loaded through the font cache.
pub(crate) struct FontCacheBridge<'a> {
    /// Cache that resolved fonts are loaded from.
    pub(crate) font_cache: &'a DashMapFontCache,
    /// Object store handed to the cache when it loads a font.
    pub(crate) store: &'a ObjectStore<Arc<[u8]>>,
    /// Resource dictionary whose `fonts` map translates names to object ids.
    pub(crate) resources: &'a ResourceDict,
}

impl FontCacheBridge<'_> {
    /// Looks `font_name` up in the resources and loads it through the cache.
    ///
    /// Returns `None` when the name is unknown or the cache reports an error.
    fn load_font(&self, font_name: &Name) -> Option<Arc<ResolvedFont>> {
        let object_id = *self.resources.fonts.get(font_name)?;
        let font_ref = FontRef::new(object_id);
        self.font_cache.get_or_load(&font_ref, self.store).ok()
    }
}

impl rpdfium_page::FontCache for FontCacheBridge<'_> {
    /// Width of `char_code` in the named font, or `None` if the font cannot be loaded.
    fn glyph_width(&self, font_name: &Name, char_code: u16) -> Option<f32> {
        self.load_font(font_name)
            .map(|font| font.char_width(char_code) as f32)
    }

    /// The resolved font registered under `font_name`, if it can be loaded.
    fn get_resolved_font(&self, font_name: &Name) -> Option<Arc<ResolvedFont>> {
        self.load_font(font_name)
    }
}
/// Kind of page-level additional action that can be queried.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PageActionType {
    /// Action stored under the `O` key.
    Open = 0,
    /// Action stored under the `C` key.
    Close = 1,
}

impl PageActionType {
    /// Returns the dictionary key under which this action type is looked up
    /// in a page's additional-actions dictionary.
    pub fn pdf_key(self) -> &'static str {
        match self {
            PageActionType::Open => "O",
            PageActionType::Close => "C",
        }
    }
}
/// Random-access byte source from which a document can be loaded.
///
/// `Document::open_custom` reads the whole source into memory through this
/// trait before parsing.
pub trait PdfReader: Send + Sync {
/// Total length of the source in bytes.
fn file_len(&self) -> u64;
/// Reads bytes starting at `offset` into `buf` and returns how many bytes
/// were written. `Document::open_custom` treats `Ok(0)` as end of data.
fn read_at(&self, offset: u64, buf: &mut [u8]) -> std::io::Result<usize>;
}
/// Library handle that documents are opened against.
///
/// It owns the optional [`FontMapper`] configured at construction time.
pub struct Library {
    /// Prevents construction outside of the provided constructors.
    _private: (),
    /// Font mapper supplied at construction, if any.
    font_mapper: Option<Box<dyn FontMapper>>,
}

impl Library {
    /// Creates a library whose font mapper is a fresh [`FolderFontScanner`].
    pub fn new() -> Self {
        Self::with_font_mapper(Box::new(FolderFontScanner::new()))
    }

    /// Creates a library that uses the given font mapper.
    pub fn with_font_mapper(mapper: Box<dyn FontMapper>) -> Self {
        Library {
            font_mapper: Some(mapper),
            _private: (),
        }
    }

    /// Returns the configured font mapper, if one is set.
    pub fn font_mapper(&self) -> Option<&dyn FontMapper> {
        self.font_mapper.as_deref()
    }
}

impl Default for Library {
    /// Equivalent to [`Library::new`].
    fn default() -> Self {
        Library::new()
    }
}
/// An opened PDF document, borrowed from a [`Library`].
pub struct Document<'lib> {
/// Library the document was opened with; stored but not read by the
/// methods visible in this file (hence the `dead_code` allowance).
#[allow(dead_code)]
library: &'lib Library,
/// Object store holding the parsed file.
store: ObjectStore<Arc<[u8]>>,
/// Font cache shared by all pages created from this document.
font_cache: DashMapFontCache,
/// Object ids of the pages, indexed by page number.
page_ids: Vec<ObjectId>,
/// Object id of the document catalog (the trailer's root).
catalog_id: ObjectId,
/// Copy of the options the document was opened with.
options: OpenOptions,
/// Optional-content context built from the catalog at open time, if any.
oc_context: Option<rpdfium_page::OCContext>,
}
impl<'lib> Document<'lib> {
/// Opens a document from an in-memory PDF file.
///
/// The bytes are moved into an `Arc<[u8]>` and handed to
/// `ObjectStore::open_with_password` together with the parsing mode and
/// password from `options`. The page ids, the catalog id and the
/// optional-content context are collected eagerly.
///
/// # Errors
/// Returns an error if the object store cannot be opened or the page ids
/// cannot be collected.
pub fn open(
    library: &'lib Library,
    data: Vec<u8>,
    options: &OpenOptions,
) -> Result<Document<'lib>> {
    let bytes: Arc<[u8]> = Arc::from(data);
    let password = options.password.as_deref();
    let store = ObjectStore::open_with_password(bytes, options.parsing_mode, password)?;

    let page_ids = collect_page_ids(&store)?;
    let catalog_id = store.trailer().root;
    let font_cache = DashMapFontCache::new();
    let oc_context = rpdfium_page::OCContext::from_catalog(&store, catalog_id);

    Ok(Document {
        library,
        store,
        font_cache,
        page_ids,
        catalog_id,
        options: options.clone(),
        oc_context,
    })
}

/// Alias for [`Self::open`].
#[inline]
pub fn load_mem_document(
    library: &'lib Library,
    data: Vec<u8>,
    options: &OpenOptions,
) -> Result<Document<'lib>> {
    Self::open(library, data, options)
}
/// Reads the file at `path` fully into memory and opens it with [`Self::open`].
///
/// # Errors
/// An I/O failure while reading the file is reported as `PdfError::Io`;
/// otherwise the errors of [`Self::open`] apply.
pub fn open_file(
    library: &'lib Library,
    path: impl AsRef<std::path::Path>,
    options: &OpenOptions,
) -> Result<Document<'lib>> {
    match std::fs::read(path) {
        Ok(bytes) => Self::open(library, bytes, options),
        Err(io_err) => Err(PdfError::Io(io_err).into()),
    }
}

/// Alias for [`Self::open_file`].
#[inline]
pub fn load_document(
    library: &'lib Library,
    path: impl AsRef<std::path::Path>,
    options: &OpenOptions,
) -> Result<Document<'lib>> {
    Self::open_file(library, path, options)
}
pub fn open_custom(
library: &'lib Library,
reader: impl PdfReader,
options: &OpenOptions,
) -> Result<Self> {
let len = reader.file_len() as usize;
let mut data = vec![0u8; len];
let mut offset = 0;
while offset < data.len() {
let n = reader
.read_at(offset as u64, &mut data[offset..])
.map_err(PdfError::Io)?;
if n == 0 {
break;
}
offset += n;
}
data.truncate(offset);
Self::open(library, data, options)
}
#[inline]
pub fn load_custom_document(
library: &'lib Library,
reader: impl PdfReader,
options: &OpenOptions,
) -> Result<Self> {
Self::open_custom(library, reader, options)
}
/// Number of pages collected when the document was opened.
pub fn page_count(&self) -> u32 {
    let total = self.page_ids.len();
    total as u32
}

/// Alias for [`Self::page_count`].
#[inline]
pub fn get_page_count(&self) -> u32 {
    self.page_count()
}
/// Creates a [`Page`] handle for the zero-based page `index`.
///
/// The media box is taken from the page dictionary; if it is absent or
/// unparsable there, an inherited entry is tried, and as a last resort the
/// box `[0 0 612 792]` is used. The page's display tree is not built here.
///
/// # Errors
/// * [`Error::PageOutOfRange`] if `index` is not below the page count.
/// * A parse error if the page object cannot be resolved or is not a
///   dictionary.
pub fn page(&self, index: u32) -> Result<Page<'_>> {
    let count = self.page_count();
    if index >= count {
        return Err(Error::PageOutOfRange { index, count });
    }

    let page_dict_id = self.page_ids[index as usize];
    let page_obj = self.store.resolve(page_dict_id)?;
    let page_dict = page_obj
        .as_dict()
        .ok_or(PdfError::UnknownObject(page_dict_id))?;

    let media_box_key = Name::media_box();
    let media_box = match parse_rect(page_dict, &media_box_key, &self.store) {
        Some(own_box) => own_box,
        None => rpdfium_page::find_inherited_entry(&self.store, page_dict, &media_box_key)
            .ok()
            .flatten()
            .and_then(|inherited| parse_rect_from_obj(&inherited))
            .unwrap_or(Rect::new(0.0, 0.0, 612.0, 792.0)),
    };

    Ok(Page {
        store: &self.store,
        font_cache: &self.font_cache,
        page_index: index,
        page_dict_id,
        media_box,
        display_tree: OnceLock::new(),
        options: &self.options,
        oc_context: self.oc_context.as_ref(),
    })
}

/// Alias for [`Self::page`].
#[inline]
pub fn load_page(&self, index: u32) -> Result<Page<'_>> {
    self.page(index)
}
/// Parses the document information dictionary referenced by the trailer.
///
/// Returns `Ok(None)` when the trailer has no `info` reference.
pub fn metadata(&self) -> Result<Option<DocumentMetadata>> {
    let Some(info_id) = self.store.trailer().info else {
        return Ok(None);
    };
    let info_obj = self.store.resolve(info_id)?;
    Ok(Some(parse_metadata(info_obj, &self.store)?))
}
/// Parses the document's bookmarks from the catalog.
///
/// The catalog is re-resolved and re-parsed on every call.
pub fn bookmarks(&self) -> Result<Vec<Bookmark>> {
    let catalog_obj = self.store.resolve(self.catalog_id)?;
    Ok(parse_bookmarks(catalog_obj, &self.store)?)
}
/// Searches the bookmarks for `title` and returns a clone of the match.
///
/// The bookmarks are parsed via [`Self::bookmarks`] on each call; the
/// matching rules are those of the free function `find_bookmark`.
pub fn find_bookmark(&self, title: &str) -> Result<Option<Bookmark>> {
    let outline = self.bookmarks()?;
    let hit = find_bookmark(&outline, title);
    Ok(hit.cloned())
}

/// Alias for [`Self::find_bookmark`].
#[inline]
pub fn bookmark_find(&self, title: &str) -> Result<Option<Bookmark>> {
    self.find_bookmark(title)
}

/// Deprecated alias for [`Self::find_bookmark`].
#[deprecated(note = "use `bookmark_find()` — matches upstream `FPDFBookmark_Find`")]
#[inline]
pub fn find(&self, title: &str) -> Result<Option<Bookmark>> {
    self.find_bookmark(title)
}

/// Deprecated alias for [`Self::find_bookmark`].
#[deprecated(note = "use `bookmark_find()` — matches upstream `FPDFBookmark_Find`")]
#[inline]
pub fn get_bookmark_by_title(&self, title: &str) -> Result<Option<Bookmark>> {
    self.find_bookmark(title)
}
/// Forwards to the free function `next_sibling_bookmark` to find the entry
/// of `siblings` that follows `bookmark`.
///
/// No document state is consulted; `self` is unused.
pub fn next_sibling_bookmark<'a>(
    &self,
    siblings: &'a [Bookmark],
    bookmark: &Bookmark,
) -> Option<&'a Bookmark> {
    let following = next_sibling_bookmark(siblings, bookmark);
    following
}

/// Alias for [`Self::next_sibling_bookmark`].
#[inline]
pub fn bookmark_get_next_sibling<'a>(
    &self,
    siblings: &'a [Bookmark],
    bookmark: &Bookmark,
) -> Option<&'a Bookmark> {
    self.next_sibling_bookmark(siblings, bookmark)
}

/// Deprecated alias for [`Self::next_sibling_bookmark`].
#[deprecated(
    note = "use `bookmark_get_next_sibling()` — matches upstream `FPDFBookmark_GetNextSibling`"
)]
#[inline]
pub fn get_next_sibling<'a>(
    &self,
    siblings: &'a [Bookmark],
    bookmark: &Bookmark,
) -> Option<&'a Bookmark> {
    self.next_sibling_bookmark(siblings, bookmark)
}
/// Collects the signature objects reachable from the catalog.
///
/// The collection is recomputed on every call.
pub fn signatures(&self) -> Result<Vec<SignatureObject>> {
    let catalog = self.store.resolve(self.catalog_id)?;
    let found = collect_signatures(catalog, &self.store)?;
    Ok(found)
}

/// Number of signatures returned by [`Self::signatures`].
pub fn signature_count(&self) -> Result<usize> {
    self.signatures().map(|sigs| sigs.len())
}

/// Alias for [`Self::signature_count`].
#[inline]
pub fn get_signature_count(&self) -> Result<usize> {
    self.signature_count()
}

/// The signature at position `index`, or `None` if the index is out of range.
pub fn signature_object(&self, index: usize) -> Result<Option<SignatureObject>> {
    Ok(self.signatures()?.into_iter().nth(index))
}

/// Alias for [`Self::signature_object`].
#[inline]
pub fn get_signature_object(&self, index: usize) -> Result<Option<SignatureObject>> {
    self.signature_object(index)
}
/// Returns a copy of the requested entry of the trailer's file identifier.
///
/// `id_type` is used as an index into the trailer `id` value. `None` is
/// returned when the trailer has no identifier or when the identifier has
/// no entry at that index; a checked lookup is used so that a short
/// identifier cannot cause a panic.
pub fn file_identifier(&self, id_type: FileIdentifierType) -> Option<Vec<u8>> {
    let ids = self.store.trailer().id.as_ref()?;
    ids.get(id_type as usize).cloned()
}

/// Alias for [`Self::file_identifier`].
#[inline]
pub fn get_file_identifier(&self, id_type: FileIdentifierType) -> Option<Vec<u8>> {
    self.file_identifier(id_type)
}
/// Returns the document's PDF version as `(major, minor)`.
///
/// The version from the file header is the baseline. A catalog `Version`
/// entry of the form `D.D` (a name with single-digit major and minor) is
/// used instead only when it denotes a *later* version than the header,
/// as the PDF specification requires; an absent, malformed, equal or
/// earlier catalog version leaves the header version in effect.
pub fn pdf_version(&self) -> (u8, u8) {
    let header = self.store.file_version();
    let header_pair = (header.major, header.minor);

    // Any failure while reading the catalog entry simply means "no override".
    let catalog_ver = (|| -> Option<(u8, u8)> {
        let catalog_obj = self.store.resolve(self.catalog_id).ok()?;
        let dict = catalog_obj.as_dict()?;
        let ver_val = dict.get(&Name::from_bytes(b"Version".to_vec()))?;
        let resolved = self.store.deep_resolve(ver_val).ok()?;
        let ver_name = resolved.as_name()?;
        let b = ver_name.as_bytes();
        let well_formed =
            b.len() == 3 && b[1] == b'.' && b[0].is_ascii_digit() && b[2].is_ascii_digit();
        if well_formed {
            Some((b[0] - b'0', b[2] - b'0'))
        } else {
            None
        }
    })();

    // Tuples compare lexicographically: major first, then minor.
    match catalog_ver {
        Some(ver) if ver > header_pair => ver,
        _ => header_pair,
    }
}

/// Alias for [`Self::pdf_version`].
#[inline]
pub fn get_file_version(&self) -> (u8, u8) {
    self.pdf_version()
}
/// Permission bits reported by the security handler, or `None` when the
/// store has no security handler.
pub fn permissions(&self) -> Option<u32> {
    let handler = self.store.security_handler()?;
    Some(handler.permissions().bits() as u32)
}

/// Deprecated alias for [`Self::permissions`].
#[deprecated(
    note = "use `get_doc_user_permissions()` — matches upstream FPDF_GetDocUserPermissions"
)]
#[inline]
pub fn user_permissions(&self) -> Option<u32> {
    self.permissions()
}

/// Alias for [`Self::permissions`].
#[inline]
pub fn get_doc_permissions(&self) -> Option<u32> {
    self.permissions()
}

/// Alias for [`Self::permissions`].
#[inline]
pub fn get_doc_user_permissions(&self) -> Option<u32> {
    self.permissions()
}
/// Revision reported by the security handler, or `None` when the store has
/// no security handler.
pub fn security_revision(&self) -> Option<u32> {
    let handler = self.store.security_handler()?;
    Some(handler.revision())
}

/// Alias for [`Self::security_revision`].
#[inline]
pub fn get_security_handler_revision(&self) -> Option<u32> {
    self.security_revision()
}
/// Reads the viewer preferences dictionary from the catalog.
///
/// Returns `Ok(None)` when the entry is missing, cannot be resolved, or is
/// not a dictionary.
///
/// # Errors
/// Fails if the catalog cannot be resolved or is not a dictionary.
pub fn viewer_preferences(&self) -> Result<Option<ViewerPreferences>> {
    let catalog = self.store.resolve(self.catalog_id)?;
    let catalog_dict = catalog
        .as_dict()
        .ok_or(PdfError::UnknownObject(self.catalog_id))?;

    let Some(vp_obj) = catalog_dict
        .get(&Name::viewer_preferences())
        .and_then(|entry| self.store.deep_resolve(entry).ok())
    else {
        return Ok(None);
    };

    Ok(vp_obj
        .as_dict()
        .map(|vp_dict| ViewerPreferences::from_dict(vp_dict, &self.store)))
}
/// Print-scaling flag from the viewer preferences; `true` when there are
/// no viewer preferences.
pub fn viewerref_get_print_scaling(&self) -> Result<bool> {
    let prefs = self.viewer_preferences()?;
    Ok(prefs.map_or(true, |vp| vp.print_scaling()))
}

/// Number of copies from the viewer preferences; `1` when unspecified.
pub fn viewerref_get_num_copies(&self) -> Result<i32> {
    let prefs = self.viewer_preferences()?;
    Ok(prefs
        .and_then(|vp| vp.num_copies())
        .map_or(1, |n| n as i32))
}

/// Owned copy of the print page range from the viewer preferences, if any.
pub fn viewerref_get_print_page_range(&self) -> Result<Option<Vec<i64>>> {
    let prefs = self.viewer_preferences()?;
    Ok(prefs.and_then(|vp| vp.print_page_range().map(|r| r.to_vec())))
}

/// Length of a range previously obtained from
/// [`Self::viewerref_get_print_page_range`]. No document state is used.
pub fn viewerref_get_print_page_range_count(&self, range: &[i64]) -> usize {
    range.len()
}

/// Element `index` of `range`, or `None` if the index is out of bounds.
/// No document state is used.
pub fn viewerref_get_print_page_range_element(
    &self,
    range: &[i64],
    index: usize,
) -> Option<i64> {
    range.get(index).copied()
}

/// Duplex mode from the viewer preferences; [`DuplexMode::Simplex`] when
/// there are no viewer preferences.
pub fn viewerref_get_duplex(&self) -> Result<DuplexMode> {
    let prefs = self.viewer_preferences()?;
    Ok(prefs.map_or(DuplexMode::Simplex, |vp| vp.duplex_mode()))
}

/// Value returned by `ViewerPreferences::generic_name` for `key`, as an
/// owned string.
pub fn viewerref_get_name(&self, key: &str) -> Result<Option<String>> {
    let prefs = self.viewer_preferences()?;
    Ok(prefs.and_then(|vp| vp.generic_name(key).map(|s| s.to_owned())))
}
pub fn attachments(&self) -> Result<Vec<FileSpec>> {
let catalog = self.store.resolve(self.catalog_id)?;
Ok(collect_attachments(catalog, &self.store)?)
}
pub fn attachment_count(&self) -> Result<usize> {
Ok(self.attachments()?.len())
}
#[inline]
pub fn doc_get_attachment_count(&self) -> Result<usize> {
self.attachment_count()
}
#[deprecated(
note = "use `doc_get_attachment_count()` — matches upstream `FPDFDoc_GetAttachmentCount`"
)]
#[inline]
pub fn get_attachment_count(&self) -> Result<usize> {
self.attachment_count()
}
pub fn attachment_at(&self, index: usize) -> Result<Option<FileSpec>> {
let all = self.attachments()?;
Ok(all.into_iter().nth(index))
}
#[inline]
pub fn doc_get_attachment(&self, index: usize) -> Result<Option<FileSpec>> {
self.attachment_at(index)
}
#[deprecated(note = "use `doc_get_attachment()` — matches upstream `FPDFDoc_GetAttachment`")]
#[inline]
pub fn get_attachment(&self, index: usize) -> Result<Option<FileSpec>> {
self.attachment_at(index)
}
/// Collects all named destinations reachable from the catalog as
/// `(name, destination)` pairs. Recomputed on every call.
pub fn named_destinations(&self) -> Result<Vec<(String, Destination)>> {
    let catalog = self.store.resolve(self.catalog_id)?;
    let pairs = collect_named_destinations(catalog, &self.store)?;
    Ok(pairs)
}

/// The first named destination whose name equals `name`, if any.
pub fn named_dest_by_name(&self, name: &str) -> Result<Option<Destination>> {
    let pairs = self.named_destinations()?;
    for (candidate, dest) in pairs {
        if candidate == name {
            return Ok(Some(dest));
        }
    }
    Ok(None)
}

/// Alias for [`Self::named_dest_by_name`].
#[inline]
pub fn get_named_dest_by_name(&self, name: &str) -> Result<Option<Destination>> {
    self.named_dest_by_name(name)
}

/// Number of named destinations, narrowed to `u32`.
pub fn named_dest_count(&self) -> Result<u32> {
    self.named_destinations().map(|pairs| pairs.len() as u32)
}

/// Alias for [`Self::named_dest_count`].
#[inline]
pub fn count_named_dests(&self) -> Result<u32> {
    self.named_dest_count()
}

/// The named destination at position `index`, or `None` if out of range.
pub fn named_dest_at(&self, index: usize) -> Result<Option<(String, Destination)>> {
    Ok(self.named_destinations()?.into_iter().nth(index))
}

/// Alias for [`Self::named_dest_at`].
#[inline]
pub fn get_named_dest(&self, index: usize) -> Result<Option<(String, Destination)>> {
    self.named_dest_at(index)
}
/// Returns the metadata field selected by `tag`.
///
/// Recognised tags are `Title`, `Author`, `Subject`, `Keywords`, `Creator`,
/// `Producer`, `CreationDate` and `ModDate` (case-sensitive). Any other tag,
/// a missing field, or missing metadata yields `Ok(None)`.
pub fn meta_text(&self, tag: &str) -> Result<Option<String>> {
    let Some(meta) = self.metadata()? else {
        return Ok(None);
    };
    Ok(match tag {
        "Title" => meta.title,
        "Author" => meta.author,
        "Subject" => meta.subject,
        "Keywords" => meta.keywords,
        "Creator" => meta.creator,
        "Producer" => meta.producer,
        "CreationDate" => meta.creation_date,
        "ModDate" => meta.mod_date,
        _ => None,
    })
}

/// Alias for [`Self::meta_text`].
#[inline]
pub fn get_meta_text(&self, tag: &str) -> Result<Option<String>> {
    self.meta_text(tag)
}
/// Formats the page label for the zero-based `page_index`.
///
/// The label ranges are parsed from the catalog; the last range whose start
/// is not greater than `page_index` is selected and formatted with the
/// offset of the page inside that range. Returns `Ok(None)` when there are
/// no label ranges or none starts at or before the page.
pub fn page_label(&self, page_index: u32) -> Result<Option<String>> {
    let catalog = self.store.resolve(self.catalog_id)?;
    let labels = parse_page_labels(catalog, &self.store)?;
    if labels.is_empty() {
        return Ok(None);
    }

    let idx = i64::from(page_index);
    let Some((range_start, label)) = labels.iter().rfind(|(start, _)| *start <= idx) else {
        return Ok(None);
    };
    let offset = idx - range_start;
    Ok(Some(format_label(label, offset)))
}

/// Alias for [`Self::page_label`].
#[inline]
pub fn get_page_label(&self, page_index: u32) -> Result<Option<String>> {
    self.page_label(page_index)
}
/// Builds the structure tree from the catalog via `StructTree::from_catalog`.
///
/// # Errors
/// Fails if the catalog cannot be resolved, is not a dictionary, or the
/// structure tree cannot be built.
pub fn structure_tree(&self) -> Result<Option<StructTree>> {
    let catalog = self.store.resolve(self.catalog_id)?;
    let Some(catalog_dict) = catalog.as_dict() else {
        return Err(PdfError::UnknownObject(self.catalog_id).into());
    };
    let tree = StructTree::from_catalog(catalog_dict, &self.store)?;
    Ok(tree)
}

/// Alias for [`Self::structure_tree`]; the whole tree is returned, no page
/// filtering is applied here.
#[inline]
pub fn struct_tree_get_for_page(&self) -> Result<Option<StructTree>> {
    self.structure_tree()
}

/// Deprecated alias for [`Self::structure_tree`].
#[deprecated(
    note = "use `struct_tree_get_for_page()` — exact T2 alias for FPDF_StructTree_GetForPage"
)]
#[inline]
pub fn get_for_page(&self) -> Result<Option<StructTree>> {
    self.structure_tree()
}

/// Deprecated alias for [`Self::structure_tree`].
#[deprecated(
    note = "use `struct_tree_get_for_page()` — exact T2 alias for FPDF_StructTree_GetForPage"
)]
#[inline]
pub fn get_structure_tree(&self) -> Result<Option<StructTree>> {
    self.structure_tree()
}
pub fn is_tagged(&self) -> Result<bool> {
let catalog = self.store.resolve(self.catalog_id)?;
Ok(is_tagged(catalog, &self.store))
}
#[inline]
pub fn catalog_is_tagged(&self) -> Result<bool> {
self.is_tagged()
}
/// Result of the free function `page_mode` applied to the catalog.
///
/// # Errors
/// Fails only if the catalog cannot be resolved.
pub fn page_mode(&self) -> Result<PageMode> {
    let catalog = self.store.resolve(self.catalog_id)?;
    let mode = page_mode(catalog, &self.store);
    Ok(mode)
}

/// Alias for [`Self::page_mode`].
#[inline]
pub fn doc_get_page_mode(&self) -> Result<PageMode> {
    self.page_mode()
}

/// Deprecated alias for [`Self::page_mode`].
#[deprecated(note = "use `doc_get_page_mode()` — matches upstream `FPDFDoc_GetPageMode`")]
#[inline]
pub fn get_page_mode(&self) -> Result<PageMode> {
    self.page_mode()
}
/// Result of `rpdfium_doc::pdf_form_type` applied to the catalog.
///
/// # Errors
/// Fails only if the catalog cannot be resolved.
pub fn form_type(&self) -> Result<PdfFormType> {
    let catalog = self.store.resolve(self.catalog_id)?;
    let kind = rpdfium_doc::pdf_form_type(catalog, &self.store);
    Ok(kind)
}

/// Alias for [`Self::form_type`].
#[inline]
pub fn get_form_type(&self) -> Result<PdfFormType> {
    self.form_type()
}
/// Stub: XFA processing is not implemented.
///
/// # Errors
/// Always returns `Error::Doc(DocError::NotSupported(..))`.
pub fn load_xfa(&self) -> Result<()> {
Err(Error::Doc(DocError::NotSupported(
"load_xfa: XFA form processing is not implemented in rpdfium".into(),
)))
}
/// Looks up the additional action of kind `action_type` on a page.
///
/// The action is read from the page dictionary's additional-actions entry
/// under the key given by [`PageActionType::pdf_key`]. Every "not there"
/// situation yields `Ok(None)`: an out-of-range `page_index`, a page object
/// that is not a dictionary, a missing or unresolvable entry, a missing
/// key, or an action that fails to parse.
///
/// # Errors
/// Fails only if the page object itself cannot be resolved.
pub fn page_additional_action(
    &self,
    page_index: usize,
    action_type: PageActionType,
) -> Result<Option<Action>> {
    let Some(&page_dict_id) = self.page_ids.get(page_index) else {
        return Ok(None);
    };
    let page_obj = self.store.resolve(page_dict_id)?;
    let Some(page_dict) = page_obj.as_dict() else {
        return Ok(None);
    };
    let Some(aa_obj) = page_dict.get(&Name::aa()) else {
        return Ok(None);
    };
    let Ok(aa_resolved) = self.store.deep_resolve(aa_obj) else {
        return Ok(None);
    };
    let Some(aa_dict) = aa_resolved.as_dict() else {
        return Ok(None);
    };

    let key = Name::from(action_type.pdf_key());
    let Some(action_obj) = aa_dict.get(&key) else {
        return Ok(None);
    };
    // A malformed action is reported as "no action", not as an error.
    Ok(rpdfium_doc::action::parse_action(action_obj, &self.store).ok())
}

/// Alias for [`Self::page_additional_action`].
#[inline]
pub fn get_page_a_action(
    &self,
    page_index: usize,
    action_type: PageActionType,
) -> Result<Option<Action>> {
    self.page_additional_action(page_index, action_type)
}

/// Deprecated alias for [`Self::page_additional_action`].
#[deprecated(note = "Use `get_page_a_action()` (strict upstream name)")]
#[inline]
pub fn get_page_additional_action(
    &self,
    page_index: usize,
    action_type: PageActionType,
) -> Result<Option<Action>> {
    self.page_additional_action(page_index, action_type)
}
pub fn link_z_order_at_point(&self, _page_index: usize, _x: f64, _y: f64) -> Result<i32> {
Err(Error::Doc(DocError::NotSupported(
"link_z_order_at_point: annotation z-order tracking is not implemented".into(),
)))
}
#[inline]
pub fn link_get_link_z_order_at_point(&self, page_index: usize, x: f64, y: f64) -> Result<i32> {
self.link_z_order_at_point(page_index, x, y)
}
#[deprecated(
note = "use `link_get_link_z_order_at_point()` — matches upstream `FPDFLink_GetLinkZOrderAtPoint`"
)]
#[inline]
pub fn get_link_z_order_at_point(&self, page_index: usize, x: f64, y: f64) -> Result<i32> {
self.link_z_order_at_point(page_index, x, y)
}
/// Whether the object store reports that the cross-reference table was rebuilt.
///
/// Note the polarity: this is the negation of
/// [`Self::has_valid_cross_reference_table`].
#[deprecated(
    note = "use `has_valid_cross_reference_table()` — matches upstream FPDF_DocumentHasValidCrossReferenceTable"
)]
#[inline]
pub fn xref_rebuilt(&self) -> bool {
    self.store.xref_table_rebuilt()
}

/// `true` when the object store did not have to rebuild the
/// cross-reference table.
pub fn has_valid_cross_reference_table(&self) -> bool {
    let rebuilt = self.store.xref_table_rebuilt();
    !rebuilt
}

/// Alias for [`Self::has_valid_cross_reference_table`].
#[inline]
pub fn document_has_valid_cross_reference_table(&self) -> bool {
    self.has_valid_cross_reference_table()
}
/// Stub: always returns an empty vector; no trailer offsets are tracked here.
pub fn trailer_ends(&self) -> Vec<u64> {
Vec::new()
}
/// Alias for [`Self::trailer_ends`].
#[inline]
pub fn get_trailer_ends(&self) -> Vec<u64> {
self.trailer_ends()
}
/// Width and height of page `index`'s media box, narrowed to `f32`.
///
/// # Errors
/// Propagates the errors of [`Self::page`].
pub fn page_size_by_index_f(&self, index: u32) -> Result<(f32, f32)> {
    let media_box = self.page(index)?.media_box();
    let width = media_box.width() as f32;
    let height = media_box.height() as f32;
    Ok((width, height))
}

/// Alias for [`Self::page_size_by_index_f`].
#[inline]
pub fn get_page_size_by_index_f(&self, index: u32) -> Result<(f32, f32)> {
    self.page_size_by_index_f(index)
}
pub fn catalog_language(&self) -> Result<Option<String>> {
let catalog = self.store.resolve(self.catalog_id)?;
let dict = match catalog.as_dict() {
Some(d) => d,
None => return Ok(None),
};
let lang = match dict.get(&Name::lang()) {
Some(o) => o,
None => return Ok(None),
};
let resolved = self
.store
.deep_resolve(lang)
.map_err(|e| Error::Doc(DocError::NotSupported(e.to_string())))?;
if let Some(s) = resolved.as_string() {
return Ok(Some(s.to_string_lossy()));
}
Ok(None)
}
#[inline]
pub fn catalog_get_language(&self) -> Result<Option<String>> {
self.catalog_language()
}
/// Width and height of page `index` as `f64`.
///
/// The values come from [`Self::page_size_by_index_f`], so they carry only
/// `f32` precision.
pub fn page_size_by_index(&self, index: u32) -> Result<(f64, f64)> {
    self.page_size_by_index_f(index)
        .map(|(w, h)| (f64::from(w), f64::from(h)))
}

/// Alias for [`Self::page_size_by_index`].
#[inline]
pub fn get_page_size_by_index(&self, index: u32) -> Result<(f64, f64)> {
    self.page_size_by_index(index)
}
pub fn javascript_actions(&self) -> Result<Vec<JavaScriptAction>> {
let catalog = self.store.resolve(self.catalog_id)?;
collect_javascript_actions(catalog, &self.store).map_err(Error::Doc)
}
pub fn javascript_action_count(&self) -> Result<usize> {
Ok(self.javascript_actions()?.len())
}
#[inline]
pub fn doc_get_javascript_action_count(&self) -> Result<usize> {
self.javascript_action_count()
}
pub fn javascript_action_at(&self, index: usize) -> Result<Option<JavaScriptAction>> {
let mut actions = self.javascript_actions()?;
if index < actions.len() {
Ok(Some(actions.swap_remove(index)))
} else {
Ok(None)
}
}
#[inline]
pub fn doc_get_javascript_action(&self, index: usize) -> Result<Option<JavaScriptAction>> {
self.javascript_action_at(index)
}
/// Borrows the document's underlying object store.
pub fn store(&self) -> &ObjectStore<Arc<[u8]>> {
&self.store
}
}
/// A single page, borrowing its data from the owning [`Document`].
pub struct Page<'doc> {
/// Object store of the owning document.
store: &'doc ObjectStore<Arc<[u8]>>,
/// Font cache of the owning document.
font_cache: &'doc DashMapFontCache,
/// Zero-based index the page was requested with.
page_index: u32,
/// Object id of the page dictionary.
page_dict_id: ObjectId,
/// Media box determined when the page handle was created.
media_box: Rect,
/// Display tree, filled in on the first successful `interpret` call.
display_tree: OnceLock<DisplayTree>,
/// Options the owning document was opened with.
options: &'doc OpenOptions,
/// Optional-content context of the owning document, if any.
oc_context: Option<&'doc rpdfium_page::OCContext>,
}
impl<'doc> Page<'doc> {
pub fn media_box(&self) -> Rect {
self.media_box
}
#[inline]
pub fn page_get_media_box(&self) -> Rect {
self.media_box()
}
#[deprecated(note = "use `page_get_media_box()` — matches upstream `FPDFPage_GetMediaBox`")]
#[inline]
pub fn get_media_box(&self) -> Rect {
self.media_box()
}
/// Width of the media box, narrowed to `f32`.
pub fn page_width_f(&self) -> f32 {
    let width = self.media_box.width();
    width as f32
}

/// Alias for [`Self::page_width_f`].
#[inline]
pub fn get_page_width_f(&self) -> f32 {
    self.page_width_f()
}

/// Width of the media box.
pub fn page_width(&self) -> f64 {
    let width = self.media_box.width();
    width
}

/// Alias for [`Self::page_width`].
#[inline]
pub fn get_page_width(&self) -> f64 {
    self.page_width()
}

/// Height of the media box, narrowed to `f32`.
pub fn page_height_f(&self) -> f32 {
    let height = self.media_box.height();
    height as f32
}

/// Alias for [`Self::page_height_f`].
#[inline]
pub fn get_page_height_f(&self) -> f32 {
    self.page_height_f()
}

/// Height of the media box.
pub fn page_height(&self) -> f64 {
    let height = self.media_box.height();
    height
}

/// Alias for [`Self::page_height`].
#[inline]
pub fn get_page_height(&self) -> f64 {
    self.page_height()
}
/// Reads the crop box from the page dictionary.
///
/// Only the page's own dictionary is consulted. `Ok(None)` means the entry
/// is missing, unresolvable, or not an array of at least four numbers.
///
/// # Errors
/// Fails if the page object cannot be resolved or is not a dictionary.
pub fn crop_box(&self) -> Result<Option<Rect>> {
    let page_obj = self.store.resolve(self.page_dict_id)?;
    match page_obj.as_dict() {
        Some(page_dict) => Ok(parse_rect(page_dict, &Name::crop_box(), self.store)),
        None => Err(PdfError::UnknownObject(self.page_dict_id).into()),
    }
}

/// Alias for [`Self::crop_box`].
#[inline]
pub fn page_get_crop_box(&self) -> Result<Option<Rect>> {
    self.crop_box()
}

/// Deprecated alias for [`Self::crop_box`].
#[deprecated(note = "use `page_get_crop_box()` — matches upstream `FPDFPage_GetCropBox`")]
#[inline]
pub fn get_crop_box(&self) -> Result<Option<Rect>> {
    self.crop_box()
}
/// Reads the bleed box from the page dictionary.
///
/// Only the page's own dictionary is consulted. `Ok(None)` means the entry
/// is missing, unresolvable, or not an array of at least four numbers.
///
/// # Errors
/// Fails if the page object cannot be resolved or is not a dictionary.
pub fn bleed_box(&self) -> Result<Option<Rect>> {
    let page_obj = self.store.resolve(self.page_dict_id)?;
    match page_obj.as_dict() {
        Some(page_dict) => Ok(parse_rect(page_dict, &Name::bleed_box(), self.store)),
        None => Err(PdfError::UnknownObject(self.page_dict_id).into()),
    }
}

/// Alias for [`Self::bleed_box`].
#[inline]
pub fn page_get_bleed_box(&self) -> Result<Option<Rect>> {
    self.bleed_box()
}

/// Deprecated alias for [`Self::bleed_box`].
#[deprecated(note = "use `page_get_bleed_box()` — matches upstream `FPDFPage_GetBleedBox`")]
#[inline]
pub fn get_bleed_box(&self) -> Result<Option<Rect>> {
    self.bleed_box()
}
/// Reads the trim box from the page dictionary.
///
/// Only the page's own dictionary is consulted. `Ok(None)` means the entry
/// is missing, unresolvable, or not an array of at least four numbers.
///
/// # Errors
/// Fails if the page object cannot be resolved or is not a dictionary.
pub fn trim_box(&self) -> Result<Option<Rect>> {
    let page_obj = self.store.resolve(self.page_dict_id)?;
    match page_obj.as_dict() {
        Some(page_dict) => Ok(parse_rect(page_dict, &Name::trim_box(), self.store)),
        None => Err(PdfError::UnknownObject(self.page_dict_id).into()),
    }
}

/// Alias for [`Self::trim_box`].
#[inline]
pub fn page_get_trim_box(&self) -> Result<Option<Rect>> {
    self.trim_box()
}

/// Deprecated alias for [`Self::trim_box`].
#[deprecated(note = "use `page_get_trim_box()` — matches upstream `FPDFPage_GetTrimBox`")]
#[inline]
pub fn get_trim_box(&self) -> Result<Option<Rect>> {
    self.trim_box()
}
/// Reads the art box from the page dictionary.
///
/// Only the page's own dictionary is consulted. `Ok(None)` means the entry
/// is missing, unresolvable, or not an array of at least four numbers.
///
/// # Errors
/// Fails if the page object cannot be resolved or is not a dictionary.
pub fn art_box(&self) -> Result<Option<Rect>> {
    let page_obj = self.store.resolve(self.page_dict_id)?;
    match page_obj.as_dict() {
        Some(page_dict) => Ok(parse_rect(page_dict, &Name::art_box(), self.store)),
        None => Err(PdfError::UnknownObject(self.page_dict_id).into()),
    }
}

/// Alias for [`Self::art_box`].
#[inline]
pub fn page_get_art_box(&self) -> Result<Option<Rect>> {
    self.art_box()
}

/// Deprecated alias for [`Self::art_box`].
#[deprecated(note = "use `page_get_art_box()` — matches upstream `FPDFPage_GetArtBox`")]
#[inline]
pub fn get_art_box(&self) -> Result<Option<Rect>> {
    self.art_box()
}
/// The media box clipped to the crop box.
///
/// Without a crop box the media box is returned unchanged. With one, each
/// edge is the more restrictive of the two boxes; the result is not checked
/// for emptiness, so disjoint boxes produce a rectangle whose edges cross.
pub fn bounding_box(&self) -> Result<Rect> {
    let media = self.media_box;
    let Some(crop) = self.crop_box()? else {
        return Ok(media);
    };
    let left = media.left.max(crop.left);
    let bottom = media.bottom.max(crop.bottom);
    let right = media.right.min(crop.right);
    let top = media.top.min(crop.top);
    Ok(Rect::new(left, bottom, right, top))
}

/// Alias for [`Self::bounding_box`].
#[inline]
pub fn get_page_bounding_box(&self) -> Result<Rect> {
    self.bounding_box()
}
pub fn rotation(&self) -> Result<u32> {
let page_obj = self.store.resolve(self.page_dict_id)?;
let page_dict = page_obj
.as_dict()
.ok_or(PdfError::UnknownObject(self.page_dict_id))?;
let rotation = page_dict
.get(&Name::rotate())
.and_then(|obj| self.store.deep_resolve(obj).ok().and_then(|o| o.as_i64()))
.unwrap_or(0);
Ok(rotation.rem_euclid(360) as u32)
}
#[inline]
pub fn page_get_rotation(&self) -> Result<u32> {
self.rotation()
}
#[deprecated(note = "use `page_get_rotation()` — matches upstream `FPDFPage_GetRotation`")]
#[inline]
pub fn get_rotation(&self) -> Result<u32> {
self.rotation()
}
#[deprecated(note = "use `page_get_rotation()` — matches upstream `FPDFPage_GetRotation`")]
#[inline]
pub fn get_page_rotation(&self) -> Result<u32> {
self.rotation()
}
/// Returns the page's display tree, building it on first use.
///
/// A successfully built tree is stored in the page and reused by later
/// calls. A failed build stores nothing, so the next call tries again. If
/// the cell was filled in the meantime, the stored tree wins and the
/// freshly built one is dropped.
pub fn interpret(&self) -> Result<&DisplayTree> {
    match self.display_tree.get() {
        Some(cached) => Ok(cached),
        None => {
            let fresh = self.interpret_inner()?;
            Ok(self.display_tree.get_or_init(move || fresh))
        }
    }
}
/// Builds the display tree for this page without touching the cache.
///
/// Steps, in order: decode the page's content stream(s), tokenize them into
/// operators, resolve the page resources, then run the interpreter with a
/// font bridge over the document's font cache. The interpreter receives the
/// parsing mode and the operator limit from the open options.
fn interpret_inner(&self) -> Result<DisplayTree> {
    let page_obj = self.store.resolve(self.page_dict_id)?;
    let Some(page_dict) = page_obj.as_dict() else {
        return Err(PdfError::UnknownObject(self.page_dict_id).into());
    };

    let content_bytes = decode_page_contents(page_dict, self.store)?;
    let operators = tokenize_content_stream(&content_bytes)?;
    let resources = resolve_resources(self.store, page_dict)?;

    let font_bridge = FontCacheBridge {
        font_cache: self.font_cache,
        store: self.store,
        resources: &resources,
    };
    let context = InterpreterContext {
        store: self.store,
        font_cache: &font_bridge,
        mode: self.options.parsing_mode,
        oc_context: self.oc_context,
    };

    let operator_limit = self.options.max_operators_per_page;
    Ok(interpret(&operators, &context, &resources, operator_limit)?)
}
/// Renders the page into a bitmap using `config`.
///
/// The display tree is obtained through [`Self::interpret`] and rendered
/// with an image decoder backed by the document's object store.
pub fn render(&self, config: &RenderConfig) -> Result<rpdfium_graphics::Bitmap> {
    let tree = self.interpret()?;
    let image_decoder = image_decode::PdfImageDecoder::new(self.store);
    Ok(rpdfium_render::render_with_images(
        tree,
        config,
        &image_decoder,
    )?)
}

/// Alias for [`Self::render`].
#[inline]
pub fn render_page_bitmap(&self, config: &RenderConfig) -> Result<rpdfium_graphics::Bitmap> {
    self.render(config)
}
/// Renders the page with an explicit transform into a `width` × `height`
/// bitmap, optionally restricted to `clip`.
///
/// A default [`RenderConfig`] is customised with the size, the transform
/// and, if given, the clip rectangle, then passed to [`Self::render`].
pub fn render_with_matrix(
    &self,
    matrix: Matrix,
    clip: Option<Rect>,
    width: u32,
    height: u32,
) -> Result<rpdfium_graphics::Bitmap> {
    let base = RenderConfig::default()
        .with_size(width, height)
        .with_transform(matrix);
    let config = match clip {
        Some(clip_rect) => base.with_clip(clip_rect),
        None => base,
    };
    self.render(&config)
}

/// Alias for [`Self::render_with_matrix`].
#[inline]
pub fn render_page_bitmap_with_matrix(
    &self,
    matrix: Matrix,
    clip: Option<Rect>,
    width: u32,
    height: u32,
) -> Result<rpdfium_graphics::Bitmap> {
    self.render_with_matrix(matrix, clip, width, height)
}
/// Extracts the page's text into a [`TextPage`].
///
/// A [`TextExtractor`] is walked over the display tree and the characters
/// and run ids it gathered are handed to `TextPage::new_with_run_ids`, with
/// `false` as the final argument.
pub fn text(&self) -> Result<TextPage> {
    let mut extractor = TextExtractor::new();
    walk(self.interpret()?, &mut extractor);
    let (characters, run_ids) = extractor.into_characters();
    let text_page = TextPage::new_with_run_ids(characters, run_ids, false);
    Ok(text_page)
}
/// Parses the page's annotations.
///
/// A page without an annotations entry yields an empty vector. The list is
/// parsed again on every call and returned by value.
///
/// # Errors
/// Fails if the page object cannot be resolved, is not a dictionary, or the
/// annotations cannot be parsed.
pub fn annotations(&self) -> Result<Vec<Annotation>> {
    let page_obj = self.store.resolve(self.page_dict_id)?;
    let page_dict = page_obj
        .as_dict()
        .ok_or(PdfError::UnknownObject(self.page_dict_id))?;

    let Some(annots_obj) = page_dict.get(&Name::annots()) else {
        return Ok(Vec::new());
    };
    Ok(parse_annotations(annots_obj, self.store)?)
}
/// Number of annotations returned by [`Self::annotations`].
pub fn annotation_count(&self) -> Result<usize> {
    self.annotations().map(|annots| annots.len())
}

/// Alias for [`Self::annotation_count`].
#[inline]
pub fn page_get_annot_count(&self) -> Result<usize> {
    self.annotation_count()
}

/// Deprecated alias for [`Self::annotation_count`].
#[deprecated(note = "use `page_get_annot_count()` — matches upstream `FPDFPage_GetAnnotCount`")]
#[inline]
pub fn get_annot_count(&self) -> Result<usize> {
    self.annotation_count()
}

/// The annotation at position `index`, or `None` if the index is out of range.
pub fn annotation_at(&self, index: usize) -> Result<Option<Annotation>> {
    let annots = self.annotations()?;
    Ok(annots.into_iter().nth(index))
}

/// Alias for [`Self::annotation_at`].
#[inline]
pub fn page_get_annot(&self, index: usize) -> Result<Option<Annotation>> {
    self.annotation_at(index)
}

/// Deprecated alias for [`Self::annotation_at`].
#[deprecated(note = "use `page_get_annot()` — matches upstream `FPDFPage_GetAnnot`")]
#[inline]
pub fn get_annot(&self, index: usize) -> Result<Option<Annotation>> {
    self.annotation_at(index)
}
/// Intended to return the position of `annot` within the page's annotations.
///
/// NOTE(review): the comparison is by address (`std::ptr::eq`) against the
/// elements of the vector that `annotations()` builds and returns by value
/// on this very call. A caller-held `&Annotation` (for example one obtained
/// from an earlier `annotations()` / `annotation_at()` call) lives in a
/// different allocation, so the addresses cannot match and this appears to
/// always return `Ok(None)`. A value- or id-based comparison is probably
/// what is wanted — TODO confirm which equality `Annotation` supports.
pub fn annotation_index(&self, annot: &Annotation) -> Result<Option<usize>> {
Ok(self
.annotations()?
.iter()
.position(|a| std::ptr::eq(a as *const Annotation, annot as *const Annotation)))
}
/// Alias for [`Self::annotation_index`]; the same caveat applies.
#[inline]
pub fn page_get_annot_index(&self, annot: &Annotation) -> Result<Option<usize>> {
self.annotation_index(annot)
}
/// Deprecated alias for [`Self::annotation_index`]; the same caveat applies.
#[deprecated(note = "use `page_get_annot_index()` — matches upstream `FPDFPage_GetAnnotIndex`")]
#[inline]
pub fn get_annot_index(&self, annot: &Annotation) -> Result<Option<usize>> {
self.annotation_index(annot)
}
/// Decodes the page's embedded thumbnail into a bitmap, if there is one.
///
/// Delegates to `decode_page_thumbnail`, defined elsewhere in this crate.
pub fn thumbnail(&self) -> Result<Option<Bitmap>> {
    let (store, page_id) = (self.store, self.page_dict_id);
    decode_page_thumbnail(store, page_id)
}

/// Alias for [`Self::thumbnail`].
#[inline]
pub fn page_get_thumbnail_as_bitmap(&self) -> Result<Option<Bitmap>> {
    self.thumbnail()
}

/// Deprecated alias for [`Self::thumbnail`].
#[deprecated(
    note = "use `page_get_thumbnail_as_bitmap()` — matches upstream `FPDFPage_GetThumbnailAsBitmap`"
)]
#[inline]
pub fn get_thumbnail_as_bitmap(&self) -> Result<Option<Bitmap>> {
    self.thumbnail()
}

/// Thumbnail bytes obtained from `thumbnail_raw_or_decoded` with its flag
/// set to `true` (presumably "decode"; the helper is defined elsewhere).
pub fn thumbnail_decoded_bytes(&self) -> Result<Option<Vec<u8>>> {
    let (store, page_id) = (self.store, self.page_dict_id);
    thumbnail_raw_or_decoded(store, page_id, true)
}

/// Alias for [`Self::thumbnail_decoded_bytes`].
#[inline]
pub fn page_get_decoded_thumbnail_data(&self) -> Result<Option<Vec<u8>>> {
    self.thumbnail_decoded_bytes()
}

/// Deprecated alias for [`Self::thumbnail_decoded_bytes`].
#[deprecated(
    note = "use `page_get_decoded_thumbnail_data()` — matches upstream `FPDFPage_GetDecodedThumbnailData`"
)]
#[inline]
pub fn get_decoded_thumbnail_data(&self) -> Result<Option<Vec<u8>>> {
    self.thumbnail_decoded_bytes()
}

/// Thumbnail bytes obtained from `thumbnail_raw_or_decoded` with its flag
/// set to `false` (presumably "raw"; the helper is defined elsewhere).
pub fn thumbnail_raw_bytes(&self) -> Result<Option<Vec<u8>>> {
    let (store, page_id) = (self.store, self.page_dict_id);
    thumbnail_raw_or_decoded(store, page_id, false)
}

/// Alias for [`Self::thumbnail_raw_bytes`].
#[inline]
pub fn page_get_raw_thumbnail_data(&self) -> Result<Option<Vec<u8>>> {
    self.thumbnail_raw_bytes()
}

/// Deprecated alias for [`Self::thumbnail_raw_bytes`].
#[deprecated(
    note = "use `page_get_raw_thumbnail_data()` — matches upstream `FPDFPage_GetRawThumbnailData`"
)]
#[inline]
pub fn get_raw_thumbnail_data(&self) -> Result<Option<Vec<u8>>> {
    self.thumbnail_raw_bytes()
}
/// Zero-based index this page was requested with.
pub fn index(&self) -> u32 {
self.page_index
}
}
/// Reads the rectangle stored under `key` in `dict`.
///
/// The entry is resolved through `store` and then parsed with
/// [`parse_rect_from_obj`]. Returns `None` if the key is absent, resolution
/// fails, or the value is not a usable rectangle array.
pub(crate) fn parse_rect(
    dict: &std::collections::HashMap<Name, Object>,
    key: &Name,
    store: &ObjectStore<Arc<[u8]>>,
) -> Option<Rect> {
    dict.get(key)
        .and_then(|entry| store.deep_resolve(entry).ok())
        .and_then(|resolved| parse_rect_from_obj(resolved))
}
/// Builds a [`Rect`] from the first four elements of an array object.
///
/// Yields `None` when `obj` is not an array, when the array has fewer than
/// four elements, or when any of the first four elements has no `f64` value.
/// Elements past the fourth are ignored. The four numbers are passed to
/// `Rect::new` in array order.
pub(crate) fn parse_rect_from_obj(obj: &Object) -> Option<Rect> {
    let mut entries = obj.as_array()?.iter();
    let mut coords = [0.0_f64; 4];
    for slot in &mut coords {
        // Bail out on a short array or a non-numeric entry.
        *slot = entries.next()?.as_f64()?;
    }
    let [a, b, c, d] = coords;
    Some(Rect::new(a, b, c, d))
}
/// Decodes a page's content stream(s) into one contiguous byte buffer.
///
/// The entry stored under `Name::contents()` is resolved through `store` and
/// handled by shape:
///
/// * a single stream is decoded and returned;
/// * an array is walked in order: every element that is a reference to a
///   stream is decoded and appended, with a single space byte inserted before
///   a chunk whenever the buffer is already non-empty; elements that are not
///   references, or that reference something other than a stream, are skipped;
/// * a reference (still unresolved after `deep_resolve`) is followed once and
///   decoded if it points at a stream;
/// * anything else, or a missing entry, produces an empty buffer.
///
/// # Errors
///
/// Propagates failures from resolving objects or decoding streams.
pub(crate) fn decode_page_contents(
    page_dict: &std::collections::HashMap<Name, Object>,
    store: &ObjectStore<Arc<[u8]>>,
) -> std::result::Result<Vec<u8>, PdfError> {
    let Some(contents_obj) = page_dict.get(&Name::contents()) else {
        return Ok(Vec::new());
    };

    match store.deep_resolve(contents_obj)? {
        stream @ Object::Stream { .. } => store.decode_stream(stream),
        Object::Array(items) => {
            let mut combined = Vec::new();
            for ref_id in items.iter().filter_map(|item| item.as_reference()) {
                let target = store.resolve(ref_id)?;
                if !matches!(target, Object::Stream { .. }) {
                    continue;
                }
                let chunk = store.decode_stream(target)?;
                // Keep tokens of adjacent streams from running together.
                if !combined.is_empty() {
                    combined.push(b' ');
                }
                combined.extend_from_slice(&chunk);
            }
            Ok(combined)
        }
        Object::Reference(id) => match store.resolve(*id)? {
            target @ Object::Stream { .. } => store.decode_stream(target),
            _ => Ok(Vec::new()),
        },
        _ => Ok(Vec::new()),
    }
}
/// Maps a point in page space to integer device coordinates.
///
/// The transform is obtained from `compute_page_transform` using the page's
/// media box together with `page_width`, `page_height` and `rotate`. Each
/// transformed coordinate is rounded with `f64::round` and then converted with
/// an `as i32` cast, which saturates at the `i32` bounds and maps NaN to 0.
pub fn page_to_device(
    page: &Page<'_>,
    page_width: u32,
    page_height: u32,
    rotate: u32,
    page_x: f64,
    page_y: f64,
) -> (i32, i32) {
    let to_device = compute_page_transform(&page.media_box(), page_width, page_height, rotate);
    let source = Point {
        x: page_x,
        y: page_y,
    };
    let mapped = to_device.transform_point(source);
    let device_x = mapped.x.round() as i32;
    let device_y = mapped.y.round() as i32;
    (device_x, device_y)
}
/// Maps integer device coordinates back to a point in page space.
///
/// Builds the same transform as [`page_to_device`] (media box, `page_width`,
/// `page_height`, `rotate`) and applies its inverse to the device point.
/// When the transform has no inverse, `(0.0, 0.0)` is returned.
pub fn device_to_page(
    page: &Page<'_>,
    page_width: u32,
    page_height: u32,
    rotate: u32,
    device_x: i32,
    device_y: i32,
) -> (f64, f64) {
    let to_device = compute_page_transform(&page.media_box(), page_width, page_height, rotate);
    let Some(to_page) = to_device.inverse() else {
        return (0.0, 0.0);
    };
    let mapped = to_page.transform_point(Point {
        x: f64::from(device_x),
        y: f64::from(device_y),
    });
    (mapped.x, mapped.y)
}
/// Decodes a page's thumbnail image into an RGBA bitmap.
///
/// The thumbnail is the object stored under `Name::thumb()` in the page
/// dictionary identified by `page_dict_id`. Its stream is decoded and converted
/// to 4-bytes-per-pixel RGBA via `convert_to_rgba`.
///
/// Returns `Ok(None)` when:
///
/// * the page dictionary has no thumbnail entry,
/// * the entry does not resolve to a stream,
/// * the width or height is missing, zero, or does not fit in `u32`
///   (for example a negative value in a malformed file), or
/// * the row stride `width * 4` would overflow `u32`.
///
/// A bits-per-component value that is missing or does not fit in `u32` falls
/// back to 8.
///
/// # Errors
///
/// Returns `PdfError::UnknownObject` when `page_dict_id` does not resolve to a
/// dictionary, and propagates failures from resolving objects or decoding the
/// stream.
fn decode_page_thumbnail(
    store: &ObjectStore<Arc<[u8]>>,
    page_dict_id: ObjectId,
) -> Result<Option<Bitmap>> {
    let page_obj = store.resolve(page_dict_id)?;
    let page_dict = page_obj
        .as_dict()
        .ok_or(PdfError::UnknownObject(page_dict_id))?;
    let Some(thumb_obj) = page_dict.get(&Name::thumb()) else {
        return Ok(None);
    };
    let resolved = store.deep_resolve(thumb_obj)?;
    let Some(stream_dict) = resolved.as_stream_dict() else {
        return Ok(None);
    };

    // Image parameters come from untrusted input: convert with range checks so
    // a negative or oversized integer cannot wrap into a bogus `u32`.
    let width = get_dict_int(stream_dict, &Name::width(), store)
        .and_then(|v| u32::try_from(v).ok())
        .unwrap_or(0);
    let height = get_dict_int(stream_dict, &Name::height(), store)
        .and_then(|v| u32::try_from(v).ok())
        .unwrap_or(0);
    let bpc = get_dict_int(stream_dict, &Name::bits_per_component(), store)
        .and_then(|v| u32::try_from(v).ok())
        .unwrap_or(8);
    if width == 0 || height == 0 {
        return Ok(None);
    }
    // Reject widths whose RGBA row stride cannot be represented, before doing
    // any decoding work.
    let Some(stride) = width.checked_mul(4) else {
        return Ok(None);
    };

    let (n_components, cs_type) = resolve_image_color_space(stream_dict, store);
    let decoded = store.decode_stream(resolved)?;
    let decode_array = read_decode_array(stream_dict, n_components, store);
    let rgba = convert_to_rgba(
        &decoded,
        width,
        height,
        bpc,
        n_components,
        &cs_type,
        &decode_array,
        false,
    );

    Ok(Some(Bitmap {
        width,
        height,
        format: BitmapFormat::Rgba32,
        stride,
        data: rgba,
    }))
}
/// Fetches the bytes of a page's thumbnail stream, decoded or as stored.
///
/// The thumbnail is the object stored under `Name::thumb()` in the page
/// dictionary identified by `page_dict_id`.
///
/// * When the entry is an indirect reference, it is resolved once. If the
///   target is a stream, `decode` selects between `decode_stream` and
///   `raw_stream_bytes_for_object` (which is also given the referenced id).
/// * When the entry is not a reference, it is deep-resolved. A stream is
///   decoded if `decode` is set; otherwise its bytes are returned only when the
///   stream data is already in the `StreamData::Decoded` form.
///
/// Every other shape yields `Ok(None)`, as does a missing thumbnail entry.
///
/// # Errors
///
/// Returns `PdfError::UnknownObject` when `page_dict_id` does not resolve to a
/// dictionary, and propagates failures from resolving objects or reading the
/// stream.
pub(crate) fn thumbnail_raw_or_decoded(
    store: &ObjectStore<Arc<[u8]>>,
    page_dict_id: ObjectId,
    decode: bool,
) -> Result<Option<Vec<u8>>> {
    let page_obj = store.resolve(page_dict_id)?;
    let page_dict = page_obj
        .as_dict()
        .ok_or(PdfError::UnknownObject(page_dict_id))?;
    let Some(thumb_obj) = page_dict.get(&Name::thumb()) else {
        return Ok(None);
    };

    // Indirect thumbnail: the object id is known, so raw bytes can be fetched.
    if let Object::Reference(id) = thumb_obj {
        let target = store.resolve(*id)?;
        if !matches!(target, Object::Stream { .. }) {
            return Ok(None);
        }
        let bytes = if decode {
            store.decode_stream(target)?
        } else {
            store.raw_stream_bytes_for_object(target, *id)?
        };
        return Ok(Some(bytes));
    }

    // Inline thumbnail: no object id is available for a raw lookup.
    let inline = store.deep_resolve(thumb_obj)?;
    match inline {
        Object::Stream { .. } if decode => {
            let bytes = store.decode_stream(inline)?;
            Ok(Some(bytes))
        }
        Object::Stream {
            data: rpdfium_parser::object::StreamData::Decoded { data },
            ..
        } => Ok(Some(data.clone())),
        _ => Ok(None),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a tiny single-page PDF (catalog, page tree, one page) with a
    /// classic cross-reference table whose offsets are computed while writing.
    fn minimal_pdf() -> Vec<u8> {
        const OBJECTS: [&[u8]; 3] = [
            b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n",
            b"2 0 obj\n<< /Type /Pages /Kids [3 0 R] /Count 1 >>\nendobj\n",
            b"3 0 obj\n<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >>\nendobj\n",
        ];

        let mut pdf = b"%PDF-1.4\n".to_vec();

        // Record where each object starts so the xref table can point at it.
        let mut offsets = Vec::with_capacity(OBJECTS.len());
        for body in OBJECTS {
            offsets.push(pdf.len());
            pdf.extend_from_slice(body);
        }

        let xref_offset = pdf.len();
        pdf.extend_from_slice(b"xref\n0 4\n");
        pdf.extend_from_slice(b"0000000000 65535 f \r\n");
        for offset in offsets {
            pdf.extend_from_slice(format!("{:010} 00000 n \r\n", offset).as_bytes());
        }
        pdf.extend_from_slice(b"trailer\n<< /Size 4 /Root 1 0 R >>\n");
        pdf.extend_from_slice(format!("startxref\n{xref_offset}\n%%EOF").as_bytes());
        pdf
    }

    /// A well-formed file must report a valid cross-reference table.
    #[test]
    fn test_has_valid_cross_reference_table_valid_pdf() {
        let library = Library::new();
        let options = OpenOptions::default();
        let document = Document::open(&library, minimal_pdf(), &options).unwrap();
        assert!(document.has_valid_cross_reference_table());
    }

    /// Smoke test: only checks that `trailer_ends()` can be called on an
    /// opened document. The returned value is not inspected.
    #[test]
    fn test_trailer_ends_returns_empty_vec() {
        let library = Library::new();
        let options = OpenOptions::default();
        let document = Document::open(&library, minimal_pdf(), &options).unwrap();
        let _ends = document.trailer_ends();
    }
}