use std::sync::{Arc, OnceLock};
use rpdfium_core::error::{ObjectId, PdfError};
use rpdfium_core::{Matrix, Name, OpenOptions, Rect};
use rpdfium_doc::{
Annotation, Bookmark, DocumentMetadata, FileSpec, PageStructure, StructTree, ViewerPreferences,
collect_attachments, collect_signatures,
};
use rpdfium_font::DashMapFontCache;
use rpdfium_page::display::{DisplayTree, walk};
use rpdfium_page::{InterpreterContext, collect_page_ids, interpret, resolve_resources};
use rpdfium_parser::{ObjectStore, tokenize_content_stream};
use rpdfium_text::{TextExtractor, TextPage};
use rpdfium_graphics::Bitmap;
use crate::{
Error, FontCacheBridge, PdfReader, RenderConfig, Result, SignatureObject, decode_page_contents,
decode_page_thumbnail, parse_annotations, parse_bookmarks, parse_metadata, parse_rect,
parse_rect_from_obj, thumbnail_raw_or_decoded,
};
/// Cheaply clonable library handle.
///
/// The handle wraps a reference-counted [`LibraryInner`], so cloning it only
/// produces another pointer to the same shared value.
#[derive(Clone)]
pub struct ArcLibrary {
// Not read anywhere in the visible code, hence the lint suppression.
#[allow(dead_code)]
inner: Arc<LibraryInner>,
}
/// Shared payload behind [`ArcLibrary`]; currently holds no data.
struct LibraryInner {
// Private unit field: the struct can only be constructed inside this module.
_private: (),
}
impl ArcLibrary {
pub fn new() -> Self {
Self {
inner: Arc::new(LibraryInner { _private: () }),
}
}
}
impl Default for ArcLibrary {
fn default() -> Self {
Self::new()
}
}
/// Clonable handle to an opened PDF document.
///
/// All document state lives in a reference-counted [`DocumentInner`]; clones
/// share that state rather than copying it.
#[derive(Clone)]
pub struct ArcDocument {
inner: Arc<DocumentInner>,
}
/// State shared by every clone of an [`ArcDocument`]; populated once in `ArcDocument::open`.
struct DocumentInner {
// Parsed object store over the reference-counted file bytes.
store: ObjectStore<Arc<[u8]>>,
// Font cache handed to the page interpreter through `FontCacheBridge`.
font_cache: DashMapFontCache,
// Page dictionary ids as returned by `collect_page_ids`; indexed by page number.
page_ids: Vec<ObjectId>,
// Id of the document catalog, taken from the trailer's `root`.
catalog_id: ObjectId,
// Copy of the options the document was opened with.
options: OpenOptions,
// Optional-content context built from the catalog; `None` when `from_catalog` yields none.
oc_context: Option<rpdfium_page::OCContext>,
}
impl ArcDocument {
pub fn open(_library: &ArcLibrary, data: Vec<u8>, options: &OpenOptions) -> Result<Self> {
let arc_data: Arc<[u8]> = Arc::from(data);
let store = ObjectStore::open_with_password(
arc_data,
options.parsing_mode,
options.password.as_deref(),
)?;
let page_ids = collect_page_ids(&store)?;
let catalog_id = store.trailer().root;
let font_cache = DashMapFontCache::new();
let oc_context = rpdfium_page::OCContext::from_catalog(&store, catalog_id);
Ok(ArcDocument {
inner: Arc::new(DocumentInner {
store,
font_cache,
page_ids,
catalog_id,
options: options.clone(),
oc_context,
}),
})
}
#[inline]
pub fn load_mem_document(
library: &ArcLibrary,
data: Vec<u8>,
options: &OpenOptions,
) -> Result<Self> {
Self::open(library, data, options)
}
pub fn open_file(
library: &ArcLibrary,
path: impl AsRef<std::path::Path>,
options: &OpenOptions,
) -> Result<Self> {
let data = std::fs::read(path).map_err(PdfError::Io)?;
Self::open(library, data, options)
}
#[inline]
pub fn load_document(
library: &ArcLibrary,
path: impl AsRef<std::path::Path>,
options: &OpenOptions,
) -> Result<Self> {
Self::open_file(library, path, options)
}
pub fn open_custom(
library: &ArcLibrary,
reader: impl PdfReader,
options: &OpenOptions,
) -> Result<Self> {
let len = reader.file_len() as usize;
let mut data = vec![0u8; len];
let mut offset = 0;
while offset < data.len() {
let n = reader
.read_at(offset as u64, &mut data[offset..])
.map_err(PdfError::Io)?;
if n == 0 {
break;
}
offset += n;
}
data.truncate(offset);
Self::open(library, data, options)
}
#[inline]
pub fn load_custom_document(
library: &ArcLibrary,
reader: impl PdfReader,
options: &OpenOptions,
) -> Result<Self> {
Self::open_custom(library, reader, options)
}
pub fn page_count(&self) -> u32 {
self.inner.page_ids.len() as u32
}
#[inline]
pub fn get_page_count(&self) -> u32 {
self.page_count()
}
pub fn page(&self, index: u32) -> Result<ArcPage> {
let count = self.page_count();
if index >= count {
return Err(Error::PageOutOfRange { index, count });
}
let page_dict_id = self.inner.page_ids[index as usize];
let page_obj = self.inner.store.resolve(page_dict_id)?;
let page_dict = page_obj
.as_dict()
.ok_or(PdfError::UnknownObject(page_dict_id))?;
let media_box = parse_rect(page_dict, &Name::media_box(), &self.inner.store)
.or_else(|| {
let inherited = rpdfium_page::find_inherited_entry(
&self.inner.store,
page_dict,
&Name::media_box(),
)
.ok()??;
parse_rect_from_obj(&inherited)
})
.unwrap_or(Rect::new(0.0, 0.0, 612.0, 792.0));
Ok(ArcPage {
doc: self.clone(),
page_index: index,
page_dict_id,
media_box,
display_tree: OnceLock::new(),
})
}
#[inline]
pub fn load_page(&self, index: u32) -> Result<ArcPage> {
self.page(index)
}
/// Parses the document information dictionary referenced by the trailer.
///
/// Returns `Ok(None)` when the trailer has no `info` entry.
///
/// # Errors
///
/// Propagates failures from resolving the info object and from parsing it.
pub fn metadata(&self) -> Result<Option<DocumentMetadata>> {
    let store = &self.inner.store;
    let Some(info_id) = store.trailer().info else {
        return Ok(None);
    };
    let info_obj = store.resolve(info_id)?;
    Ok(Some(parse_metadata(info_obj, store)?))
}
/// Parses the document's bookmarks starting from the catalog object.
///
/// # Errors
///
/// Propagates failures from resolving the catalog and from bookmark parsing.
pub fn bookmarks(&self) -> Result<Vec<Bookmark>> {
    let store = &self.inner.store;
    let catalog = store.resolve(self.inner.catalog_id)?;
    Ok(parse_bookmarks(catalog, store)?)
}
/// Collects the signature objects reachable from the catalog.
///
/// # Errors
///
/// Propagates failures from resolving the catalog and from signature collection.
pub fn signatures(&self) -> Result<Vec<SignatureObject>> {
    let store = &self.inner.store;
    let catalog = store.resolve(self.inner.catalog_id)?;
    let signatures = collect_signatures(catalog, store)?;
    Ok(signatures)
}
pub fn pdf_version(&self) -> (u8, u8) {
let v = self.inner.store.file_version();
(v.major, v.minor)
}
#[inline]
pub fn get_file_version(&self) -> (u8, u8) {
self.pdf_version()
}
pub fn permissions(&self) -> Option<u32> {
self.inner
.store
.security_handler()
.map(|h| h.permissions().bits() as u32)
}
#[inline]
pub fn get_doc_permissions(&self) -> Option<u32> {
self.permissions()
}
#[inline]
pub fn get_doc_user_permissions(&self) -> Option<u32> {
self.permissions()
}
#[deprecated(
note = "use `get_doc_user_permissions()` — matches upstream FPDF_GetDocUserPermissions"
)]
#[inline]
pub fn user_permissions(&self) -> Option<u32> {
self.permissions()
}
pub fn security_revision(&self) -> Option<u32> {
self.inner.store.security_handler().map(|h| h.revision())
}
#[inline]
pub fn get_security_handler_revision(&self) -> Option<u32> {
self.security_revision()
}
/// Reads the catalog's viewer-preferences dictionary, if present.
///
/// Returns `Ok(None)` when the catalog has no such entry, when the entry cannot be
/// resolved, or when it does not resolve to a dictionary.
///
/// # Errors
///
/// Fails when the catalog cannot be resolved or is not a dictionary
/// (`PdfError::UnknownObject`).
pub fn viewer_preferences(&self) -> Result<Option<ViewerPreferences>> {
    let store = &self.inner.store;
    let catalog_id = self.inner.catalog_id;
    let catalog = store.resolve(catalog_id)?;
    let catalog_dict = catalog
        .as_dict()
        .ok_or(PdfError::UnknownObject(catalog_id))?;

    let Some(entry) = catalog_dict.get(&Name::viewer_preferences()) else {
        return Ok(None);
    };
    // A resolution failure is deliberately treated the same as a missing entry.
    let Ok(vp_obj) = store.deep_resolve(entry) else {
        return Ok(None);
    };
    let Some(vp_dict) = vp_obj.as_dict() else {
        return Ok(None);
    };
    Ok(Some(ViewerPreferences::from_dict(vp_dict, store)))
}
/// Collects the file specifications attached to the document via the catalog.
///
/// # Errors
///
/// Propagates failures from resolving the catalog and from attachment collection.
pub fn attachments(&self) -> Result<Vec<FileSpec>> {
    let store = &self.inner.store;
    let catalog = store.resolve(self.inner.catalog_id)?;
    let files = collect_attachments(catalog, store)?;
    Ok(files)
}
pub fn attachment_count(&self) -> Result<usize> {
Ok(self.attachments()?.len())
}
#[inline]
pub fn doc_get_attachment_count(&self) -> Result<usize> {
self.attachment_count()
}
#[deprecated(
note = "use `doc_get_attachment_count()` — matches upstream `FPDFDoc_GetAttachmentCount`"
)]
#[inline]
pub fn get_attachment_count(&self) -> Result<usize> {
self.attachment_count()
}
pub fn attachment_at(&self, index: usize) -> Result<Option<FileSpec>> {
let all = self.attachments()?;
Ok(all.into_iter().nth(index))
}
#[inline]
pub fn doc_get_attachment(&self, index: usize) -> Result<Option<FileSpec>> {
self.attachment_at(index)
}
#[deprecated(note = "use `doc_get_attachment()` — matches upstream `FPDFDoc_GetAttachment`")]
#[inline]
pub fn get_attachment(&self, index: usize) -> Result<Option<FileSpec>> {
self.attachment_at(index)
}
/// Borrows the underlying object store shared by all clones of this document.
pub fn store(&self) -> &ObjectStore<Arc<[u8]>> {
    let shared = &self.inner;
    &shared.store
}
/// Renders the requested pages on rayon's thread pool.
///
/// The output has one entry per element of `page_indices`, in the same order. Each
/// entry is the outcome for that page alone: a bad index or a failed render yields
/// an `Err` in that slot without affecting the others.
pub fn render_pages_parallel(
    &self,
    page_indices: &[u32],
    config: &RenderConfig,
) -> Vec<Result<rpdfium_graphics::Bitmap>> {
    use rayon::prelude::*;

    // `ArcPage::render` interprets the page and rasterises it with an image decoder
    // bound to this document's store.
    page_indices
        .par_iter()
        .map(|&page_idx| self.page(page_idx).and_then(|page| page.render(config)))
        .collect()
}

/// Renders every page of the document via [`Self::render_pages_parallel`].
pub fn render_all_pages_parallel(
    &self,
    config: &RenderConfig,
) -> Vec<Result<rpdfium_graphics::Bitmap>> {
    let every_page: Vec<u32> = (0..self.page_count()).collect();
    self.render_pages_parallel(&every_page, config)
}
}
/// Handle to a single page of an [`ArcDocument`].
///
/// Holds its own clone of the document handle, so the shared document state stays
/// alive for as long as the page does.
#[derive(Clone)]
pub struct ArcPage {
// Owning document handle.
doc: ArcDocument,
// Zero-based page index this handle was created for.
page_index: u32,
// Object id of the page dictionary.
page_dict_id: ObjectId,
// Media box determined when the page handle was created.
media_box: Rect,
// Display tree cache; filled on the first successful `interpret` call.
display_tree: OnceLock<DisplayTree>,
}
impl ArcPage {
pub fn media_box(&self) -> Rect {
self.media_box
}
#[inline]
pub fn page_get_media_box(&self) -> Rect {
self.media_box()
}
#[deprecated(note = "use `page_get_media_box()` — matches upstream `FPDFPage_GetMediaBox`")]
#[inline]
pub fn get_media_box(&self) -> Rect {
self.media_box()
}
pub fn page_width_f(&self) -> f32 {
self.media_box.width() as f32
}
#[inline]
pub fn get_page_width_f(&self) -> f32 {
self.page_width_f()
}
pub fn page_width(&self) -> f64 {
self.media_box.width()
}
#[inline]
pub fn get_page_width(&self) -> f64 {
self.page_width()
}
pub fn page_height_f(&self) -> f32 {
self.media_box.height() as f32
}
#[inline]
pub fn get_page_height_f(&self) -> f32 {
self.page_height_f()
}
pub fn page_height(&self) -> f64 {
self.media_box.height()
}
#[inline]
pub fn get_page_height(&self) -> f64 {
self.page_height()
}
pub fn crop_box(&self) -> Result<Option<Rect>> {
let store = &self.doc.inner.store;
let page_obj = store.resolve(self.page_dict_id)?;
let page_dict = page_obj
.as_dict()
.ok_or(PdfError::UnknownObject(self.page_dict_id))?;
Ok(parse_rect(page_dict, &Name::crop_box(), store))
}
#[inline]
pub fn page_get_crop_box(&self) -> Result<Option<Rect>> {
self.crop_box()
}
#[deprecated(note = "use `page_get_crop_box()` — matches upstream `FPDFPage_GetCropBox`")]
#[inline]
pub fn get_crop_box(&self) -> Result<Option<Rect>> {
self.crop_box()
}
pub fn bleed_box(&self) -> Result<Option<Rect>> {
let store = &self.doc.inner.store;
let page_obj = store.resolve(self.page_dict_id)?;
let page_dict = page_obj
.as_dict()
.ok_or(PdfError::UnknownObject(self.page_dict_id))?;
Ok(parse_rect(page_dict, &Name::bleed_box(), store))
}
#[inline]
pub fn page_get_bleed_box(&self) -> Result<Option<Rect>> {
self.bleed_box()
}
#[deprecated(note = "use `page_get_bleed_box()` — matches upstream `FPDFPage_GetBleedBox`")]
#[inline]
pub fn get_bleed_box(&self) -> Result<Option<Rect>> {
self.bleed_box()
}
pub fn trim_box(&self) -> Result<Option<Rect>> {
let store = &self.doc.inner.store;
let page_obj = store.resolve(self.page_dict_id)?;
let page_dict = page_obj
.as_dict()
.ok_or(PdfError::UnknownObject(self.page_dict_id))?;
Ok(parse_rect(page_dict, &Name::trim_box(), store))
}
#[inline]
pub fn page_get_trim_box(&self) -> Result<Option<Rect>> {
self.trim_box()
}
#[deprecated(note = "use `page_get_trim_box()` — matches upstream `FPDFPage_GetTrimBox`")]
#[inline]
pub fn get_trim_box(&self) -> Result<Option<Rect>> {
self.trim_box()
}
pub fn art_box(&self) -> Result<Option<Rect>> {
let store = &self.doc.inner.store;
let page_obj = store.resolve(self.page_dict_id)?;
let page_dict = page_obj
.as_dict()
.ok_or(PdfError::UnknownObject(self.page_dict_id))?;
Ok(parse_rect(page_dict, &Name::art_box(), store))
}
#[inline]
pub fn page_get_art_box(&self) -> Result<Option<Rect>> {
self.art_box()
}
#[deprecated(note = "use `page_get_art_box()` — matches upstream `FPDFPage_GetArtBox`")]
#[inline]
pub fn get_art_box(&self) -> Result<Option<Rect>> {
self.art_box()
}
pub fn bounding_box(&self) -> Result<Rect> {
let crop = self.crop_box()?;
let media = self.media_box;
Ok(match crop {
Some(c) => Rect::new(
media.left.max(c.left),
media.bottom.max(c.bottom),
media.right.min(c.right),
media.top.min(c.top),
),
None => media,
})
}
#[inline]
pub fn get_page_bounding_box(&self) -> Result<Rect> {
self.bounding_box()
}
pub fn rotation(&self) -> Result<u32> {
let store = &self.doc.inner.store;
let page_obj = store.resolve(self.page_dict_id)?;
let page_dict = page_obj
.as_dict()
.ok_or(PdfError::UnknownObject(self.page_dict_id))?;
let rotation = page_dict
.get(&Name::rotate())
.and_then(|obj| store.deep_resolve(obj).ok().and_then(|o| o.as_i64()))
.unwrap_or(0);
Ok(rotation.rem_euclid(360) as u32)
}
#[inline]
pub fn page_get_rotation(&self) -> Result<u32> {
self.rotation()
}
#[deprecated(note = "use `page_get_rotation()` — matches upstream `FPDFPage_GetRotation`")]
#[inline]
pub fn get_rotation(&self) -> Result<u32> {
self.rotation()
}
#[deprecated(note = "use `page_get_rotation()` — matches upstream `FPDFPage_GetRotation`")]
#[inline]
pub fn get_page_rotation(&self) -> Result<u32> {
self.rotation()
}
/// Returns the page's display tree, interpreting the content stream on first use.
///
/// The tree is stored in this handle's `OnceLock`, so later calls on the same
/// handle return the cached value. A failed interpretation is not cached.
///
/// # Errors
///
/// Propagates any error from interpreting the page.
pub fn interpret(&self) -> Result<&DisplayTree> {
    let cache = &self.display_tree;
    if cache.get().is_none() {
        let tree = self.interpret_inner()?;
        // `set` only fails if the cell was filled in the meantime; either way a
        // tree is present afterwards, so the result can be ignored.
        let _ = cache.set(tree);
    }
    Ok(cache.get().unwrap())
}
/// Uncached interpretation of the page's content stream into a display tree.
///
/// Steps, in order: resolve the page dictionary, decode its contents, tokenize
/// them into operators, resolve the page resources, then run the interpreter with
/// the document's font cache, parsing mode, optional-content context and
/// per-page operator limit.
///
/// # Errors
///
/// Fails when the page object is not a dictionary (`PdfError::UnknownObject`) or
/// when any of the steps above reports an error.
fn interpret_inner(&self) -> Result<DisplayTree> {
    let doc = &self.doc.inner;
    let store = &doc.store;

    let page_obj = store.resolve(self.page_dict_id)?;
    let Some(page_dict) = page_obj.as_dict() else {
        return Err(PdfError::UnknownObject(self.page_dict_id).into());
    };

    let content = decode_page_contents(page_dict, store)?;
    let ops = tokenize_content_stream(&content)?;
    let resources = resolve_resources(store, page_dict)?;

    // Adapter exposing the document-wide font cache to the interpreter.
    let font_bridge = FontCacheBridge {
        font_cache: &doc.font_cache,
        store,
        resources: &resources,
    };
    let ctx = InterpreterContext {
        store,
        font_cache: &font_bridge,
        mode: doc.options.parsing_mode,
        oc_context: doc.oc_context.as_ref(),
    };

    let op_limit = doc.options.max_operators_per_page;
    Ok(interpret(&ops, &ctx, &resources, op_limit)?)
}
pub fn render(&self, config: &RenderConfig) -> Result<rpdfium_graphics::Bitmap> {
let tree = self.interpret()?;
let decoder = crate::image_decode::PdfImageDecoder::new(&self.doc.inner.store);
let bitmap = rpdfium_render::render_with_images(tree, config, &decoder)?;
Ok(bitmap)
}
#[inline]
pub fn render_page_bitmap(&self, config: &RenderConfig) -> Result<rpdfium_graphics::Bitmap> {
self.render(config)
}
pub fn render_with_matrix(
&self,
matrix: Matrix,
clip: Option<Rect>,
width: u32,
height: u32,
) -> Result<rpdfium_graphics::Bitmap> {
let mut config = RenderConfig::default()
.with_size(width, height)
.with_transform(matrix);
if let Some(r) = clip {
config = config.with_clip(r);
}
self.render(&config)
}
#[inline]
pub fn render_page_bitmap_with_matrix(
&self,
matrix: Matrix,
clip: Option<Rect>,
width: u32,
height: u32,
) -> Result<rpdfium_graphics::Bitmap> {
self.render_with_matrix(matrix, clip, width, height)
}
/// Extracts the page's text by walking its display tree with a `TextExtractor`.
///
/// # Errors
///
/// Propagates any error from interpreting the page.
pub fn text(&self) -> Result<TextPage> {
    let display_tree = self.interpret()?;
    let mut collector = TextExtractor::new();
    walk(display_tree, &mut collector);

    let (chars, runs) = collector.into_characters();
    let page = TextPage::new_with_run_ids(chars, runs, false);
    Ok(page)
}
/// Parses the annotations listed under the page's annotations entry.
///
/// Returns an empty vector when the page dictionary has no such entry.
///
/// # Errors
///
/// Fails when the page object cannot be resolved, is not a dictionary, or when
/// annotation parsing reports an error.
pub fn annotations(&self) -> Result<Vec<Annotation>> {
    let store = &self.doc.inner.store;
    let page_obj = store.resolve(self.page_dict_id)?;
    let page_dict = page_obj
        .as_dict()
        .ok_or(PdfError::UnknownObject(self.page_dict_id))?;

    let Some(annots_obj) = page_dict.get(&Name::annots()) else {
        return Ok(Vec::new());
    };
    Ok(parse_annotations(annots_obj, store)?)
}
/// Structure information for this page, derived from the document's structure tree.
///
/// Returns `None` when the catalog cannot be resolved, is not a dictionary, or
/// when building the structure tree fails or yields no tree.
pub fn page_structure(&self) -> Option<PageStructure> {
    let store = &self.doc.inner.store;
    let catalog_obj = store.resolve(self.doc.inner.catalog_id).ok()?;
    let catalog_dict = catalog_obj.as_dict()?;
    // Errors and "no tree" are both collapsed into `None`.
    let struct_tree = StructTree::from_catalog(catalog_dict, store)
        .ok()
        .flatten()?;
    Some(PageStructure::for_page(&struct_tree, self.page_dict_id))
}
pub fn thumbnail(&self) -> Result<Option<Bitmap>> {
let store = &self.doc.inner.store;
decode_page_thumbnail(store, self.page_dict_id)
}
#[inline]
pub fn page_get_thumbnail_as_bitmap(&self) -> Result<Option<Bitmap>> {
self.thumbnail()
}
#[deprecated(
note = "use `page_get_thumbnail_as_bitmap()` — matches upstream `FPDFPage_GetThumbnailAsBitmap`"
)]
#[inline]
pub fn get_thumbnail_as_bitmap(&self) -> Result<Option<Bitmap>> {
self.thumbnail()
}
pub fn thumbnail_decoded_bytes(&self) -> Result<Option<Vec<u8>>> {
let store = &self.doc.inner.store;
thumbnail_raw_or_decoded(store, self.page_dict_id, true)
}
#[inline]
pub fn page_get_decoded_thumbnail_data(&self) -> Result<Option<Vec<u8>>> {
self.thumbnail_decoded_bytes()
}
#[deprecated(
note = "use `page_get_decoded_thumbnail_data()` — matches upstream `FPDFPage_GetDecodedThumbnailData`"
)]
#[inline]
pub fn get_decoded_thumbnail_data(&self) -> Result<Option<Vec<u8>>> {
self.thumbnail_decoded_bytes()
}
pub fn thumbnail_raw_bytes(&self) -> Result<Option<Vec<u8>>> {
let store = &self.doc.inner.store;
thumbnail_raw_or_decoded(store, self.page_dict_id, false)
}
#[inline]
pub fn page_get_raw_thumbnail_data(&self) -> Result<Option<Vec<u8>>> {
self.thumbnail_raw_bytes()
}
#[deprecated(
note = "use `page_get_raw_thumbnail_data()` — matches upstream `FPDFPage_GetRawThumbnailData`"
)]
#[inline]
pub fn get_raw_thumbnail_data(&self) -> Result<Option<Vec<u8>>> {
self.thumbnail_raw_bytes()
}
/// Zero-based index this page handle was created with.
pub fn index(&self) -> u32 {
    let idx = self.page_index;
    idx
}

/// Borrows the document handle this page belongs to.
pub fn document(&self) -> &ArcDocument {
    let owner = &self.doc;
    owner
}
}
// Compile-time check that every public handle type is both `Send` and `Sync`.
// Nothing here runs; the code only has to type-check, hence the lint suppression.
#[allow(dead_code)]
const _: () = {
    fn assert_send_sync<T: Send + Sync>() {}
    fn assertions() {
        assert_send_sync::<ArcLibrary>();
        assert_send_sync::<ArcDocument>();
        assert_send_sync::<ArcPage>();
    }
};