Skip to main content

zpdf_document/
lib.rs

1pub mod annotation;
2mod catalog;
3pub mod font_loader;
4pub mod forms;
5pub mod optional_content;
6pub mod page;
7
8pub use annotation::Annotation;
9pub use catalog::Catalog;
10pub use forms::{AcroForm, FieldKind, FieldValue, FormField};
11pub use optional_content::OcConfig;
12pub use page::{PdfPage, ResourceDict};
13
14use std::sync::{Arc, OnceLock};
15use zpdf_core::{ParseLimits, Result};
16use zpdf_font::FontCache;
17use zpdf_parser::PdfFile;
18
19pub struct PdfDocument {
20    file: PdfFile,
21    catalog: Catalog,
22    /// Lazily-parsed interactive form, shared across page-annotation calls so
23    /// the field-tree walk runs at most once per document.
24    acro_form: OnceLock<Option<AcroForm>>,
25}
26
27impl PdfDocument {
28    pub fn open(data: impl Into<Arc<[u8]>>) -> Result<Self> {
29        Self::open_with_limits(data, ParseLimits::default())
30    }
31
32    pub fn open_with_limits(data: impl Into<Arc<[u8]>>, limits: ParseLimits) -> Result<Self> {
33        Self::open_with_password_and_limits(data, b"", limits)
34    }
35
36    /// Open an encrypted document with a user or owner password. Returns
37    /// [`zpdf_core::Error::WrongPassword`] when the password authenticates as
38    /// neither. (A non-encrypted document opens regardless of the password.)
39    pub fn open_with_password(data: impl Into<Arc<[u8]>>, password: &[u8]) -> Result<Self> {
40        Self::open_with_password_and_limits(data, password, ParseLimits::default())
41    }
42
43    pub fn open_with_password_and_limits(
44        data: impl Into<Arc<[u8]>>,
45        password: &[u8],
46        limits: ParseLimits,
47    ) -> Result<Self> {
48        let file = PdfFile::parse_with_password_and_limits(data, password, limits)?;
49        let catalog = Catalog::from_trailer(&file)?;
50        Ok(Self {
51            file,
52            catalog,
53            acro_form: OnceLock::new(),
54        })
55    }
56
57    /// True when the document is encrypted (carries an `/Encrypt` dictionary).
58    pub fn is_encrypted(&self) -> bool {
59        self.file.is_encrypted()
60    }
61
62    pub fn page_count(&self) -> usize {
63        self.catalog.page_count
64    }
65
66    pub fn page(&self, index: usize) -> Result<PdfPage> {
67        self.catalog.get_page(&self.file, index)
68    }
69
70    pub fn file(&self) -> &PdfFile {
71        &self.file
72    }
73
74    pub fn version(&self) -> (u8, u8) {
75        (self.file.header.major, self.file.header.minor)
76    }
77
78    /// Get decoded content stream bytes for a page.
79    pub fn page_content_bytes(&self, page: &PdfPage) -> Result<Vec<u8>> {
80        let mut all_bytes = Vec::new();
81        for &content_id in &page.contents {
82            match self.file.resolve_stream_data(content_id) {
83                Ok(bytes) => {
84                    if !all_bytes.is_empty() {
85                        all_bytes.push(b'\n');
86                    }
87                    all_bytes.extend_from_slice(&bytes);
88                }
89                Err(e) => {
90                    tracing::warn!("failed to decode content stream {content_id}: {e}");
91                }
92            }
93        }
94        Ok(all_bytes)
95    }
96
97    /// Load all fonts referenced by a page.
98    pub fn load_page_fonts(&self, page: &PdfPage) -> FontCache {
99        font_loader::load_page_fonts(self.file(), page)
100    }
101
102    /// Parse a page's annotations into renderable form (/Rect, /F, the
103    /// /AS-selected appearance stream, /OC membership). Widget annotations for
104    /// interactive-form fields gain a generated appearance when the producer
105    /// left none (or set /NeedAppearances).
106    pub fn page_annotations(&self, page: &PdfPage) -> Vec<Annotation> {
107        annotation::parse_annotations(&self.file, page, self.acro_form())
108    }
109
110    /// The document's interactive form (`/AcroForm`), if any. Parsed once and
111    /// cached for the lifetime of the document.
112    pub fn acro_form(&self) -> Option<&AcroForm> {
113        self.acro_form
114            .get_or_init(|| AcroForm::parse(&self.file))
115            .as_ref()
116    }
117
118    /// The document's default optional-content configuration, if any.
119    pub fn oc_config(&self) -> Option<OcConfig> {
120        optional_content::parse_oc_config(&self.file)
121    }
122}
123
#[cfg(test)]
pub(crate) mod test_util {
    /// Build a synthetic PDF from numbered object bodies (index `i` becomes
    /// object `i + 1`), with a correct xref table and a trailer whose /Root is
    /// object 1. Offsets are computed, so bodies can be edited freely.
    ///
    /// The output is, in order: a `%PDF-1.7` header line, one
    /// `N 0 obj … endobj` wrapper per body, a single-section `xref` table
    /// (free entry 0 followed by one in-use entry per object), the trailer
    /// dictionary, `startxref` with the byte offset of the `xref` keyword,
    /// and `%%EOF`.
    pub fn build_pdf(objects: &[&str]) -> Vec<u8> {
        use std::io::Write as _;

        // `io::Write` for `Vec<u8>` only appends to the vector and never
        // reports an error, so these `expect`s cannot fire.
        const INFALLIBLE: &str = "writing to a Vec<u8> cannot fail";

        let mut buf = Vec::from(&b"%PDF-1.7\n"[..]);

        // Record where each object starts so the xref entries can point at it.
        let mut offsets = Vec::with_capacity(objects.len());
        for (i, body) in objects.iter().enumerate() {
            offsets.push(buf.len());
            writeln!(buf, "{} 0 obj\n{body}\nendobj", i + 1).expect(INFALLIBLE);
        }

        let xref_off = buf.len();
        // Entry 0 is the mandatory free-list head, hence the `+ 1`.
        let entry_count = objects.len() + 1;
        writeln!(buf, "xref\n0 {entry_count}\n0000000000 65535 f ").expect(INFALLIBLE);
        for off in &offsets {
            // 10-digit offset, 5-digit generation, type, then space + newline:
            // each entry is exactly 20 bytes. The trailing space is required.
            writeln!(buf, "{off:010} 00000 n ").expect(INFALLIBLE);
        }
        writeln!(
            buf,
            "trailer\n<< /Size {entry_count} /Root 1 0 R >>\nstartxref\n{xref_off}\n%%EOF"
        )
        .expect(INFALLIBLE);
        buf
    }
}
152}