1pub mod annotation;
2mod catalog;
3pub mod font_loader;
4pub mod forms;
5pub mod optional_content;
6pub mod page;
7
8pub use annotation::Annotation;
9pub use catalog::Catalog;
10pub use forms::{AcroForm, FieldKind, FieldValue, FormField};
11pub use optional_content::OcConfig;
12pub use page::{PdfPage, ResourceDict};
13
14use std::sync::{Arc, OnceLock};
15use zpdf_core::{ParseLimits, Result};
16use zpdf_font::FontCache;
17use zpdf_parser::PdfFile;
18
19pub struct PdfDocument {
20 file: PdfFile,
21 catalog: Catalog,
22 acro_form: OnceLock<Option<AcroForm>>,
25}
26
27impl PdfDocument {
28 pub fn open(data: impl Into<Arc<[u8]>>) -> Result<Self> {
29 Self::open_with_limits(data, ParseLimits::default())
30 }
31
32 pub fn open_with_limits(data: impl Into<Arc<[u8]>>, limits: ParseLimits) -> Result<Self> {
33 Self::open_with_password_and_limits(data, b"", limits)
34 }
35
36 pub fn open_with_password(data: impl Into<Arc<[u8]>>, password: &[u8]) -> Result<Self> {
40 Self::open_with_password_and_limits(data, password, ParseLimits::default())
41 }
42
43 pub fn open_with_password_and_limits(
44 data: impl Into<Arc<[u8]>>,
45 password: &[u8],
46 limits: ParseLimits,
47 ) -> Result<Self> {
48 let file = PdfFile::parse_with_password_and_limits(data, password, limits)?;
49 let catalog = Catalog::from_trailer(&file)?;
50 Ok(Self {
51 file,
52 catalog,
53 acro_form: OnceLock::new(),
54 })
55 }
56
57 pub fn is_encrypted(&self) -> bool {
59 self.file.is_encrypted()
60 }
61
62 pub fn page_count(&self) -> usize {
63 self.catalog.page_count
64 }
65
66 pub fn page(&self, index: usize) -> Result<PdfPage> {
67 self.catalog.get_page(&self.file, index)
68 }
69
70 pub fn file(&self) -> &PdfFile {
71 &self.file
72 }
73
74 pub fn version(&self) -> (u8, u8) {
75 (self.file.header.major, self.file.header.minor)
76 }
77
78 pub fn page_content_bytes(&self, page: &PdfPage) -> Result<Vec<u8>> {
80 let mut all_bytes = Vec::new();
81 for &content_id in &page.contents {
82 match self.file.resolve_stream_data(content_id) {
83 Ok(bytes) => {
84 if !all_bytes.is_empty() {
85 all_bytes.push(b'\n');
86 }
87 all_bytes.extend_from_slice(&bytes);
88 }
89 Err(e) => {
90 tracing::warn!("failed to decode content stream {content_id}: {e}");
91 }
92 }
93 }
94 Ok(all_bytes)
95 }
96
97 pub fn load_page_fonts(&self, page: &PdfPage) -> FontCache {
99 font_loader::load_page_fonts(self.file(), page)
100 }
101
102 pub fn page_annotations(&self, page: &PdfPage) -> Vec<Annotation> {
107 annotation::parse_annotations(&self.file, page, self.acro_form())
108 }
109
110 pub fn acro_form(&self) -> Option<&AcroForm> {
113 self.acro_form
114 .get_or_init(|| AcroForm::parse(&self.file))
115 .as_ref()
116 }
117
118 pub fn oc_config(&self) -> Option<OcConfig> {
120 optional_content::parse_oc_config(&self.file)
121 }
122}
123
#[cfg(test)]
pub(crate) mod test_util {
    /// Assembles a small PDF file for use in tests.
    ///
    /// The output consists of, in order:
    /// - a `%PDF-1.7` header line;
    /// - each entry of `objects` wrapped as `N 0 obj … endobj`, numbered from 1
    ///   in slice order;
    /// - an `xref` table with one free entry followed by one in-use entry per
    ///   object, each holding that object's byte offset;
    /// - a trailer whose `/Size` is `objects.len() + 1` and whose `/Root` is
    ///   always `1 0 R`, so the first object is expected to be the catalog;
    /// - `startxref` with the byte offset of the `xref` keyword, then `%%EOF`.
    ///
    /// Object bodies are inserted verbatim; no validation is performed.
    pub fn build_pdf(objects: &[&str]) -> Vec<u8> {
        // Bodies are `&str`, so the whole file can be built as a `String`;
        // `String::len` is a byte count, which is what xref offsets need.
        let mut pdf = String::from("%PDF-1.7\n");

        // Record where each object starts while appending it.
        let offsets: Vec<usize> = objects
            .iter()
            .zip(1usize..)
            .map(|(body, number)| {
                let start = pdf.len();
                pdf.push_str(&format!("{number} 0 obj\n{body}\nendobj\n"));
                start
            })
            .collect();

        let xref_off = pdf.len();
        let entry_count = objects.len() + 1;

        pdf.push_str(&format!("xref\n0 {entry_count}\n0000000000 65535 f \n"));
        for start in &offsets {
            // Each row is exactly 20 bytes: 10-digit offset, generation, flag.
            pdf.push_str(&format!("{start:010} 00000 n \n"));
        }
        pdf.push_str(&format!(
            "trailer\n<< /Size {entry_count} /Root 1 0 R >>\nstartxref\n{xref_off}\n%%EOF\n"
        ));

        pdf.into_bytes()
    }
}