Skip to main content

zpdf_document/
lib.rs

1pub mod annotation;
2mod catalog;
3pub mod font_loader;
4pub mod optional_content;
5pub mod page;
6
7pub use annotation::Annotation;
8pub use catalog::Catalog;
9pub use optional_content::OcConfig;
10pub use page::{PdfPage, ResourceDict};
11
12use std::sync::Arc;
13use zpdf_core::{ParseLimits, Result};
14use zpdf_font::FontCache;
15use zpdf_parser::PdfFile;
16
17pub struct PdfDocument {
18    file: PdfFile,
19    catalog: Catalog,
20}
21
22impl PdfDocument {
23    pub fn open(data: impl Into<Arc<[u8]>>) -> Result<Self> {
24        Self::open_with_limits(data, ParseLimits::default())
25    }
26
27    pub fn open_with_limits(data: impl Into<Arc<[u8]>>, limits: ParseLimits) -> Result<Self> {
28        let file = PdfFile::parse_with_limits(data, limits)?;
29        let catalog = Catalog::from_trailer(&file)?;
30        Ok(Self { file, catalog })
31    }
32
33    pub fn page_count(&self) -> usize {
34        self.catalog.page_count
35    }
36
37    pub fn page(&self, index: usize) -> Result<PdfPage> {
38        self.catalog.get_page(&self.file, index)
39    }
40
41    pub fn file(&self) -> &PdfFile {
42        &self.file
43    }
44
45    pub fn version(&self) -> (u8, u8) {
46        (self.file.header.major, self.file.header.minor)
47    }
48
49    /// Get decoded content stream bytes for a page.
50    pub fn page_content_bytes(&self, page: &PdfPage) -> Result<Vec<u8>> {
51        let mut all_bytes = Vec::new();
52        for &content_id in &page.contents {
53            match self.file.resolve_stream_data(content_id) {
54                Ok(bytes) => {
55                    if !all_bytes.is_empty() {
56                        all_bytes.push(b'\n');
57                    }
58                    all_bytes.extend_from_slice(&bytes);
59                }
60                Err(e) => {
61                    tracing::warn!("failed to decode content stream {content_id}: {e}");
62                }
63            }
64        }
65        Ok(all_bytes)
66    }
67
68    /// Load all fonts referenced by a page.
69    pub fn load_page_fonts(&self, page: &PdfPage) -> FontCache {
70        font_loader::load_page_fonts(self.file(), page)
71    }
72
73    /// Parse a page's annotations into renderable form (/Rect, /F, the
74    /// /AS-selected appearance stream, /OC membership).
75    pub fn page_annotations(&self, page: &PdfPage) -> Vec<Annotation> {
76        annotation::parse_annotations(&self.file, page)
77    }
78
79    /// The document's default optional-content configuration, if any.
80    pub fn oc_config(&self) -> Option<OcConfig> {
81        optional_content::parse_oc_config(&self.file)
82    }
83}
84
#[cfg(test)]
pub(crate) mod test_util {
    /// Assemble a synthetic PDF out of raw object bodies.
    ///
    /// `objects[i]` is written as indirect object `i + 1` (generation 0). The
    /// output is a `%PDF-1.7` header, the objects, an xref table whose byte
    /// offsets are measured while writing, and a trailer whose /Root points at
    /// object 1. Because offsets are measured rather than hard-coded, bodies
    /// can be edited without touching anything else.
    pub fn build_pdf(objects: &[&str]) -> Vec<u8> {
        // Both the xref subsection header and /Size count the free entry 0.
        let entry_count = objects.len() + 1;
        let mut pdf = b"%PDF-1.7\n".to_vec();

        // Write every object, recording the byte offset at which it begins.
        let starts: Vec<usize> = objects
            .iter()
            .enumerate()
            .map(|(index, body)| {
                let start = pdf.len();
                let number = index + 1;
                pdf.extend_from_slice(format!("{number} 0 obj\n{body}\nendobj\n").as_bytes());
                start
            })
            .collect();

        // The xref section begins right after the last object.
        let xref_start = pdf.len();
        let mut tail = format!("xref\n0 {entry_count}\n0000000000 65535 f \n");
        for start in &starts {
            // Each entry is exactly 20 bytes: offset, generation, flag, "SP LF".
            tail.push_str(&format!("{start:010} 00000 n \n"));
        }
        tail.push_str(&format!(
            "trailer\n<< /Size {entry_count} /Root 1 0 R >>\nstartxref\n{xref_start}\n%%EOF\n"
        ));
        pdf.extend_from_slice(tail.as_bytes());
        pdf
    }
}
113}