// pdf_lib_rs/api/pdf_document.rs

1use crate::core::context::PdfContext;
2use crate::core::errors::{PdfError, Result};
3use crate::core::objects::*;
4use crate::core::parser::PdfParser;
5use crate::core::writers::PdfWriter;
6
/// Options for loading a PDF document with [`PdfDocument::load_with_options`].
///
/// `LoadOptions::default()` leaves both flags `false`.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct LoadOptions {
    /// If true, encrypted PDFs will be loaded without error.
    ///
    /// When false, loading a document whose trailer carries an encryption
    /// entry fails with `PdfError::EncryptedPdf`.
    pub ignore_encryption: bool,
    /// If true, invalid objects will cause an error instead of a warning.
    pub throw_on_invalid_object: bool,
}
15
16/// A high-level representation of a PDF document.
17///
18/// This is the main entry point for creating and modifying PDF documents.
19pub struct PdfDocument {
20    context: PdfContext,
21    is_encrypted: bool,
22}
23
24impl PdfDocument {
25    /// Create a new, empty PDF document.
26    pub fn create() -> Self {
27        let mut context = PdfContext::create();
28
29        // Create the minimal document structure:
30        // 1. Page tree root
31        let mut pages_dict = PdfDict::new();
32        pages_dict.set(PdfName::of("Type"), PdfObject::Name(PdfName::of("Pages")));
33        pages_dict.set(PdfName::of("Kids"), PdfObject::Array(PdfArray::new()));
34        pages_dict.set(PdfName::of("Count"), PdfObject::Number(PdfNumber::of(0.0)));
35        let pages_ref = context.register(PdfObject::Dict(pages_dict));
36
37        // 2. Catalog
38        let mut catalog_dict = PdfDict::new();
39        catalog_dict.set(PdfName::of("Type"), PdfObject::Name(PdfName::of("Catalog")));
40        catalog_dict.set(PdfName::of("Pages"), PdfObject::Ref(pages_ref));
41        let catalog_ref = context.register(PdfObject::Dict(catalog_dict));
42
43        context.trailer_info.root = Some(PdfObject::Ref(catalog_ref));
44
45        PdfDocument {
46            context,
47            is_encrypted: false,
48        }
49    }
50
51    /// Load an existing PDF document from bytes.
52    pub fn load(bytes: &[u8]) -> Result<Self> {
53        Self::load_with_options(bytes, LoadOptions::default())
54    }
55
56    /// Load an existing PDF document from bytes with options.
57    pub fn load_with_options(bytes: &[u8], options: LoadOptions) -> Result<Self> {
58        let parser = PdfParser::for_bytes_with_options(bytes, options.throw_on_invalid_object);
59        let context = parser.parse_document()?;
60
61        let is_encrypted = context.trailer_info.encrypt.is_some();
62
63        if is_encrypted && !options.ignore_encryption {
64            return Err(PdfError::EncryptedPdf);
65        }
66
67        Ok(PdfDocument {
68            context,
69            is_encrypted,
70        })
71    }
72
73    /// Save the document to PDF bytes.
74    pub fn save(&self) -> Vec<u8> {
75        PdfWriter::serialize_to_buffer(&self.context)
76    }
77
78    /// Returns true if the document is encrypted.
79    pub fn is_encrypted(&self) -> bool {
80        self.is_encrypted
81    }
82
83    /// Get the number of pages in the document.
84    pub fn get_page_count(&self) -> usize {
85        self.get_page_refs().len()
86    }
87
88    /// Get the page indices (0-based).
89    pub fn get_page_indices(&self) -> Vec<usize> {
90        (0..self.get_page_count()).collect()
91    }
92
93    /// Add a new blank page to the end of the document with the given size.
94    pub fn add_page(&mut self, size: [f64; 2]) -> PdfRef {
95        let pages_ref = self.get_pages_ref();
96
97        // Create the page
98        let mut page_dict = PdfDict::new();
99        page_dict.set(PdfName::of("Type"), PdfObject::Name(PdfName::of("Page")));
100        page_dict.set(PdfName::of("Parent"), PdfObject::Ref(pages_ref.clone()));
101
102        let mut media_box = PdfArray::new();
103        media_box.push(PdfObject::Number(PdfNumber::of(0.0)));
104        media_box.push(PdfObject::Number(PdfNumber::of(0.0)));
105        media_box.push(PdfObject::Number(PdfNumber::of(size[0])));
106        media_box.push(PdfObject::Number(PdfNumber::of(size[1])));
107        page_dict.set(PdfName::of("MediaBox"), PdfObject::Array(media_box));
108
109        let page_ref = self.context.register(PdfObject::Dict(page_dict));
110
111        // Add to page tree
112        self.add_page_ref_to_tree(&pages_ref, &page_ref);
113
114        page_ref
115    }
116
117    /// Insert a new blank page at the given index.
118    pub fn insert_page(&mut self, index: usize, size: [f64; 2]) -> PdfRef {
119        let pages_ref = self.get_pages_ref();
120
121        let mut page_dict = PdfDict::new();
122        page_dict.set(PdfName::of("Type"), PdfObject::Name(PdfName::of("Page")));
123        page_dict.set(PdfName::of("Parent"), PdfObject::Ref(pages_ref.clone()));
124
125        let mut media_box = PdfArray::new();
126        media_box.push(PdfObject::Number(PdfNumber::of(0.0)));
127        media_box.push(PdfObject::Number(PdfNumber::of(0.0)));
128        media_box.push(PdfObject::Number(PdfNumber::of(size[0])));
129        media_box.push(PdfObject::Number(PdfNumber::of(size[1])));
130        page_dict.set(PdfName::of("MediaBox"), PdfObject::Array(media_box));
131
132        let page_ref = self.context.register(PdfObject::Dict(page_dict));
133
134        // Insert at index in Kids array
135        self.insert_page_ref_in_tree(&pages_ref, &page_ref, index);
136
137        page_ref
138    }
139
140    /// Remove a page at the given index.
141    pub fn remove_page(&mut self, index: usize) {
142        let pages_ref = self.get_pages_ref();
143        if let Some(PdfObject::Dict(pages_dict)) = self.context.lookup(&pages_ref).cloned() {
144            if let Some(PdfObject::Array(mut kids)) = pages_dict.get(&PdfName::of("Kids")).cloned() {
145                if index < kids.size() {
146                    kids.remove(index);
147                    let new_count = kids.size() as f64;
148                    let mut new_pages_dict = pages_dict.clone();
149                    new_pages_dict.set(PdfName::of("Kids"), PdfObject::Array(kids));
150                    new_pages_dict.set(PdfName::of("Count"), PdfObject::Number(PdfNumber::of(new_count)));
151                    self.context.assign(&pages_ref, PdfObject::Dict(new_pages_dict));
152                }
153            }
154        }
155    }
156
157    /// Copy pages from another document. Returns the new page refs.
158    pub fn copy_pages(&mut self, src_doc: &PdfDocument, indices: &[usize]) -> Vec<PdfRef> {
159        let src_page_refs = src_doc.get_page_refs();
160        let pages_ref = self.get_pages_ref();
161        let mut new_refs = Vec::new();
162
163        for &idx in indices {
164            if idx >= src_page_refs.len() {
165                continue;
166            }
167            let src_page_ref = &src_page_refs[idx];
168
169            // Deep-copy the page object
170            if let Some(src_page) = src_doc.context.lookup(src_page_ref) {
171                let mut page = src_page.clone();
172
173                // Update the Parent reference to our page tree
174                if let PdfObject::Dict(ref mut dict) = page {
175                    dict.set(PdfName::of("Parent"), PdfObject::Ref(pages_ref.clone()));
176                }
177
178                let new_ref = self.context.register(page);
179                self.add_page_ref_to_tree(&pages_ref, &new_ref);
180                new_refs.push(new_ref);
181            }
182        }
183
184        new_refs
185    }
186
187    /// Set the document title.
188    pub fn set_title(&mut self, title: &str) {
189        self.set_info_field("Title", title);
190    }
191
192    /// Set the document author.
193    pub fn set_author(&mut self, author: &str) {
194        self.set_info_field("Author", author);
195    }
196
197    /// Set the document subject.
198    pub fn set_subject(&mut self, subject: &str) {
199        self.set_info_field("Subject", subject);
200    }
201
202    /// Set the document keywords.
203    pub fn set_keywords(&mut self, keywords: &[&str]) {
204        self.set_info_field("Keywords", &keywords.join(", "));
205    }
206
207    /// Set the document creator.
208    pub fn set_creator(&mut self, creator: &str) {
209        self.set_info_field("Creator", creator);
210    }
211
212    /// Set the document producer.
213    pub fn set_producer(&mut self, producer: &str) {
214        self.set_info_field("Producer", producer);
215    }
216
217    /// Get the document title, if any.
218    pub fn get_title(&self) -> Option<String> {
219        self.get_info_field("Title")
220    }
221
222    /// Get the document author, if any.
223    pub fn get_author(&self) -> Option<String> {
224        self.get_info_field("Author")
225    }
226
227    /// Get direct access to the context (for advanced use).
228    pub fn context(&self) -> &PdfContext {
229        &self.context
230    }
231
232    /// Get mutable access to the context.
233    pub fn context_mut(&mut self) -> &mut PdfContext {
234        &mut self.context
235    }
236
237    // --- Private helpers ---
238
239    fn get_catalog_ref(&self) -> Option<PdfRef> {
240        if let Some(PdfObject::Ref(r)) = &self.context.trailer_info.root {
241            Some(r.clone())
242        } else {
243            None
244        }
245    }
246
247    fn get_pages_ref(&self) -> PdfRef {
248        if let Some(catalog_ref) = self.get_catalog_ref() {
249            if let Some(PdfObject::Dict(catalog)) = self.context.lookup(&catalog_ref) {
250                if let Some(PdfObject::Ref(pages_ref)) = catalog.get(&PdfName::of("Pages")) {
251                    return pages_ref.clone();
252                }
253            }
254        }
255        // Fallback: should not happen in a well-formed document
256        PdfRef::of(1, 0)
257    }
258
259    /// Get the refs for each page (public for inspection).
260    pub fn get_page_refs(&self) -> Vec<PdfRef> {
261        let pages_ref = self.get_pages_ref();
262        self.collect_page_refs(&pages_ref)
263    }
264
265    fn collect_page_refs(&self, node_ref: &PdfRef) -> Vec<PdfRef> {
266        let mut result = Vec::new();
267        if let Some(PdfObject::Dict(dict)) = self.context.lookup(node_ref) {
268            if let Some(PdfObject::Name(type_name)) = dict.get(&PdfName::of("Type")) {
269                let type_str = type_name.as_string();
270                if type_str == "/Page" {
271                    result.push(node_ref.clone());
272                } else if type_str == "/Pages" {
273                    if let Some(PdfObject::Array(kids)) = dict.get(&PdfName::of("Kids")) {
274                        for i in 0..kids.size() {
275                            if let Some(PdfObject::Ref(kid_ref)) = kids.get(i) {
276                                result.extend(self.collect_page_refs(kid_ref));
277                            }
278                        }
279                    }
280                }
281            }
282        }
283        result
284    }
285
286    fn add_page_ref_to_tree(&mut self, pages_ref: &PdfRef, page_ref: &PdfRef) {
287        if let Some(PdfObject::Dict(pages_dict)) = self.context.lookup(pages_ref).cloned() {
288            let mut kids = if let Some(PdfObject::Array(k)) = pages_dict.get(&PdfName::of("Kids")) {
289                k.clone()
290            } else {
291                PdfArray::new()
292            };
293
294            kids.push(PdfObject::Ref(page_ref.clone()));
295            let new_count = kids.size() as f64;
296
297            let mut new_dict = pages_dict.clone();
298            new_dict.set(PdfName::of("Kids"), PdfObject::Array(kids));
299            new_dict.set(PdfName::of("Count"), PdfObject::Number(PdfNumber::of(new_count)));
300            self.context.assign(pages_ref, PdfObject::Dict(new_dict));
301        }
302    }
303
304    fn insert_page_ref_in_tree(&mut self, pages_ref: &PdfRef, page_ref: &PdfRef, index: usize) {
305        if let Some(PdfObject::Dict(pages_dict)) = self.context.lookup(pages_ref).cloned() {
306            let mut kids = if let Some(PdfObject::Array(k)) = pages_dict.get(&PdfName::of("Kids")) {
307                k.clone()
308            } else {
309                PdfArray::new()
310            };
311
312            let insert_idx = index.min(kids.size());
313            kids.insert(insert_idx, PdfObject::Ref(page_ref.clone()));
314            let new_count = kids.size() as f64;
315
316            let mut new_dict = pages_dict.clone();
317            new_dict.set(PdfName::of("Kids"), PdfObject::Array(kids));
318            new_dict.set(PdfName::of("Count"), PdfObject::Number(PdfNumber::of(new_count)));
319            self.context.assign(pages_ref, PdfObject::Dict(new_dict));
320        }
321    }
322
323    fn get_or_create_info_dict(&mut self) -> PdfRef {
324        // Check if Info dict already exists
325        if let Some(PdfObject::Ref(info_ref)) = &self.context.trailer_info.info {
326            return info_ref.clone();
327        }
328
329        // Create new Info dictionary
330        let info_dict = PdfDict::new();
331        let info_ref = self.context.register(PdfObject::Dict(info_dict));
332        self.context.trailer_info.info = Some(PdfObject::Ref(info_ref.clone()));
333        info_ref
334    }
335
336    fn set_info_field(&mut self, field: &str, value: &str) {
337        let info_ref = self.get_or_create_info_dict();
338        if let Some(PdfObject::Dict(info_dict)) = self.context.lookup(&info_ref).cloned() {
339            let mut new_dict = info_dict;
340            new_dict.set(
341                PdfName::of(field),
342                PdfObject::HexString(PdfHexString::from_text(value)),
343            );
344            self.context.assign(&info_ref, PdfObject::Dict(new_dict));
345        }
346    }
347
348    fn get_info_field(&self, field: &str) -> Option<String> {
349        if let Some(PdfObject::Ref(info_ref)) = &self.context.trailer_info.info {
350            if let Some(PdfObject::Dict(info_dict)) = self.context.lookup(info_ref) {
351                match info_dict.get(&PdfName::of(field)) {
352                    Some(PdfObject::String(s)) => return Some(s.decode_text()),
353                    Some(PdfObject::HexString(s)) => return Some(s.decode_text()),
354                    _ => return None,
355                }
356            }
357        }
358        None
359    }
360}
361
#[cfg(test)]
mod tests {
    use super::*;
    use crate::api::sizes::PageSizes;

    /// Builds a fresh document containing one blank page per entry in `sizes`.
    fn document_with_pages(sizes: &[[f64; 2]]) -> PdfDocument {
        let mut document = PdfDocument::create();
        for &size in sizes {
            document.add_page(size);
        }
        document
    }

    /// Reads a fixture file, panicking if it cannot be read.
    fn read_fixture(path: &str) -> Vec<u8> {
        std::fs::read(path).unwrap()
    }

    #[test]
    fn can_create_empty_document() {
        let empty = PdfDocument::create();
        assert_eq!(empty.get_page_count(), 0);
        assert!(!empty.is_encrypted());
    }

    #[test]
    fn can_add_pages() {
        let document = document_with_pages(&[PageSizes::LETTER, PageSizes::A4]);
        assert_eq!(document.get_page_count(), 2);
    }

    #[test]
    fn can_insert_page() {
        let mut document = document_with_pages(&[PageSizes::LETTER, PageSizes::LETTER]);
        document.insert_page(1, PageSizes::A4);
        assert_eq!(document.get_page_count(), 3);
    }

    #[test]
    fn can_remove_page() {
        let mut document = document_with_pages(&[PageSizes::LETTER, PageSizes::A4]);
        assert_eq!(document.get_page_count(), 2);

        document.remove_page(0);
        assert_eq!(document.get_page_count(), 1);
    }

    #[test]
    fn can_set_and_get_metadata() {
        let mut document = PdfDocument::create();
        document.set_title("Test Document");
        document.set_author("Test Author");

        assert_eq!(document.get_title(), Some("Test Document".to_string()));
        assert_eq!(document.get_author(), Some("Test Author".to_string()));
    }

    #[test]
    fn can_save_and_reload() {
        let mut original = document_with_pages(&[PageSizes::LETTER, PageSizes::A4]);
        original.set_title("Roundtrip Test");

        // Serialize, then parse the bytes back into a second document.
        let serialized = original.save();
        let reloaded = PdfDocument::load(&serialized).unwrap();

        assert_eq!(reloaded.get_page_count(), 2);
        assert_eq!(reloaded.get_title(), Some("Roundtrip Test".to_string()));
    }

    #[test]
    fn can_copy_pages_between_documents() {
        let source =
            document_with_pages(&[PageSizes::LETTER, PageSizes::A4, PageSizes::LEGAL]);

        let mut target = PdfDocument::create();
        let copied_refs = target.copy_pages(&source, &[0, 2]);

        assert_eq!(copied_refs.len(), 2);
        assert_eq!(target.get_page_count(), 2);
    }

    #[test]
    fn can_load_real_pdf() {
        let raw = read_fixture("test_assets/pdfs/normal.pdf");
        let loaded = PdfDocument::load(&raw).unwrap();

        assert!(loaded.get_page_count() > 0);
        assert!(!loaded.is_encrypted());
    }

    #[test]
    fn throws_for_encrypted_pdf() {
        let raw = read_fixture("test_assets/pdfs/encrypted_old.pdf");
        let outcome = PdfDocument::load(&raw);
        assert!(outcome.is_err());
    }

    #[test]
    fn allows_encrypted_pdf_with_ignore_flag() {
        let raw = read_fixture("test_assets/pdfs/encrypted_old.pdf");
        let lenient = LoadOptions {
            ignore_encryption: true,
            ..Default::default()
        };

        let outcome = PdfDocument::load_with_options(&raw, lenient);
        assert!(outcome.is_ok());
        assert!(outcome.unwrap().is_encrypted());
    }

    #[test]
    fn roundtrip_load_save_load() {
        let raw = read_fixture("test_assets/pdfs/normal.pdf");
        let first = PdfDocument::load(&raw).unwrap();
        let expected_pages = first.get_page_count();

        let rewritten = first.save();
        let second = PdfDocument::load(&rewritten).unwrap();
        assert_eq!(second.get_page_count(), expected_pages);
    }
}