1use crate::core::context::PdfContext;
2use crate::core::errors::{PdfError, Result};
3use crate::core::objects::*;
4use crate::core::parser::PdfParser;
5use crate::core::writers::PdfWriter;
6
/// Options accepted by [`PdfDocument::load_with_options`].
///
/// `LoadOptions::default()` leaves both flags `false`.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct LoadOptions {
    /// When `false`, loading a document whose trailer carries an `/Encrypt`
    /// entry fails with `PdfError::EncryptedPdf`; when `true` the document is
    /// loaded anyway and reports `is_encrypted() == true`.
    pub ignore_encryption: bool,
    /// Forwarded verbatim to `PdfParser::for_bytes_with_options`.
    /// NOTE(review): presumably makes the parser fail on malformed objects
    /// instead of tolerating them — confirm against the parser.
    pub throw_on_invalid_object: bool,
}
15
16pub struct PdfDocument {
20 context: PdfContext,
21 is_encrypted: bool,
22}
23
24impl PdfDocument {
25 pub fn create() -> Self {
27 let mut context = PdfContext::create();
28
29 let mut pages_dict = PdfDict::new();
32 pages_dict.set(PdfName::of("Type"), PdfObject::Name(PdfName::of("Pages")));
33 pages_dict.set(PdfName::of("Kids"), PdfObject::Array(PdfArray::new()));
34 pages_dict.set(PdfName::of("Count"), PdfObject::Number(PdfNumber::of(0.0)));
35 let pages_ref = context.register(PdfObject::Dict(pages_dict));
36
37 let mut catalog_dict = PdfDict::new();
39 catalog_dict.set(PdfName::of("Type"), PdfObject::Name(PdfName::of("Catalog")));
40 catalog_dict.set(PdfName::of("Pages"), PdfObject::Ref(pages_ref));
41 let catalog_ref = context.register(PdfObject::Dict(catalog_dict));
42
43 context.trailer_info.root = Some(PdfObject::Ref(catalog_ref));
44
45 PdfDocument {
46 context,
47 is_encrypted: false,
48 }
49 }
50
51 pub fn load(bytes: &[u8]) -> Result<Self> {
53 Self::load_with_options(bytes, LoadOptions::default())
54 }
55
56 pub fn load_with_options(bytes: &[u8], options: LoadOptions) -> Result<Self> {
58 let parser = PdfParser::for_bytes_with_options(bytes, options.throw_on_invalid_object);
59 let context = parser.parse_document()?;
60
61 let is_encrypted = context.trailer_info.encrypt.is_some();
62
63 if is_encrypted && !options.ignore_encryption {
64 return Err(PdfError::EncryptedPdf);
65 }
66
67 Ok(PdfDocument {
68 context,
69 is_encrypted,
70 })
71 }
72
73 pub fn save(&self) -> Vec<u8> {
75 PdfWriter::serialize_to_buffer(&self.context)
76 }
77
78 pub fn is_encrypted(&self) -> bool {
80 self.is_encrypted
81 }
82
83 pub fn get_page_count(&self) -> usize {
85 self.get_page_refs().len()
86 }
87
88 pub fn get_page_indices(&self) -> Vec<usize> {
90 (0..self.get_page_count()).collect()
91 }
92
93 pub fn add_page(&mut self, size: [f64; 2]) -> PdfRef {
95 let pages_ref = self.get_pages_ref();
96
97 let mut page_dict = PdfDict::new();
99 page_dict.set(PdfName::of("Type"), PdfObject::Name(PdfName::of("Page")));
100 page_dict.set(PdfName::of("Parent"), PdfObject::Ref(pages_ref.clone()));
101
102 let mut media_box = PdfArray::new();
103 media_box.push(PdfObject::Number(PdfNumber::of(0.0)));
104 media_box.push(PdfObject::Number(PdfNumber::of(0.0)));
105 media_box.push(PdfObject::Number(PdfNumber::of(size[0])));
106 media_box.push(PdfObject::Number(PdfNumber::of(size[1])));
107 page_dict.set(PdfName::of("MediaBox"), PdfObject::Array(media_box));
108
109 let page_ref = self.context.register(PdfObject::Dict(page_dict));
110
111 self.add_page_ref_to_tree(&pages_ref, &page_ref);
113
114 page_ref
115 }
116
117 pub fn insert_page(&mut self, index: usize, size: [f64; 2]) -> PdfRef {
119 let pages_ref = self.get_pages_ref();
120
121 let mut page_dict = PdfDict::new();
122 page_dict.set(PdfName::of("Type"), PdfObject::Name(PdfName::of("Page")));
123 page_dict.set(PdfName::of("Parent"), PdfObject::Ref(pages_ref.clone()));
124
125 let mut media_box = PdfArray::new();
126 media_box.push(PdfObject::Number(PdfNumber::of(0.0)));
127 media_box.push(PdfObject::Number(PdfNumber::of(0.0)));
128 media_box.push(PdfObject::Number(PdfNumber::of(size[0])));
129 media_box.push(PdfObject::Number(PdfNumber::of(size[1])));
130 page_dict.set(PdfName::of("MediaBox"), PdfObject::Array(media_box));
131
132 let page_ref = self.context.register(PdfObject::Dict(page_dict));
133
134 self.insert_page_ref_in_tree(&pages_ref, &page_ref, index);
136
137 page_ref
138 }
139
140 pub fn remove_page(&mut self, index: usize) {
142 let pages_ref = self.get_pages_ref();
143 if let Some(PdfObject::Dict(pages_dict)) = self.context.lookup(&pages_ref).cloned() {
144 if let Some(PdfObject::Array(mut kids)) = pages_dict.get(&PdfName::of("Kids")).cloned() {
145 if index < kids.size() {
146 kids.remove(index);
147 let new_count = kids.size() as f64;
148 let mut new_pages_dict = pages_dict.clone();
149 new_pages_dict.set(PdfName::of("Kids"), PdfObject::Array(kids));
150 new_pages_dict.set(PdfName::of("Count"), PdfObject::Number(PdfNumber::of(new_count)));
151 self.context.assign(&pages_ref, PdfObject::Dict(new_pages_dict));
152 }
153 }
154 }
155 }
156
157 pub fn copy_pages(&mut self, src_doc: &PdfDocument, indices: &[usize]) -> Vec<PdfRef> {
159 let src_page_refs = src_doc.get_page_refs();
160 let pages_ref = self.get_pages_ref();
161 let mut new_refs = Vec::new();
162
163 for &idx in indices {
164 if idx >= src_page_refs.len() {
165 continue;
166 }
167 let src_page_ref = &src_page_refs[idx];
168
169 if let Some(src_page) = src_doc.context.lookup(src_page_ref) {
171 let mut page = src_page.clone();
172
173 if let PdfObject::Dict(ref mut dict) = page {
175 dict.set(PdfName::of("Parent"), PdfObject::Ref(pages_ref.clone()));
176 }
177
178 let new_ref = self.context.register(page);
179 self.add_page_ref_to_tree(&pages_ref, &new_ref);
180 new_refs.push(new_ref);
181 }
182 }
183
184 new_refs
185 }
186
187 pub fn set_title(&mut self, title: &str) {
189 self.set_info_field("Title", title);
190 }
191
192 pub fn set_author(&mut self, author: &str) {
194 self.set_info_field("Author", author);
195 }
196
197 pub fn set_subject(&mut self, subject: &str) {
199 self.set_info_field("Subject", subject);
200 }
201
202 pub fn set_keywords(&mut self, keywords: &[&str]) {
204 self.set_info_field("Keywords", &keywords.join(", "));
205 }
206
207 pub fn set_creator(&mut self, creator: &str) {
209 self.set_info_field("Creator", creator);
210 }
211
212 pub fn set_producer(&mut self, producer: &str) {
214 self.set_info_field("Producer", producer);
215 }
216
217 pub fn get_title(&self) -> Option<String> {
219 self.get_info_field("Title")
220 }
221
222 pub fn get_author(&self) -> Option<String> {
224 self.get_info_field("Author")
225 }
226
227 pub fn context(&self) -> &PdfContext {
229 &self.context
230 }
231
232 pub fn context_mut(&mut self) -> &mut PdfContext {
234 &mut self.context
235 }
236
237 fn get_catalog_ref(&self) -> Option<PdfRef> {
240 if let Some(PdfObject::Ref(r)) = &self.context.trailer_info.root {
241 Some(r.clone())
242 } else {
243 None
244 }
245 }
246
247 fn get_pages_ref(&self) -> PdfRef {
248 if let Some(catalog_ref) = self.get_catalog_ref() {
249 if let Some(PdfObject::Dict(catalog)) = self.context.lookup(&catalog_ref) {
250 if let Some(PdfObject::Ref(pages_ref)) = catalog.get(&PdfName::of("Pages")) {
251 return pages_ref.clone();
252 }
253 }
254 }
255 PdfRef::of(1, 0)
257 }
258
259 pub fn get_page_refs(&self) -> Vec<PdfRef> {
261 let pages_ref = self.get_pages_ref();
262 self.collect_page_refs(&pages_ref)
263 }
264
265 fn collect_page_refs(&self, node_ref: &PdfRef) -> Vec<PdfRef> {
266 let mut result = Vec::new();
267 if let Some(PdfObject::Dict(dict)) = self.context.lookup(node_ref) {
268 if let Some(PdfObject::Name(type_name)) = dict.get(&PdfName::of("Type")) {
269 let type_str = type_name.as_string();
270 if type_str == "/Page" {
271 result.push(node_ref.clone());
272 } else if type_str == "/Pages" {
273 if let Some(PdfObject::Array(kids)) = dict.get(&PdfName::of("Kids")) {
274 for i in 0..kids.size() {
275 if let Some(PdfObject::Ref(kid_ref)) = kids.get(i) {
276 result.extend(self.collect_page_refs(kid_ref));
277 }
278 }
279 }
280 }
281 }
282 }
283 result
284 }
285
286 fn add_page_ref_to_tree(&mut self, pages_ref: &PdfRef, page_ref: &PdfRef) {
287 if let Some(PdfObject::Dict(pages_dict)) = self.context.lookup(pages_ref).cloned() {
288 let mut kids = if let Some(PdfObject::Array(k)) = pages_dict.get(&PdfName::of("Kids")) {
289 k.clone()
290 } else {
291 PdfArray::new()
292 };
293
294 kids.push(PdfObject::Ref(page_ref.clone()));
295 let new_count = kids.size() as f64;
296
297 let mut new_dict = pages_dict.clone();
298 new_dict.set(PdfName::of("Kids"), PdfObject::Array(kids));
299 new_dict.set(PdfName::of("Count"), PdfObject::Number(PdfNumber::of(new_count)));
300 self.context.assign(pages_ref, PdfObject::Dict(new_dict));
301 }
302 }
303
304 fn insert_page_ref_in_tree(&mut self, pages_ref: &PdfRef, page_ref: &PdfRef, index: usize) {
305 if let Some(PdfObject::Dict(pages_dict)) = self.context.lookup(pages_ref).cloned() {
306 let mut kids = if let Some(PdfObject::Array(k)) = pages_dict.get(&PdfName::of("Kids")) {
307 k.clone()
308 } else {
309 PdfArray::new()
310 };
311
312 let insert_idx = index.min(kids.size());
313 kids.insert(insert_idx, PdfObject::Ref(page_ref.clone()));
314 let new_count = kids.size() as f64;
315
316 let mut new_dict = pages_dict.clone();
317 new_dict.set(PdfName::of("Kids"), PdfObject::Array(kids));
318 new_dict.set(PdfName::of("Count"), PdfObject::Number(PdfNumber::of(new_count)));
319 self.context.assign(pages_ref, PdfObject::Dict(new_dict));
320 }
321 }
322
323 fn get_or_create_info_dict(&mut self) -> PdfRef {
324 if let Some(PdfObject::Ref(info_ref)) = &self.context.trailer_info.info {
326 return info_ref.clone();
327 }
328
329 let info_dict = PdfDict::new();
331 let info_ref = self.context.register(PdfObject::Dict(info_dict));
332 self.context.trailer_info.info = Some(PdfObject::Ref(info_ref.clone()));
333 info_ref
334 }
335
336 fn set_info_field(&mut self, field: &str, value: &str) {
337 let info_ref = self.get_or_create_info_dict();
338 if let Some(PdfObject::Dict(info_dict)) = self.context.lookup(&info_ref).cloned() {
339 let mut new_dict = info_dict;
340 new_dict.set(
341 PdfName::of(field),
342 PdfObject::HexString(PdfHexString::from_text(value)),
343 );
344 self.context.assign(&info_ref, PdfObject::Dict(new_dict));
345 }
346 }
347
348 fn get_info_field(&self, field: &str) -> Option<String> {
349 if let Some(PdfObject::Ref(info_ref)) = &self.context.trailer_info.info {
350 if let Some(PdfObject::Dict(info_dict)) = self.context.lookup(info_ref) {
351 match info_dict.get(&PdfName::of(field)) {
352 Some(PdfObject::String(s)) => return Some(s.decode_text()),
353 Some(PdfObject::HexString(s)) => return Some(s.decode_text()),
354 _ => return None,
355 }
356 }
357 }
358 None
359 }
360}
361
#[cfg(test)]
mod tests {
    use super::*;
    use crate::api::sizes::PageSizes;

    /// Builds a fresh document with one blank page per entry of `sizes`.
    fn document_with_pages(sizes: &[[f64; 2]]) -> PdfDocument {
        let mut document = PdfDocument::create();
        for &size in sizes {
            document.add_page(size);
        }
        document
    }

    /// Reads a fixture from disk, panicking when it is missing.
    fn read_fixture(path: &str) -> Vec<u8> {
        std::fs::read(path).unwrap()
    }

    /// A new document has no pages and is not encrypted.
    #[test]
    fn can_create_empty_document() {
        let document = PdfDocument::create();
        assert_eq!(document.get_page_count(), 0);
        assert!(!document.is_encrypted());
    }

    /// Each `add_page` call grows the page count by one.
    #[test]
    fn can_add_pages() {
        let document = document_with_pages(&[PageSizes::LETTER, PageSizes::A4]);
        assert_eq!(document.get_page_count(), 2);
    }

    /// Inserting between two existing pages yields three pages.
    #[test]
    fn can_insert_page() {
        let mut document = document_with_pages(&[PageSizes::LETTER, PageSizes::LETTER]);
        document.insert_page(1, PageSizes::A4);
        assert_eq!(document.get_page_count(), 3);
    }

    /// Removing the first of two pages leaves one.
    #[test]
    fn can_remove_page() {
        let mut document = document_with_pages(&[PageSizes::LETTER, PageSizes::A4]);
        assert_eq!(document.get_page_count(), 2);
        document.remove_page(0);
        assert_eq!(document.get_page_count(), 1);
    }

    /// Title and author written through the setters are read back unchanged.
    #[test]
    fn can_set_and_get_metadata() {
        let mut document = PdfDocument::create();
        document.set_title("Test Document");
        document.set_author("Test Author");
        assert_eq!(document.get_title(), Some("Test Document".to_string()));
        assert_eq!(document.get_author(), Some("Test Author".to_string()));
    }

    /// Pages and title survive a save followed by a load.
    #[test]
    fn can_save_and_reload() {
        let mut original = document_with_pages(&[PageSizes::LETTER, PageSizes::A4]);
        original.set_title("Roundtrip Test");

        let serialized = original.save();

        let reloaded = PdfDocument::load(&serialized).unwrap();
        assert_eq!(reloaded.get_page_count(), 2);
        assert_eq!(reloaded.get_title(), Some("Roundtrip Test".to_string()));
    }

    /// Copying two of three source pages adds exactly two pages to the target.
    #[test]
    fn can_copy_pages_between_documents() {
        let source =
            document_with_pages(&[PageSizes::LETTER, PageSizes::A4, PageSizes::LEGAL]);

        let mut target = PdfDocument::create();
        let copied = target.copy_pages(&source, &[0, 2]);
        assert_eq!(copied.len(), 2);
        assert_eq!(target.get_page_count(), 2);
    }

    /// An unencrypted fixture loads and exposes at least one page.
    #[test]
    fn can_load_real_pdf() {
        let bytes = read_fixture("test_assets/pdfs/normal.pdf");
        let document = PdfDocument::load(&bytes).unwrap();
        assert!(document.get_page_count() > 0);
        assert!(!document.is_encrypted());
    }

    /// Loading an encrypted fixture with default options is an error.
    #[test]
    fn throws_for_encrypted_pdf() {
        let bytes = read_fixture("test_assets/pdfs/encrypted_old.pdf");
        let outcome = PdfDocument::load(&bytes);
        assert!(outcome.is_err());
    }

    /// With `ignore_encryption` set, the encrypted fixture loads and is
    /// flagged as encrypted.
    #[test]
    fn allows_encrypted_pdf_with_ignore_flag() {
        let bytes = read_fixture("test_assets/pdfs/encrypted_old.pdf");
        let options = LoadOptions {
            ignore_encryption: true,
            ..Default::default()
        };
        let outcome = PdfDocument::load_with_options(&bytes, options);
        assert!(outcome.is_ok());
        assert!(outcome.unwrap().is_encrypted());
    }

    /// Loading, saving and loading again keeps the page count stable.
    #[test]
    fn roundtrip_load_save_load() {
        let bytes = read_fixture("test_assets/pdfs/normal.pdf");
        let first_pass = PdfDocument::load(&bytes).unwrap();
        let expected_pages = first_pass.get_page_count();

        let saved_bytes = first_pass.save();
        let second_pass = PdfDocument::load(&saved_bytes).unwrap();
        assert_eq!(second_pass.get_page_count(), expected_pages);
    }
}