1use std::collections::HashMap;
5use std::path::Path;
6
7use crate::error::Result;
8use crate::object::{IndirectRef, PdfDict, PdfObject};
9use crate::page::{collect_pages, PageInfo};
10use crate::parser::PdfDocument;
11use crate::writer::page::PageBuilder;
12use crate::writer::serialize::serialize_pdf;
13use crate::writer::PdfWriter;
14
/// Editable, in-memory view of a PDF document.
///
/// Objects are held in an internal [`PdfWriter`]; the methods on this type
/// change those objects, and `build`, `save` or `incremental_save` turn the
/// result back into bytes.
pub struct DocumentModifier {
    /// Writer holding the `(object number, object)` list being edited, the
    /// PDF version, and the next free object number.
    writer: PdfWriter,
    /// Reference to the document catalog; passed to the serializer as the root.
    catalog_ref: IndirectRef,
    /// Reference to the `Info` dictionary, if the document has one
    /// (allocated on demand by `set_info`).
    info_ref: Option<IndirectRef>,
}
22
23impl DocumentModifier {
24 pub fn from_document(doc: &PdfDocument) -> Result<Self> {
27 let mut writer = PdfWriter::new();
28 writer.version = doc.version;
29
30 let catalog_ref = doc
32 .catalog_ref()
33 .cloned()
34 .unwrap_or(IndirectRef {
35 obj_num: 1,
36 gen_num: 0,
37 });
38
39 let info_ref = doc
41 .trailer()
42 .get_ref(b"Info")
43 .cloned();
44
45 let mut max_obj = 0u32;
47 let refs: Vec<IndirectRef> = doc.object_refs().collect();
48 for iref in &refs {
49 if let Ok(obj) = doc.resolve(iref) {
50 writer.objects.push((iref.obj_num, obj));
51 max_obj = max_obj.max(iref.obj_num);
52 }
53 }
54 writer.next_obj_num = max_obj + 1;
55
56 Ok(Self {
57 writer,
58 catalog_ref,
59 info_ref,
60 })
61 }
62
63 pub fn writer(&mut self) -> &mut PdfWriter {
65 &mut self.writer
66 }
67
68 pub fn catalog_ref(&self) -> &IndirectRef {
70 &self.catalog_ref
71 }
72
73 pub fn set_object(&mut self, obj_num: u32, obj: PdfObject) {
75 self.writer.set_object(obj_num, obj);
76 }
77
78 pub fn add_object(&mut self, obj: PdfObject) -> IndirectRef {
80 self.writer.add_object(obj)
81 }
82
83 pub fn find_object_pub(&self, obj_num: u32) -> Option<&PdfObject> {
85 self.find_object(obj_num)
86 }
87
88 pub fn delete_page(&mut self, page_index: usize) -> Result<()> {
91 let pages_ref = self.find_pages_ref()?;
92 let pages_obj_num = pages_ref.obj_num;
93
94 let pages_obj = self.find_object(pages_obj_num)
96 .cloned()
97 .unwrap_or(PdfObject::Null);
98
99 if let PdfObject::Dict(mut pages_dict) = pages_obj {
100 if let Some(PdfObject::Array(mut kids)) = pages_dict.remove(b"Kids") {
101 if page_index < kids.len() {
102 kids.remove(page_index);
103 let count = kids.len() as i64;
104 pages_dict.insert(b"Kids".to_vec(), PdfObject::Array(kids));
105 pages_dict.insert(b"Count".to_vec(), PdfObject::Integer(count));
106 self.writer.set_object(pages_obj_num, PdfObject::Dict(pages_dict));
107 }
108 }
109 }
110
111 Ok(())
112 }
113
114 pub fn insert_page(&mut self, page_index: usize, page: PageBuilder) -> Result<()> {
116 let pages_ref = self.find_pages_ref()?;
117 let pages_obj_num = pages_ref.obj_num;
118
119 let page_ref = page.build(&mut self.writer, &pages_ref);
120
121 let pages_obj = self.find_object(pages_obj_num)
122 .cloned()
123 .unwrap_or(PdfObject::Null);
124
125 if let PdfObject::Dict(mut pages_dict) = pages_obj {
126 if let Some(PdfObject::Array(mut kids)) = pages_dict.remove(b"Kids") {
127 let idx = page_index.min(kids.len());
128 kids.insert(idx, PdfObject::Reference(page_ref));
129 let count = kids.len() as i64;
130 pages_dict.insert(b"Kids".to_vec(), PdfObject::Array(kids));
131 pages_dict.insert(b"Count".to_vec(), PdfObject::Integer(count));
132 self.writer.set_object(pages_obj_num, PdfObject::Dict(pages_dict));
133 }
134 }
135
136 Ok(())
137 }
138
139 pub fn reorder_pages(&mut self, order: &[usize]) -> Result<()> {
141 let pages_ref = self.find_pages_ref()?;
142 let pages_obj_num = pages_ref.obj_num;
143
144 let pages_obj = self.find_object(pages_obj_num)
145 .cloned()
146 .unwrap_or(PdfObject::Null);
147
148 if let PdfObject::Dict(mut pages_dict) = pages_obj {
149 if let Some(PdfObject::Array(kids)) = pages_dict.remove(b"Kids") {
150 let mut new_kids = Vec::with_capacity(order.len());
151 for &idx in order {
152 if idx < kids.len() {
153 new_kids.push(kids[idx].clone());
154 }
155 }
156 let count = new_kids.len() as i64;
157 pages_dict.insert(b"Kids".to_vec(), PdfObject::Array(new_kids));
158 pages_dict.insert(b"Count".to_vec(), PdfObject::Integer(count));
159 self.writer.set_object(pages_obj_num, PdfObject::Dict(pages_dict));
160 }
161 }
162
163 Ok(())
164 }
165
166 pub fn set_info(&mut self, key: &[u8], value: &str) {
168 let info_num = if let Some(ref r) = self.info_ref {
169 r.obj_num
170 } else {
171 let num = self.writer.alloc_object_num();
172 self.info_ref = Some(IndirectRef {
173 obj_num: num,
174 gen_num: 0,
175 });
176 num
177 };
178
179 let info_obj = self.find_object(info_num)
181 .cloned()
182 .unwrap_or(PdfObject::Dict(PdfDict::new()));
183
184 if let PdfObject::Dict(mut info_dict) = info_obj {
185 info_dict.insert(
186 key.to_vec(),
187 PdfObject::String(value.as_bytes().to_vec()),
188 );
189 self.writer.set_object(info_num, PdfObject::Dict(info_dict));
190 }
191 }
192
193 pub fn garbage_collect(&mut self) {
198 let mut reachable = std::collections::HashSet::new();
199
200 reachable.insert(self.catalog_ref.obj_num);
202 if let Some(ref info) = self.info_ref {
203 reachable.insert(info.obj_num);
204 }
205
206 let mut work: Vec<u32> = reachable.iter().copied().collect();
208 while let Some(obj_num) = work.pop() {
209 if let Some(obj) = self.find_object(obj_num).cloned() {
210 let refs = collect_references(&obj);
211 for r in refs {
212 if reachable.insert(r) {
213 work.push(r);
214 }
215 }
216 }
217 }
218
219 self.writer.objects.retain(|(num, _)| reachable.contains(num));
221 }
222
223 pub fn build(self) -> Result<Vec<u8>> {
225 serialize_pdf(
226 &self.writer.objects,
227 self.writer.version,
228 &self.catalog_ref,
229 self.info_ref.as_ref(),
230 )
231 }
232
233 pub fn build_with_xref_stream(
236 self,
237 compressed: &[crate::writer::object_stream::CompressedObjInfo],
238 ) -> Result<Vec<u8>> {
239 crate::writer::serialize::serialize_pdf_with_xref_stream(
240 &self.writer.objects,
241 compressed,
242 self.writer.version,
243 &self.catalog_ref,
244 self.info_ref.as_ref(),
245 )
246 }
247
248 pub fn save(self, path: &Path) -> Result<()> {
250 let bytes = self.build()?;
251 std::fs::write(path, bytes)?;
252 Ok(())
253 }
254
255 fn find_pages_ref(&self) -> Result<IndirectRef> {
258 if let Some(PdfObject::Dict(catalog)) = self.find_object(self.catalog_ref.obj_num) {
260 if let Some(PdfObject::Reference(r)) = catalog.get(b"Pages") {
261 return Ok(r.clone());
262 }
263 }
264 Ok(IndirectRef {
266 obj_num: 2,
267 gen_num: 0,
268 })
269 }
270
271 fn find_object(&self, obj_num: u32) -> Option<&PdfObject> {
272 self.writer
273 .objects
274 .iter()
275 .find(|(n, _)| *n == obj_num)
276 .map(|(_, o)| o)
277 }
278}
279
280fn collect_references(obj: &PdfObject) -> Vec<u32> {
282 let mut refs = Vec::new();
283 collect_references_inner(obj, &mut refs);
284 refs
285}
286
287fn collect_references_inner(obj: &PdfObject, refs: &mut Vec<u32>) {
288 match obj {
289 PdfObject::Reference(r) => {
290 refs.push(r.obj_num);
291 }
292 PdfObject::Dict(d) => {
293 for (_, val) in d.iter() {
294 collect_references_inner(val, refs);
295 }
296 }
297 PdfObject::Array(arr) => {
298 for item in arr {
299 collect_references_inner(item, refs);
300 }
301 }
302 PdfObject::Stream { dict, .. } => {
303 for (_, val) in dict.iter() {
304 collect_references_inner(val, refs);
305 }
306 }
307 _ => {}
308 }
309}
310
311pub fn incremental_save(original_data: &[u8], modifier: DocumentModifier) -> Result<Vec<u8>> {
316 use std::io::Write;
317
318 let old_startxref = crate::xref::find_startxref(original_data)?;
320
321 let mut buf = original_data.to_vec();
322
323 let max_obj_num = modifier
325 .writer
326 .objects
327 .iter()
328 .map(|(n, _)| *n)
329 .max()
330 .unwrap_or(0);
331 let xref_size = max_obj_num + 1;
332
333 let mut offsets: Vec<(u32, usize)> = Vec::new();
335 for (obj_num, obj) in &modifier.writer.objects {
336 let offset = buf.len();
337 offsets.push((*obj_num, offset));
338 write!(buf, "{} 0 obj\n", obj_num)?;
339 write!(buf, "{}", obj)?;
341 write!(buf, "\nendobj\n")?;
342 }
343
344 let new_xref_offset = buf.len();
346 write!(buf, "xref\n")?;
347
348 let mut sorted_offsets = offsets.clone();
351 sorted_offsets.sort_by_key(|(n, _)| *n);
352
353 for (obj_num, offset) in &sorted_offsets {
355 write!(buf, "{} 1\n", obj_num)?;
356 write!(buf, "{:010} {:05} n \r\n", offset, 0)?;
357 }
358
359 let mut trailer = PdfDict::new();
361 trailer.insert(b"Size".to_vec(), PdfObject::Integer(xref_size as i64));
362 trailer.insert(
363 b"Root".to_vec(),
364 PdfObject::Reference(modifier.catalog_ref.clone()),
365 );
366 if let Some(ref info) = modifier.info_ref {
367 trailer.insert(b"Info".to_vec(), PdfObject::Reference(info.clone()));
368 }
369 trailer.insert(
370 b"Prev".to_vec(),
371 PdfObject::Integer(old_startxref as i64),
372 );
373
374 write!(buf, "trailer\n")?;
375 write!(buf, "{}", PdfObject::Dict(trailer))?;
376 write!(buf, "\n")?;
377
378 write!(buf, "startxref\n{}\n%%EOF\n", new_xref_offset)?;
379
380 Ok(buf)
381}
382
383pub fn merge_documents(docs: &[&PdfDocument]) -> Result<Vec<u8>> {
388 let mut writer = PdfWriter::new();
389 let pages_obj_num = writer.alloc_object_num();
390 let pages_ref = IndirectRef {
391 obj_num: pages_obj_num,
392 gen_num: 0,
393 };
394
395 let mut all_page_refs: Vec<IndirectRef> = Vec::new();
396
397 for doc in docs.iter() {
398 let pages = collect_pages(*doc)?;
399 for page_info in &pages {
400 let page_ref = graft_page(&mut writer, *doc, page_info, &pages_ref)?;
401 all_page_refs.push(page_ref);
402 }
403 }
404
405 let kids: Vec<PdfObject> = all_page_refs
407 .iter()
408 .map(|r| PdfObject::Reference(r.clone()))
409 .collect();
410 let count = kids.len() as i64;
411
412 let mut pages_dict = PdfDict::new();
413 pages_dict.insert(b"Type".to_vec(), PdfObject::Name(b"Pages".to_vec()));
414 pages_dict.insert(b"Kids".to_vec(), PdfObject::Array(kids));
415 pages_dict.insert(b"Count".to_vec(), PdfObject::Integer(count));
416 writer.set_object(pages_obj_num, PdfObject::Dict(pages_dict));
417
418 let mut catalog_dict = PdfDict::new();
420 catalog_dict.insert(b"Type".to_vec(), PdfObject::Name(b"Catalog".to_vec()));
421 catalog_dict.insert(b"Pages".to_vec(), PdfObject::Reference(pages_ref));
422 let catalog_ref = writer.add_object(PdfObject::Dict(catalog_dict));
423
424 serialize_pdf(&writer.objects, (1, 7), &catalog_ref, None)
425}
426
427fn graft_page(
430 writer: &mut PdfWriter,
431 doc: &PdfDocument,
432 page_info: &PageInfo,
433 new_pages_ref: &IndirectRef,
434) -> Result<IndirectRef> {
435 let mut remap: HashMap<u32, u32> = HashMap::new();
436
437 let page_obj = doc.resolve(&page_info.page_ref)?;
439
440 let new_page_obj = deep_copy_object(writer, doc, &page_obj, &mut remap)?;
442
443 if let PdfObject::Dict(mut page_dict) = new_page_obj {
445 page_dict.insert(
446 b"Parent".to_vec(),
447 PdfObject::Reference(new_pages_ref.clone()),
448 );
449 Ok(writer.add_object(PdfObject::Dict(page_dict)))
450 } else {
451 Ok(writer.add_object(new_page_obj))
452 }
453}
454
455fn deep_copy_object(
457 writer: &mut PdfWriter,
458 doc: &PdfDocument,
459 obj: &PdfObject,
460 remap: &mut HashMap<u32, u32>,
461) -> Result<PdfObject> {
462 match obj {
463 PdfObject::Reference(r) => {
464 if let Some(&new_num) = remap.get(&r.obj_num) {
466 return Ok(PdfObject::Reference(IndirectRef {
467 obj_num: new_num,
468 gen_num: 0,
469 }));
470 }
471
472 let new_num = writer.alloc_object_num();
474 remap.insert(r.obj_num, new_num);
475
476 let resolved = doc.resolve(r)?;
478 let copied = deep_copy_object(writer, doc, &resolved, remap)?;
479 writer.set_object(new_num, copied);
480
481 Ok(PdfObject::Reference(IndirectRef {
482 obj_num: new_num,
483 gen_num: 0,
484 }))
485 }
486 PdfObject::Dict(d) => {
487 let mut new_dict = PdfDict::new();
488 for (key, val) in d.iter() {
489 let new_val = deep_copy_object(writer, doc, val, remap)?;
490 new_dict.insert(key.clone(), new_val);
491 }
492 Ok(PdfObject::Dict(new_dict))
493 }
494 PdfObject::Array(arr) => {
495 let mut new_arr = Vec::with_capacity(arr.len());
496 for item in arr {
497 new_arr.push(deep_copy_object(writer, doc, item, remap)?);
498 }
499 Ok(PdfObject::Array(new_arr))
500 }
501 PdfObject::Stream { dict, data } => {
502 let mut new_dict = PdfDict::new();
503 for (key, val) in dict.iter() {
504 let new_val = deep_copy_object(writer, doc, val, remap)?;
505 new_dict.insert(key.clone(), new_val);
506 }
507 Ok(PdfObject::Stream {
508 dict: new_dict,
509 data: data.clone(),
510 })
511 }
512 other => Ok(other.clone()),
514 }
515}
516
#[cfg(test)]
mod tests {
    use super::*;
    use crate::writer::document::DocumentBuilder;
    use crate::writer::page::PageBuilder;

    /// Builds a PDF with `num_pages` US-Letter pages, each showing
    /// "`text` - Page N" in 12pt Helvetica.
    fn create_test_pdf(text: &str, num_pages: usize) -> Vec<u8> {
        let mut doc = DocumentBuilder::new();
        let font = doc.add_standard_font("Helvetica");

        for i in 0..num_pages {
            let mut page = PageBuilder::new(612.0, 792.0);
            page.add_font(&font, "Helvetica");
            page.begin_text();
            page.set_font(&font, 12.0);
            page.move_to(72.0, 720.0);
            page.show_text(&format!("{} - Page {}", text, i + 1));
            page.end_text();
            doc.add_page(page);
        }

        doc.build().unwrap()
    }

    /// Parses `bytes` as a PDF.
    fn parse(bytes: Vec<u8>) -> PdfDocument {
        PdfDocument::from_bytes(bytes).unwrap()
    }

    /// Parses `bytes` and returns how many pages `collect_pages` reports.
    fn page_count(bytes: Vec<u8>) -> usize {
        collect_pages(&parse(bytes)).unwrap().len()
    }

    #[test]
    fn test_modifier_roundtrip() {
        let doc = parse(create_test_pdf("Hello", 2));

        let modifier = DocumentModifier::from_document(&doc).unwrap();
        let new_bytes = modifier.build().unwrap();

        assert_eq!(page_count(new_bytes), 2);
    }

    #[test]
    fn test_delete_page() {
        let doc = parse(create_test_pdf("Test", 3));

        let mut modifier = DocumentModifier::from_document(&doc).unwrap();
        modifier.delete_page(1).unwrap();
        let new_bytes = modifier.build().unwrap();

        assert_eq!(page_count(new_bytes), 2);
    }

    #[test]
    fn test_reorder_pages() {
        let doc = parse(create_test_pdf("Reorder", 3));

        let mut modifier = DocumentModifier::from_document(&doc).unwrap();
        modifier.reorder_pages(&[2, 0, 1]).unwrap();
        let new_bytes = modifier.build().unwrap();

        assert_eq!(page_count(new_bytes), 3);
    }

    #[test]
    fn test_set_info() {
        let doc = parse(create_test_pdf("Info", 1));

        let mut modifier = DocumentModifier::from_document(&doc).unwrap();
        modifier.set_info(b"Title", "New Title");
        modifier.set_info(b"Author", "New Author");

        let new_bytes = modifier.build().unwrap();
        let text = String::from_utf8_lossy(&new_bytes);
        assert!(text.contains("New Title"));
        assert!(text.contains("New Author"));
    }

    #[test]
    fn test_merge_documents() {
        let doc1 = parse(create_test_pdf("Doc1", 2));
        let doc2 = parse(create_test_pdf("Doc2", 3));

        let merged = merge_documents(&[&doc1, &doc2]).unwrap();

        assert_eq!(page_count(merged), 5);
    }

    #[test]
    fn test_incremental_save() {
        let original = create_test_pdf("Original", 1);
        let original_len = original.len();

        let doc = parse(original.clone());
        let mut modifier = DocumentModifier::from_document(&doc).unwrap();
        modifier.set_info(b"Title", "Updated Title");

        let result = incremental_save(&original, modifier).unwrap();

        // The update is appended; the original bytes stay an untouched prefix.
        assert!(result.len() > original_len);
        assert_eq!(&result[..original_len], &original[..]);

        let text = String::from_utf8_lossy(&result);
        assert!(text.contains("Updated Title"));

        // The new trailer must chain back to the previous xref section.
        assert!(text.contains("/Prev"));

        let tail = String::from_utf8_lossy(&result[result.len().saturating_sub(50)..]);
        assert!(tail.contains("%%EOF"));
    }

    #[test]
    fn test_garbage_collect() {
        let doc = parse(create_test_pdf("GC Test", 1));
        let mut modifier = DocumentModifier::from_document(&doc).unwrap();

        // Collect once so the baseline contains reachable objects only.
        modifier.garbage_collect();
        let count_baseline = modifier.writer.objects.len();

        // Two objects that nothing references.
        modifier.add_object(PdfObject::Integer(999));
        modifier.add_object(PdfObject::String(b"orphan".to_vec()));
        let count_with_orphans = modifier.writer.objects.len();
        assert_eq!(count_with_orphans, count_baseline + 2);

        modifier.garbage_collect();
        let count_after = modifier.writer.objects.len();

        assert_eq!(count_after, count_baseline);
    }

    #[test]
    fn test_resource_conflict_merge() {
        // Both documents are built the same way, so their fonts share a name.
        let doc1 = parse(create_test_pdf("Doc1", 1));
        let doc2 = parse(create_test_pdf("Doc2", 1));

        let merged = merge_documents(&[&doc1, &doc2]).unwrap();

        let reparsed = parse(merged);
        let pages = collect_pages(&reparsed).unwrap();
        assert_eq!(pages.len(), 2);

        assert!(reparsed.catalog_ref().is_some());
    }
}