1use std::collections::HashMap;
5use std::path::Path;
6
7use crate::error::Result;
8use crate::object::{IndirectRef, PdfDict, PdfObject};
9use crate::page::{collect_pages, PageInfo};
10use crate::parser::PdfDocument;
11use crate::writer::page::PageBuilder;
12use crate::writer::serialize::serialize_pdf;
13use crate::writer::PdfWriter;
14
/// An editable, in-memory copy of a parsed PDF document.
///
/// The modifier owns a [`PdfWriter`] populated with the source document's
/// objects and remembers which objects act as the document roots (catalog and
/// optional Info dictionary) so the result can be serialized again.
pub struct DocumentModifier {
    /// Object store holding the copied objects plus anything added or replaced later.
    writer: PdfWriter,
    /// Reference to the document catalog; used as a garbage-collection root and
    /// passed to the serializer as the document root.
    catalog_ref: IndirectRef,
    /// Reference to the Info dictionary, when the source trailer had one or
    /// `set_info` has allocated one.
    info_ref: Option<IndirectRef>,
}
22
23impl DocumentModifier {
24 pub fn from_document(doc: &PdfDocument) -> Result<Self> {
27 let mut writer = PdfWriter::new();
28 writer.version = doc.version;
29
30 let catalog_ref = doc
32 .catalog_ref()
33 .cloned()
34 .unwrap_or(IndirectRef {
35 obj_num: 1,
36 gen_num: 0,
37 });
38
39 let info_ref = doc
41 .trailer()
42 .get_ref(b"Info")
43 .cloned();
44
45 let mut max_obj = 0u32;
47 let refs: Vec<IndirectRef> = doc.object_refs().collect();
48 for iref in &refs {
49 if let Ok(obj) = doc.resolve(iref) {
50 writer.objects.push((iref.obj_num, obj));
51 max_obj = max_obj.max(iref.obj_num);
52 }
53 }
54 writer.next_obj_num = max_obj + 1;
55
56 Ok(Self {
57 writer,
58 catalog_ref,
59 info_ref,
60 })
61 }
62
63 pub fn writer(&mut self) -> &mut PdfWriter {
65 &mut self.writer
66 }
67
68 pub fn catalog_ref(&self) -> &IndirectRef {
70 &self.catalog_ref
71 }
72
73 pub fn set_object(&mut self, obj_num: u32, obj: PdfObject) {
75 self.writer.set_object(obj_num, obj);
76 }
77
78 pub fn add_object(&mut self, obj: PdfObject) -> IndirectRef {
80 self.writer.add_object(obj)
81 }
82
83 pub fn find_object_pub(&self, obj_num: u32) -> Option<&PdfObject> {
85 self.find_object(obj_num)
86 }
87
88 pub fn delete_page(&mut self, page_index: usize) -> Result<()> {
91 let pages_ref = self.find_pages_ref()?;
92 let pages_obj_num = pages_ref.obj_num;
93
94 let pages_obj = self.find_object(pages_obj_num)
96 .cloned()
97 .unwrap_or(PdfObject::Null);
98
99 if let PdfObject::Dict(mut pages_dict) = pages_obj {
100 if let Some(PdfObject::Array(mut kids)) = pages_dict.remove(b"Kids") {
101 if page_index < kids.len() {
102 kids.remove(page_index);
103 let count = kids.len() as i64;
104 pages_dict.insert(b"Kids".to_vec(), PdfObject::Array(kids));
105 pages_dict.insert(b"Count".to_vec(), PdfObject::Integer(count));
106 self.writer.set_object(pages_obj_num, PdfObject::Dict(pages_dict));
107 }
108 }
109 }
110
111 Ok(())
112 }
113
114 pub fn insert_page(&mut self, page_index: usize, page: PageBuilder) -> Result<()> {
116 let pages_ref = self.find_pages_ref()?;
117 let pages_obj_num = pages_ref.obj_num;
118
119 let page_ref = page.build(&mut self.writer, &pages_ref);
120
121 let pages_obj = self.find_object(pages_obj_num)
122 .cloned()
123 .unwrap_or(PdfObject::Null);
124
125 if let PdfObject::Dict(mut pages_dict) = pages_obj {
126 if let Some(PdfObject::Array(mut kids)) = pages_dict.remove(b"Kids") {
127 let idx = page_index.min(kids.len());
128 kids.insert(idx, PdfObject::Reference(page_ref));
129 let count = kids.len() as i64;
130 pages_dict.insert(b"Kids".to_vec(), PdfObject::Array(kids));
131 pages_dict.insert(b"Count".to_vec(), PdfObject::Integer(count));
132 self.writer.set_object(pages_obj_num, PdfObject::Dict(pages_dict));
133 }
134 }
135
136 Ok(())
137 }
138
139 pub fn reorder_pages(&mut self, order: &[usize]) -> Result<()> {
141 let pages_ref = self.find_pages_ref()?;
142 let pages_obj_num = pages_ref.obj_num;
143
144 let pages_obj = self.find_object(pages_obj_num)
145 .cloned()
146 .unwrap_or(PdfObject::Null);
147
148 if let PdfObject::Dict(mut pages_dict) = pages_obj {
149 if let Some(PdfObject::Array(kids)) = pages_dict.remove(b"Kids") {
150 let mut new_kids = Vec::with_capacity(order.len());
151 for &idx in order {
152 if idx < kids.len() {
153 new_kids.push(kids[idx].clone());
154 }
155 }
156 let count = new_kids.len() as i64;
157 pages_dict.insert(b"Kids".to_vec(), PdfObject::Array(new_kids));
158 pages_dict.insert(b"Count".to_vec(), PdfObject::Integer(count));
159 self.writer.set_object(pages_obj_num, PdfObject::Dict(pages_dict));
160 }
161 }
162
163 Ok(())
164 }
165
166 pub fn set_info(&mut self, key: &[u8], value: &str) {
168 let info_num = if let Some(ref r) = self.info_ref {
169 r.obj_num
170 } else {
171 let num = self.writer.alloc_object_num();
172 self.info_ref = Some(IndirectRef {
173 obj_num: num,
174 gen_num: 0,
175 });
176 num
177 };
178
179 let info_obj = self.find_object(info_num)
181 .cloned()
182 .unwrap_or(PdfObject::Dict(PdfDict::new()));
183
184 if let PdfObject::Dict(mut info_dict) = info_obj {
185 info_dict.insert(
186 key.to_vec(),
187 PdfObject::String(value.as_bytes().to_vec()),
188 );
189 self.writer.set_object(info_num, PdfObject::Dict(info_dict));
190 }
191 }
192
193 pub fn garbage_collect(&mut self) {
198 let mut reachable = std::collections::HashSet::new();
199
200 reachable.insert(self.catalog_ref.obj_num);
202 if let Some(ref info) = self.info_ref {
203 reachable.insert(info.obj_num);
204 }
205
206 let mut work: Vec<u32> = reachable.iter().copied().collect();
208 while let Some(obj_num) = work.pop() {
209 if let Some(obj) = self.find_object(obj_num).cloned() {
210 let refs = collect_references(&obj);
211 for r in refs {
212 if reachable.insert(r) {
213 work.push(r);
214 }
215 }
216 }
217 }
218
219 self.writer.objects.retain(|(num, _)| reachable.contains(num));
221 }
222
223 pub fn build(self) -> Result<Vec<u8>> {
225 serialize_pdf(
226 &self.writer.objects,
227 self.writer.version,
228 &self.catalog_ref,
229 self.info_ref.as_ref(),
230 )
231 }
232
233 pub fn save(self, path: &Path) -> Result<()> {
235 let bytes = self.build()?;
236 std::fs::write(path, bytes)?;
237 Ok(())
238 }
239
240 fn find_pages_ref(&self) -> Result<IndirectRef> {
243 if let Some(PdfObject::Dict(catalog)) = self.find_object(self.catalog_ref.obj_num) {
245 if let Some(PdfObject::Reference(r)) = catalog.get(b"Pages") {
246 return Ok(r.clone());
247 }
248 }
249 Ok(IndirectRef {
251 obj_num: 2,
252 gen_num: 0,
253 })
254 }
255
256 fn find_object(&self, obj_num: u32) -> Option<&PdfObject> {
257 self.writer
258 .objects
259 .iter()
260 .find(|(n, _)| *n == obj_num)
261 .map(|(_, o)| o)
262 }
263}
264
265fn collect_references(obj: &PdfObject) -> Vec<u32> {
267 let mut refs = Vec::new();
268 collect_references_inner(obj, &mut refs);
269 refs
270}
271
272fn collect_references_inner(obj: &PdfObject, refs: &mut Vec<u32>) {
273 match obj {
274 PdfObject::Reference(r) => {
275 refs.push(r.obj_num);
276 }
277 PdfObject::Dict(d) => {
278 for (_, val) in d.iter() {
279 collect_references_inner(val, refs);
280 }
281 }
282 PdfObject::Array(arr) => {
283 for item in arr {
284 collect_references_inner(item, refs);
285 }
286 }
287 PdfObject::Stream { dict, .. } => {
288 for (_, val) in dict.iter() {
289 collect_references_inner(val, refs);
290 }
291 }
292 _ => {}
293 }
294}
295
296pub fn incremental_save(original_data: &[u8], modifier: DocumentModifier) -> Result<Vec<u8>> {
301 use std::io::Write;
302
303 let old_startxref = crate::xref::find_startxref(original_data)?;
305
306 let mut buf = original_data.to_vec();
307
308 let max_obj_num = modifier
310 .writer
311 .objects
312 .iter()
313 .map(|(n, _)| *n)
314 .max()
315 .unwrap_or(0);
316 let xref_size = max_obj_num + 1;
317
318 let mut offsets: Vec<(u32, usize)> = Vec::new();
320 for (obj_num, obj) in &modifier.writer.objects {
321 let offset = buf.len();
322 offsets.push((*obj_num, offset));
323 write!(buf, "{} 0 obj\n", obj_num)?;
324 write!(buf, "{}", obj)?;
326 write!(buf, "\nendobj\n")?;
327 }
328
329 let new_xref_offset = buf.len();
331 write!(buf, "xref\n")?;
332
333 let mut sorted_offsets = offsets.clone();
336 sorted_offsets.sort_by_key(|(n, _)| *n);
337
338 for (obj_num, offset) in &sorted_offsets {
340 write!(buf, "{} 1\n", obj_num)?;
341 write!(buf, "{:010} {:05} n \r\n", offset, 0)?;
342 }
343
344 let mut trailer = PdfDict::new();
346 trailer.insert(b"Size".to_vec(), PdfObject::Integer(xref_size as i64));
347 trailer.insert(
348 b"Root".to_vec(),
349 PdfObject::Reference(modifier.catalog_ref.clone()),
350 );
351 if let Some(ref info) = modifier.info_ref {
352 trailer.insert(b"Info".to_vec(), PdfObject::Reference(info.clone()));
353 }
354 trailer.insert(
355 b"Prev".to_vec(),
356 PdfObject::Integer(old_startxref as i64),
357 );
358
359 write!(buf, "trailer\n")?;
360 write!(buf, "{}", PdfObject::Dict(trailer))?;
361 write!(buf, "\n")?;
362
363 write!(buf, "startxref\n{}\n%%EOF\n", new_xref_offset)?;
364
365 Ok(buf)
366}
367
368pub fn merge_documents(docs: &[&PdfDocument]) -> Result<Vec<u8>> {
373 let mut writer = PdfWriter::new();
374 let pages_obj_num = writer.alloc_object_num();
375 let pages_ref = IndirectRef {
376 obj_num: pages_obj_num,
377 gen_num: 0,
378 };
379
380 let mut all_page_refs: Vec<IndirectRef> = Vec::new();
381
382 for doc in docs.iter() {
383 let pages = collect_pages(*doc)?;
384 for page_info in &pages {
385 let page_ref = graft_page(&mut writer, *doc, page_info, &pages_ref)?;
386 all_page_refs.push(page_ref);
387 }
388 }
389
390 let kids: Vec<PdfObject> = all_page_refs
392 .iter()
393 .map(|r| PdfObject::Reference(r.clone()))
394 .collect();
395 let count = kids.len() as i64;
396
397 let mut pages_dict = PdfDict::new();
398 pages_dict.insert(b"Type".to_vec(), PdfObject::Name(b"Pages".to_vec()));
399 pages_dict.insert(b"Kids".to_vec(), PdfObject::Array(kids));
400 pages_dict.insert(b"Count".to_vec(), PdfObject::Integer(count));
401 writer.set_object(pages_obj_num, PdfObject::Dict(pages_dict));
402
403 let mut catalog_dict = PdfDict::new();
405 catalog_dict.insert(b"Type".to_vec(), PdfObject::Name(b"Catalog".to_vec()));
406 catalog_dict.insert(b"Pages".to_vec(), PdfObject::Reference(pages_ref));
407 let catalog_ref = writer.add_object(PdfObject::Dict(catalog_dict));
408
409 serialize_pdf(&writer.objects, (1, 7), &catalog_ref, None)
410}
411
412fn graft_page(
415 writer: &mut PdfWriter,
416 doc: &PdfDocument,
417 page_info: &PageInfo,
418 new_pages_ref: &IndirectRef,
419) -> Result<IndirectRef> {
420 let mut remap: HashMap<u32, u32> = HashMap::new();
421
422 let page_obj = doc.resolve(&page_info.page_ref)?;
424
425 let new_page_obj = deep_copy_object(writer, doc, &page_obj, &mut remap)?;
427
428 if let PdfObject::Dict(mut page_dict) = new_page_obj {
430 page_dict.insert(
431 b"Parent".to_vec(),
432 PdfObject::Reference(new_pages_ref.clone()),
433 );
434 Ok(writer.add_object(PdfObject::Dict(page_dict)))
435 } else {
436 Ok(writer.add_object(new_page_obj))
437 }
438}
439
440fn deep_copy_object(
442 writer: &mut PdfWriter,
443 doc: &PdfDocument,
444 obj: &PdfObject,
445 remap: &mut HashMap<u32, u32>,
446) -> Result<PdfObject> {
447 match obj {
448 PdfObject::Reference(r) => {
449 if let Some(&new_num) = remap.get(&r.obj_num) {
451 return Ok(PdfObject::Reference(IndirectRef {
452 obj_num: new_num,
453 gen_num: 0,
454 }));
455 }
456
457 let new_num = writer.alloc_object_num();
459 remap.insert(r.obj_num, new_num);
460
461 let resolved = doc.resolve(r)?;
463 let copied = deep_copy_object(writer, doc, &resolved, remap)?;
464 writer.set_object(new_num, copied);
465
466 Ok(PdfObject::Reference(IndirectRef {
467 obj_num: new_num,
468 gen_num: 0,
469 }))
470 }
471 PdfObject::Dict(d) => {
472 let mut new_dict = PdfDict::new();
473 for (key, val) in d.iter() {
474 let new_val = deep_copy_object(writer, doc, val, remap)?;
475 new_dict.insert(key.clone(), new_val);
476 }
477 Ok(PdfObject::Dict(new_dict))
478 }
479 PdfObject::Array(arr) => {
480 let mut new_arr = Vec::with_capacity(arr.len());
481 for item in arr {
482 new_arr.push(deep_copy_object(writer, doc, item, remap)?);
483 }
484 Ok(PdfObject::Array(new_arr))
485 }
486 PdfObject::Stream { dict, data } => {
487 let mut new_dict = PdfDict::new();
488 for (key, val) in dict.iter() {
489 let new_val = deep_copy_object(writer, doc, val, remap)?;
490 new_dict.insert(key.clone(), new_val);
491 }
492 Ok(PdfObject::Stream {
493 dict: new_dict,
494 data: data.clone(),
495 })
496 }
497 other => Ok(other.clone()),
499 }
500}
501
#[cfg(test)]
mod tests {
    use super::*;
    use crate::writer::document::DocumentBuilder;
    use crate::writer::page::PageBuilder;

    /// Builds a PDF with `num_pages` pages, each showing "`text` - Page N" in Helvetica.
    fn create_test_pdf(text: &str, num_pages: usize) -> Vec<u8> {
        let mut doc = DocumentBuilder::new();
        let font = doc.add_standard_font("Helvetica");

        for i in 0..num_pages {
            let mut page = PageBuilder::new(612.0, 792.0);
            page.add_font(&font, "Helvetica");
            page.begin_text();
            page.set_font(&font, 12.0);
            page.move_to(72.0, 720.0);
            page.show_text(&format!("{} - Page {}", text, i + 1));
            page.end_text();
            doc.add_page(page);
        }

        doc.build().unwrap()
    }

    /// Loading a document into a modifier and building it again keeps the page count.
    #[test]
    fn test_modifier_roundtrip() {
        let bytes = create_test_pdf("Hello", 2);
        let doc = PdfDocument::from_bytes(bytes).unwrap();

        let modifier = DocumentModifier::from_document(&doc).unwrap();
        let new_bytes = modifier.build().unwrap();

        let reparsed = PdfDocument::from_bytes(new_bytes).unwrap();
        let pages = collect_pages(&reparsed).unwrap();
        assert_eq!(pages.len(), 2);
    }

    /// Deleting one of three pages leaves two.
    #[test]
    fn test_delete_page() {
        let bytes = create_test_pdf("Test", 3);
        let doc = PdfDocument::from_bytes(bytes).unwrap();

        let mut modifier = DocumentModifier::from_document(&doc).unwrap();
        modifier.delete_page(1).unwrap();

        let new_bytes = modifier.build().unwrap();
        let reparsed = PdfDocument::from_bytes(new_bytes).unwrap();
        let pages = collect_pages(&reparsed).unwrap();
        assert_eq!(pages.len(), 2);
    }

    /// Reordering with a full permutation keeps all pages.
    #[test]
    fn test_reorder_pages() {
        let bytes = create_test_pdf("Reorder", 3);
        let doc = PdfDocument::from_bytes(bytes).unwrap();

        let mut modifier = DocumentModifier::from_document(&doc).unwrap();
        modifier.reorder_pages(&[2, 0, 1]).unwrap();

        let new_bytes = modifier.build().unwrap();
        let reparsed = PdfDocument::from_bytes(new_bytes).unwrap();
        let pages = collect_pages(&reparsed).unwrap();
        assert_eq!(pages.len(), 3);
    }

    /// Info entries set through the modifier appear in the serialized output.
    #[test]
    fn test_set_info() {
        let bytes = create_test_pdf("Info", 1);
        let doc = PdfDocument::from_bytes(bytes).unwrap();

        let mut modifier = DocumentModifier::from_document(&doc).unwrap();
        modifier.set_info(b"Title", "New Title");
        modifier.set_info(b"Author", "New Author");

        let new_bytes = modifier.build().unwrap();
        let text = String::from_utf8_lossy(&new_bytes);
        assert!(text.contains("New Title"));
        assert!(text.contains("New Author"));
    }

    /// Merging a 2-page and a 3-page document yields 5 pages.
    #[test]
    fn test_merge_documents() {
        let bytes1 = create_test_pdf("Doc1", 2);
        let bytes2 = create_test_pdf("Doc2", 3);

        let doc1 = PdfDocument::from_bytes(bytes1).unwrap();
        let doc2 = PdfDocument::from_bytes(bytes2).unwrap();

        let merged = merge_documents(&[&doc1, &doc2]).unwrap();

        let reparsed = PdfDocument::from_bytes(merged).unwrap();
        let pages = collect_pages(&reparsed).unwrap();
        assert_eq!(pages.len(), 5);
    }

    /// An incremental save keeps the original bytes as a prefix and appends an update.
    #[test]
    fn test_incremental_save() {
        let original = create_test_pdf("Original", 1);
        let original_len = original.len();

        let doc = PdfDocument::from_bytes(original.clone()).unwrap();
        let mut modifier = DocumentModifier::from_document(&doc).unwrap();
        modifier.set_info(b"Title", "Updated Title");

        let result = incremental_save(&original, modifier).unwrap();

        // The original file must be preserved byte-for-byte at the start.
        assert!(result.len() > original_len);
        assert_eq!(&result[..original_len], &original[..]);

        let text = String::from_utf8_lossy(&result);
        assert!(text.contains("Updated Title"));

        // The new trailer must chain back to the previous cross-reference section.
        assert!(text.contains("/Prev"));

        let tail = String::from_utf8_lossy(&result[result.len().saturating_sub(50)..]);
        assert!(tail.contains("%%EOF"));
    }

    /// Unreferenced objects are removed by garbage collection.
    #[test]
    fn test_garbage_collect() {
        let bytes = create_test_pdf("GC Test", 1);
        let doc = PdfDocument::from_bytes(bytes).unwrap();
        let mut modifier = DocumentModifier::from_document(&doc).unwrap();

        // Establish a baseline with only reachable objects.
        modifier.garbage_collect();
        let count_baseline = modifier.writer.objects.len();

        // Add two objects nothing refers to.
        modifier.add_object(PdfObject::Integer(999));
        modifier.add_object(PdfObject::String(b"orphan".to_vec()));
        let count_with_orphans = modifier.writer.objects.len();
        assert_eq!(count_with_orphans, count_baseline + 2);

        modifier.garbage_collect();
        let count_after = modifier.writer.objects.len();

        assert_eq!(count_after, count_baseline);
    }

    /// Two documents using the same font still merge into a valid document.
    #[test]
    fn test_resource_conflict_merge() {
        let bytes1 = create_test_pdf("Doc1", 1);
        let bytes2 = create_test_pdf("Doc2", 1);

        let doc1 = PdfDocument::from_bytes(bytes1).unwrap();
        let doc2 = PdfDocument::from_bytes(bytes2).unwrap();

        let merged = merge_documents(&[&doc1, &doc2]).unwrap();

        let reparsed = PdfDocument::from_bytes(merged).unwrap();
        let pages = collect_pages(&reparsed).unwrap();
        assert_eq!(pages.len(), 2);

        assert!(reparsed.catalog_ref().is_some());
    }
}