Skip to main content

justpdf_core/writer/
modify.rs

1//! Document modification: load existing PDF, modify, and save.
2//! Also provides page merge/split operations.
3
4use std::collections::HashMap;
5use std::path::Path;
6
7use crate::error::Result;
8use crate::object::{IndirectRef, PdfDict, PdfObject};
9use crate::page::{collect_pages, PageInfo};
10use crate::parser::PdfDocument;
11use crate::writer::page::PageBuilder;
12use crate::writer::serialize::serialize_pdf;
13use crate::writer::PdfWriter;
14
15/// Modifier for existing PDF documents.
16/// Loads all objects from a PdfDocument, allows modification, then saves.
17pub struct DocumentModifier {
18    writer: PdfWriter,
19    catalog_ref: IndirectRef,
20    info_ref: Option<IndirectRef>,
21}
22
23impl DocumentModifier {
24    /// Create a modifier from an existing PdfDocument.
25    /// Copies all objects from the document into the writer.
26    pub fn from_document(doc: &PdfDocument) -> Result<Self> {
27        let mut writer = PdfWriter::new();
28        writer.version = doc.version;
29
30        // Find catalog reference
31        let catalog_ref = doc
32            .catalog_ref()
33            .cloned()
34            .unwrap_or(IndirectRef {
35                obj_num: 1,
36                gen_num: 0,
37            });
38
39        // Find info reference from trailer
40        let info_ref = doc
41            .trailer()
42            .get_ref(b"Info")
43            .cloned();
44
45        // Copy all objects
46        let mut max_obj = 0u32;
47        let refs: Vec<IndirectRef> = doc.object_refs().collect();
48        for iref in &refs {
49            if let Ok(obj) = doc.resolve(iref) {
50                writer.objects.push((iref.obj_num, obj));
51                max_obj = max_obj.max(iref.obj_num);
52            }
53        }
54        writer.next_obj_num = max_obj + 1;
55
56        Ok(Self {
57            writer,
58            catalog_ref,
59            info_ref,
60        })
61    }
62
63    /// Get a reference to the internal writer for low-level modifications.
64    pub fn writer(&mut self) -> &mut PdfWriter {
65        &mut self.writer
66    }
67
68    /// Get the catalog reference.
69    pub fn catalog_ref(&self) -> &IndirectRef {
70        &self.catalog_ref
71    }
72
73    /// Replace an object at a given object number.
74    pub fn set_object(&mut self, obj_num: u32, obj: PdfObject) {
75        self.writer.set_object(obj_num, obj);
76    }
77
78    /// Add a new object and return its reference.
79    pub fn add_object(&mut self, obj: PdfObject) -> IndirectRef {
80        self.writer.add_object(obj)
81    }
82
83    /// Find an object by object number (public accessor).
84    pub fn find_object_pub(&self, obj_num: u32) -> Option<&PdfObject> {
85        self.find_object(obj_num)
86    }
87
88    /// Delete a page by index (0-based).
89    /// Modifies the Pages tree to remove the page reference.
90    pub fn delete_page(&mut self, page_index: usize) -> Result<()> {
91        let pages_ref = self.find_pages_ref()?;
92        let pages_obj_num = pages_ref.obj_num;
93
94        // Find the Pages dict
95        let pages_obj = self.find_object(pages_obj_num)
96            .cloned()
97            .unwrap_or(PdfObject::Null);
98
99        if let PdfObject::Dict(mut pages_dict) = pages_obj {
100            if let Some(PdfObject::Array(mut kids)) = pages_dict.remove(b"Kids") {
101                if page_index < kids.len() {
102                    kids.remove(page_index);
103                    let count = kids.len() as i64;
104                    pages_dict.insert(b"Kids".to_vec(), PdfObject::Array(kids));
105                    pages_dict.insert(b"Count".to_vec(), PdfObject::Integer(count));
106                    self.writer.set_object(pages_obj_num, PdfObject::Dict(pages_dict));
107                }
108            }
109        }
110
111        Ok(())
112    }
113
114    /// Insert a new page at the given index.
115    pub fn insert_page(&mut self, page_index: usize, page: PageBuilder) -> Result<()> {
116        let pages_ref = self.find_pages_ref()?;
117        let pages_obj_num = pages_ref.obj_num;
118
119        let page_ref = page.build(&mut self.writer, &pages_ref);
120
121        let pages_obj = self.find_object(pages_obj_num)
122            .cloned()
123            .unwrap_or(PdfObject::Null);
124
125        if let PdfObject::Dict(mut pages_dict) = pages_obj {
126            if let Some(PdfObject::Array(mut kids)) = pages_dict.remove(b"Kids") {
127                let idx = page_index.min(kids.len());
128                kids.insert(idx, PdfObject::Reference(page_ref));
129                let count = kids.len() as i64;
130                pages_dict.insert(b"Kids".to_vec(), PdfObject::Array(kids));
131                pages_dict.insert(b"Count".to_vec(), PdfObject::Integer(count));
132                self.writer.set_object(pages_obj_num, PdfObject::Dict(pages_dict));
133            }
134        }
135
136        Ok(())
137    }
138
139    /// Reorder pages. `order` is a list of 0-based page indices in the desired order.
140    pub fn reorder_pages(&mut self, order: &[usize]) -> Result<()> {
141        let pages_ref = self.find_pages_ref()?;
142        let pages_obj_num = pages_ref.obj_num;
143
144        let pages_obj = self.find_object(pages_obj_num)
145            .cloned()
146            .unwrap_or(PdfObject::Null);
147
148        if let PdfObject::Dict(mut pages_dict) = pages_obj {
149            if let Some(PdfObject::Array(kids)) = pages_dict.remove(b"Kids") {
150                let mut new_kids = Vec::with_capacity(order.len());
151                for &idx in order {
152                    if idx < kids.len() {
153                        new_kids.push(kids[idx].clone());
154                    }
155                }
156                let count = new_kids.len() as i64;
157                pages_dict.insert(b"Kids".to_vec(), PdfObject::Array(new_kids));
158                pages_dict.insert(b"Count".to_vec(), PdfObject::Integer(count));
159                self.writer.set_object(pages_obj_num, PdfObject::Dict(pages_dict));
160            }
161        }
162
163        Ok(())
164    }
165
166    /// Set or update a metadata field in the Info dictionary.
167    pub fn set_info(&mut self, key: &[u8], value: &str) {
168        let info_num = if let Some(ref r) = self.info_ref {
169            r.obj_num
170        } else {
171            let num = self.writer.alloc_object_num();
172            self.info_ref = Some(IndirectRef {
173                obj_num: num,
174                gen_num: 0,
175            });
176            num
177        };
178
179        // Get or create info dict
180        let info_obj = self.find_object(info_num)
181            .cloned()
182            .unwrap_or(PdfObject::Dict(PdfDict::new()));
183
184        if let PdfObject::Dict(mut info_dict) = info_obj {
185            info_dict.insert(
186                key.to_vec(),
187                PdfObject::String(value.as_bytes().to_vec()),
188            );
189            self.writer.set_object(info_num, PdfObject::Dict(info_dict));
190        }
191    }
192
193    /// Perform garbage collection: remove unreachable objects.
194    ///
195    /// Traverses all objects reachable from the catalog (and info dict),
196    /// then removes any objects that are not reachable.
197    pub fn garbage_collect(&mut self) {
198        let mut reachable = std::collections::HashSet::new();
199
200        // Mark catalog and info as roots
201        reachable.insert(self.catalog_ref.obj_num);
202        if let Some(ref info) = self.info_ref {
203            reachable.insert(info.obj_num);
204        }
205
206        // Iteratively mark all reachable objects
207        let mut work: Vec<u32> = reachable.iter().copied().collect();
208        while let Some(obj_num) = work.pop() {
209            if let Some(obj) = self.find_object(obj_num).cloned() {
210                let refs = collect_references(&obj);
211                for r in refs {
212                    if reachable.insert(r) {
213                        work.push(r);
214                    }
215                }
216            }
217        }
218
219        // Remove unreachable objects
220        self.writer.objects.retain(|(num, _)| reachable.contains(num));
221    }
222
223    /// Serialize to PDF bytes.
224    pub fn build(self) -> Result<Vec<u8>> {
225        serialize_pdf(
226            &self.writer.objects,
227            self.writer.version,
228            &self.catalog_ref,
229            self.info_ref.as_ref(),
230        )
231    }
232
233    /// Serialize to PDF bytes using xref streams (PDF 1.5+).
234    /// `compressed` contains info about objects packed into object streams.
235    pub fn build_with_xref_stream(
236        self,
237        compressed: &[crate::writer::object_stream::CompressedObjInfo],
238    ) -> Result<Vec<u8>> {
239        crate::writer::serialize::serialize_pdf_with_xref_stream(
240            &self.writer.objects,
241            compressed,
242            self.writer.version,
243            &self.catalog_ref,
244            self.info_ref.as_ref(),
245        )
246    }
247
248    /// Save to file.
249    pub fn save(self, path: &Path) -> Result<()> {
250        let bytes = self.build()?;
251        std::fs::write(path, bytes)?;
252        Ok(())
253    }
254
255    // --- helpers ---
256
257    fn find_pages_ref(&self) -> Result<IndirectRef> {
258        // Look up Catalog → /Pages
259        if let Some(PdfObject::Dict(catalog)) = self.find_object(self.catalog_ref.obj_num) {
260            if let Some(PdfObject::Reference(r)) = catalog.get(b"Pages") {
261                return Ok(r.clone());
262            }
263        }
264        // Fallback: guess object 2
265        Ok(IndirectRef {
266            obj_num: 2,
267            gen_num: 0,
268        })
269    }
270
271    fn find_object(&self, obj_num: u32) -> Option<&PdfObject> {
272        self.writer
273            .objects
274            .iter()
275            .find(|(n, _)| *n == obj_num)
276            .map(|(_, o)| o)
277    }
278}
279
280/// Collect all indirect reference object numbers from a PdfObject recursively.
281fn collect_references(obj: &PdfObject) -> Vec<u32> {
282    let mut refs = Vec::new();
283    collect_references_inner(obj, &mut refs);
284    refs
285}
286
287fn collect_references_inner(obj: &PdfObject, refs: &mut Vec<u32>) {
288    match obj {
289        PdfObject::Reference(r) => {
290            refs.push(r.obj_num);
291        }
292        PdfObject::Dict(d) => {
293            for (_, val) in d.iter() {
294                collect_references_inner(val, refs);
295            }
296        }
297        PdfObject::Array(arr) => {
298            for item in arr {
299                collect_references_inner(item, refs);
300            }
301        }
302        PdfObject::Stream { dict, .. } => {
303            for (_, val) in dict.iter() {
304                collect_references_inner(val, refs);
305            }
306        }
307        _ => {}
308    }
309}
310
311/// Perform an incremental save: append modified objects to the original PDF data.
312///
313/// This preserves the original bytes and appends only modified/new objects,
314/// a new xref table, and a new trailer with /Prev pointing to the old xref.
315pub fn incremental_save(original_data: &[u8], modifier: DocumentModifier) -> Result<Vec<u8>> {
316    use std::io::Write;
317
318    // Find old startxref
319    let old_startxref = crate::xref::find_startxref(original_data)?;
320
321    let mut buf = original_data.to_vec();
322
323    // Determine max object number for xref size
324    let max_obj_num = modifier
325        .writer
326        .objects
327        .iter()
328        .map(|(n, _)| *n)
329        .max()
330        .unwrap_or(0);
331    let xref_size = max_obj_num + 1;
332
333    // Write each object and track offsets
334    let mut offsets: Vec<(u32, usize)> = Vec::new();
335    for (obj_num, obj) in &modifier.writer.objects {
336        let offset = buf.len();
337        offsets.push((*obj_num, offset));
338        write!(buf, "{} 0 obj\n", obj_num)?;
339        // Use the serialize module's logic inline
340        write!(buf, "{}", obj)?;
341        write!(buf, "\nendobj\n")?;
342    }
343
344    // Write new xref table
345    let new_xref_offset = buf.len();
346    write!(buf, "xref\n")?;
347
348    // Write subsections for each modified object
349    // Sort offsets by object number
350    let mut sorted_offsets = offsets.clone();
351    sorted_offsets.sort_by_key(|(n, _)| *n);
352
353    // Write as individual subsections
354    for (obj_num, offset) in &sorted_offsets {
355        write!(buf, "{} 1\n", obj_num)?;
356        write!(buf, "{:010} {:05} n \r\n", offset, 0)?;
357    }
358
359    // Write trailer
360    let mut trailer = PdfDict::new();
361    trailer.insert(b"Size".to_vec(), PdfObject::Integer(xref_size as i64));
362    trailer.insert(
363        b"Root".to_vec(),
364        PdfObject::Reference(modifier.catalog_ref.clone()),
365    );
366    if let Some(ref info) = modifier.info_ref {
367        trailer.insert(b"Info".to_vec(), PdfObject::Reference(info.clone()));
368    }
369    trailer.insert(
370        b"Prev".to_vec(),
371        PdfObject::Integer(old_startxref as i64),
372    );
373
374    write!(buf, "trailer\n")?;
375    write!(buf, "{}", PdfObject::Dict(trailer))?;
376    write!(buf, "\n")?;
377
378    write!(buf, "startxref\n{}\n%%EOF\n", new_xref_offset)?;
379
380    Ok(buf)
381}
382
383/// Merge pages from multiple PDF documents into one.
384///
385/// Returns the merged PDF as bytes. Pages are concatenated in order:
386/// all pages from doc1, then all from doc2, etc.
387pub fn merge_documents(docs: &[&PdfDocument]) -> Result<Vec<u8>> {
388    let mut writer = PdfWriter::new();
389    let pages_obj_num = writer.alloc_object_num();
390    let pages_ref = IndirectRef {
391        obj_num: pages_obj_num,
392        gen_num: 0,
393    };
394
395    let mut all_page_refs: Vec<IndirectRef> = Vec::new();
396
397    for doc in docs.iter() {
398        let pages = collect_pages(*doc)?;
399        for page_info in &pages {
400            let page_ref = graft_page(&mut writer, *doc, page_info, &pages_ref)?;
401            all_page_refs.push(page_ref);
402        }
403    }
404
405    // Create Pages dict
406    let kids: Vec<PdfObject> = all_page_refs
407        .iter()
408        .map(|r| PdfObject::Reference(r.clone()))
409        .collect();
410    let count = kids.len() as i64;
411
412    let mut pages_dict = PdfDict::new();
413    pages_dict.insert(b"Type".to_vec(), PdfObject::Name(b"Pages".to_vec()));
414    pages_dict.insert(b"Kids".to_vec(), PdfObject::Array(kids));
415    pages_dict.insert(b"Count".to_vec(), PdfObject::Integer(count));
416    writer.set_object(pages_obj_num, PdfObject::Dict(pages_dict));
417
418    // Create Catalog
419    let mut catalog_dict = PdfDict::new();
420    catalog_dict.insert(b"Type".to_vec(), PdfObject::Name(b"Catalog".to_vec()));
421    catalog_dict.insert(b"Pages".to_vec(), PdfObject::Reference(pages_ref));
422    let catalog_ref = writer.add_object(PdfObject::Dict(catalog_dict));
423
424    serialize_pdf(&writer.objects, (1, 7), &catalog_ref, None)
425}
426
427/// Graft a single page from a source document into the writer.
428/// Copies the page dict and all referenced objects with remapped object numbers.
429fn graft_page(
430    writer: &mut PdfWriter,
431    doc: &PdfDocument,
432    page_info: &PageInfo,
433    new_pages_ref: &IndirectRef,
434) -> Result<IndirectRef> {
435    let mut remap: HashMap<u32, u32> = HashMap::new();
436
437    // Resolve the page object
438    let page_obj = doc.resolve(&page_info.page_ref)?;
439
440    // Deep-copy the page and all referenced objects
441    let new_page_obj = deep_copy_object(writer, doc, &page_obj, &mut remap)?;
442
443    // Update Parent reference to point to our new Pages
444    if let PdfObject::Dict(mut page_dict) = new_page_obj {
445        page_dict.insert(
446            b"Parent".to_vec(),
447            PdfObject::Reference(new_pages_ref.clone()),
448        );
449        Ok(writer.add_object(PdfObject::Dict(page_dict)))
450    } else {
451        Ok(writer.add_object(new_page_obj))
452    }
453}
454
455/// Deep-copy a PdfObject, resolving all references and remapping object numbers.
456fn deep_copy_object(
457    writer: &mut PdfWriter,
458    doc: &PdfDocument,
459    obj: &PdfObject,
460    remap: &mut HashMap<u32, u32>,
461) -> Result<PdfObject> {
462    match obj {
463        PdfObject::Reference(r) => {
464            // Check if already remapped
465            if let Some(&new_num) = remap.get(&r.obj_num) {
466                return Ok(PdfObject::Reference(IndirectRef {
467                    obj_num: new_num,
468                    gen_num: 0,
469                }));
470            }
471
472            // Allocate new number first (for circular reference prevention)
473            let new_num = writer.alloc_object_num();
474            remap.insert(r.obj_num, new_num);
475
476            // Resolve and deep-copy
477            let resolved = doc.resolve(r)?;
478            let copied = deep_copy_object(writer, doc, &resolved, remap)?;
479            writer.set_object(new_num, copied);
480
481            Ok(PdfObject::Reference(IndirectRef {
482                obj_num: new_num,
483                gen_num: 0,
484            }))
485        }
486        PdfObject::Dict(d) => {
487            let mut new_dict = PdfDict::new();
488            for (key, val) in d.iter() {
489                let new_val = deep_copy_object(writer, doc, val, remap)?;
490                new_dict.insert(key.clone(), new_val);
491            }
492            Ok(PdfObject::Dict(new_dict))
493        }
494        PdfObject::Array(arr) => {
495            let mut new_arr = Vec::with_capacity(arr.len());
496            for item in arr {
497                new_arr.push(deep_copy_object(writer, doc, item, remap)?);
498            }
499            Ok(PdfObject::Array(new_arr))
500        }
501        PdfObject::Stream { dict, data } => {
502            let mut new_dict = PdfDict::new();
503            for (key, val) in dict.iter() {
504                let new_val = deep_copy_object(writer, doc, val, remap)?;
505                new_dict.insert(key.clone(), new_val);
506            }
507            Ok(PdfObject::Stream {
508                dict: new_dict,
509                data: data.clone(),
510            })
511        }
512        // Primitive types: just clone
513        other => Ok(other.clone()),
514    }
515}
516
#[cfg(test)]
mod tests {
    use super::*;
    use crate::writer::document::DocumentBuilder;
    use crate::writer::page::PageBuilder;

    /// Build an in-memory PDF with `num_pages` US-Letter pages, each showing
    /// "`text` - Page N" in 12pt Helvetica.
    fn create_test_pdf(text: &str, num_pages: usize) -> Vec<u8> {
        let mut doc = DocumentBuilder::new();
        let font = doc.add_standard_font("Helvetica");

        for i in 0..num_pages {
            let mut page = PageBuilder::new(612.0, 792.0);
            page.add_font(&font, "Helvetica");
            page.begin_text();
            page.set_font(&font, 12.0);
            page.move_to(72.0, 720.0);
            page.show_text(&format!("{} - Page {}", text, i + 1));
            page.end_text();
            doc.add_page(page);
        }

        doc.build().unwrap()
    }

    /// Parse `bytes` and wrap the document in a modifier.
    fn load_modifier(bytes: Vec<u8>) -> DocumentModifier {
        let doc = PdfDocument::from_bytes(bytes).unwrap();
        DocumentModifier::from_document(&doc).unwrap()
    }

    /// Parse `bytes` and return the number of pages found in the page tree.
    fn page_count(bytes: Vec<u8>) -> usize {
        let doc = PdfDocument::from_bytes(bytes).unwrap();
        collect_pages(&doc).unwrap().len()
    }

    #[test]
    fn test_modifier_roundtrip() {
        let modifier = load_modifier(create_test_pdf("Hello", 2));
        let new_bytes = modifier.build().unwrap();

        assert_eq!(page_count(new_bytes), 2);
    }

    #[test]
    fn test_delete_page() {
        let mut modifier = load_modifier(create_test_pdf("Test", 3));
        modifier.delete_page(1).unwrap(); // remove middle page

        let new_bytes = modifier.build().unwrap();
        assert_eq!(page_count(new_bytes), 2);
    }

    #[test]
    fn test_reorder_pages() {
        let mut modifier = load_modifier(create_test_pdf("Reorder", 3));
        modifier.reorder_pages(&[2, 0, 1]).unwrap(); // rotate last page to the front

        let new_bytes = modifier.build().unwrap();
        assert_eq!(page_count(new_bytes), 3);
    }

    #[test]
    fn test_set_info() {
        let mut modifier = load_modifier(create_test_pdf("Info", 1));
        modifier.set_info(b"Title", "New Title");
        modifier.set_info(b"Author", "New Author");

        let new_bytes = modifier.build().unwrap();
        let text = String::from_utf8_lossy(&new_bytes);
        assert!(text.contains("New Title"));
        assert!(text.contains("New Author"));
    }

    #[test]
    fn test_merge_documents() {
        let doc1 = PdfDocument::from_bytes(create_test_pdf("Doc1", 2)).unwrap();
        let doc2 = PdfDocument::from_bytes(create_test_pdf("Doc2", 3)).unwrap();

        let merged = merge_documents(&[&doc1, &doc2]).unwrap();

        assert_eq!(page_count(merged), 5); // 2 + 3
    }

    #[test]
    fn test_incremental_save() {
        let original = create_test_pdf("Original", 1);
        let original_len = original.len();

        let mut modifier = load_modifier(original.clone());
        modifier.set_info(b"Title", "Updated Title");

        let result = incremental_save(&original, modifier).unwrap();

        // The result should start with the original bytes
        assert!(result.len() > original_len);
        assert_eq!(&result[..original_len], &original[..]);

        // Should contain the new title
        let text = String::from_utf8_lossy(&result);
        assert!(text.contains("Updated Title"));

        // Should contain /Prev
        assert!(text.contains("/Prev"));

        // Should end with %%EOF
        let tail = String::from_utf8_lossy(&result[result.len().saturating_sub(50)..]);
        assert!(tail.contains("%%EOF"));
    }

    #[test]
    fn test_garbage_collect() {
        let mut modifier = load_modifier(create_test_pdf("GC Test", 1));

        // Run GC first to establish baseline (some objects from parsing may be unreachable)
        modifier.garbage_collect();
        let count_baseline = modifier.writer.objects.len();

        // Add unreachable (orphan) objects
        modifier.add_object(PdfObject::Integer(999));
        modifier.add_object(PdfObject::String(b"orphan".to_vec()));
        let count_with_orphans = modifier.writer.objects.len();
        assert_eq!(count_with_orphans, count_baseline + 2);

        // Run GC again
        modifier.garbage_collect();
        let count_after = modifier.writer.objects.len();

        // The orphan objects should be removed, back to baseline
        assert_eq!(count_after, count_baseline);
    }

    #[test]
    fn test_resource_conflict_merge() {
        // Both source documents are built the same way, so their pages use the
        // same font resource setup. Each grafted page is deep-copied with new
        // object numbers and keeps its own Resources, so they cannot clash.
        let doc1 = PdfDocument::from_bytes(create_test_pdf("Doc1", 1)).unwrap();
        let doc2 = PdfDocument::from_bytes(create_test_pdf("Doc2", 1)).unwrap();

        let merged = merge_documents(&[&doc1, &doc2]).unwrap();

        let reparsed = PdfDocument::from_bytes(merged).unwrap();
        let pages = collect_pages(&reparsed).unwrap();
        assert_eq!(pages.len(), 2);

        // The merged PDF must still parse and expose a catalog.
        assert!(reparsed.catalog_ref().is_some());
    }
}