Skip to main content

shadowforge_lib/adapters/
pdf.rs

1//! PDF processing adapter using lopdf and pdfium-render.
2
3use std::collections::HashMap;
4use std::io::BufWriter;
5use std::path::Path;
6
7use base64::Engine;
8use base64::engine::general_purpose;
9use bytes::Bytes;
10use image::{DynamicImage, ImageFormat};
11use lopdf::{Document, Object, dictionary};
12use pdfium_render::prelude::*;
13
14use crate::domain::errors::PdfError;
15use crate::domain::ports::PdfProcessor;
16use crate::domain::types::{CoverMedia, CoverMediaKind, Payload};
17
/// Metadata key under which a PDF's page count is stored.
const KEY_PAGE_COUNT: &str = "page_count";

/// Resolution, in dots per inch, used by `PdfProcessorImpl::default`.
const DEFAULT_DPI: u16 = 150;
21
/// PDF adapter built on `lopdf` and `pdfium-render`.
///
/// Covers loading and saving PDF files, rasterising pages to images, and
/// reconstructing a PDF from page images.
#[derive(Debug)]
pub struct PdfProcessorImpl {
    /// Resolution, in dots per inch, used to convert between PDF points and pixels.
    dpi: u16,
}
30
31impl Default for PdfProcessorImpl {
32    fn default() -> Self {
33        Self { dpi: DEFAULT_DPI }
34    }
35}
36
37impl PdfProcessorImpl {
38    /// Create a new PDF processor with the given DPI.
39    #[must_use]
40    pub const fn new(dpi: u16) -> Self {
41        Self { dpi }
42    }
43}
44
45impl PdfProcessor for PdfProcessorImpl {
46    fn load_pdf(&self, path: &Path) -> Result<CoverMedia, PdfError> {
47        // Load PDF document
48        let doc = Document::load(path).map_err(|e| PdfError::ParseFailed {
49            reason: e.to_string(),
50        })?;
51
52        // Check if encrypted
53        if doc.is_encrypted() {
54            return Err(PdfError::Encrypted);
55        }
56
57        // Count pages
58        let page_count = doc.get_pages().len();
59
60        // Read raw bytes
61        let bytes = std::fs::read(path).map_err(|e| PdfError::IoError {
62            reason: e.to_string(),
63        })?;
64
65        // Build metadata
66        let mut metadata = HashMap::new();
67        metadata.insert(KEY_PAGE_COUNT.to_string(), page_count.to_string());
68
69        Ok(CoverMedia {
70            kind: CoverMediaKind::PdfDocument,
71            data: Bytes::from(bytes),
72            metadata,
73        })
74    }
75
76    fn save_pdf(&self, media: &CoverMedia, path: &Path) -> Result<(), PdfError> {
77        // Write raw PDF bytes to file
78        std::fs::write(path, &media.data).map_err(|e| PdfError::IoError {
79            reason: e.to_string(),
80        })?;
81
82        Ok(())
83    }
84
85    fn render_pages_to_images(&self, pdf: &CoverMedia) -> Result<Vec<CoverMedia>, PdfError> {
86        // Initialize pdfium library
87        let pdfium = Pdfium::new(
88            Pdfium::bind_to_library(Pdfium::pdfium_platform_library_name_at_path("./"))
89                .or_else(|_| Pdfium::bind_to_system_library())
90                .map_err(|e| PdfError::RenderFailed {
91                    page: 0,
92                    reason: format!("Failed to load pdfium library: {e}"),
93                })?,
94        );
95
96        // Load PDF from bytes
97        let document = pdfium
98            .load_pdf_from_byte_vec(pdf.data.to_vec(), None)
99            .map_err(|e| PdfError::ParseFailed {
100                reason: e.to_string(),
101            })?;
102
103        let page_count = document.pages().len();
104        let mut images = Vec::with_capacity(page_count as usize);
105
106        // Render each page
107        for page_index in 0..page_count {
108            let page = document
109                .pages()
110                .get(page_index)
111                .map_err(|e| PdfError::RenderFailed {
112                    page: page_index as usize,
113                    reason: e.to_string(),
114                })?;
115
116            // Render to bitmap
117            #[expect(
118                clippy::cast_possible_truncation,
119                reason = "DPI calculation for render"
120            )]
121            let target_width = (page.width().value * f32::from(self.dpi) / 72.0) as i32;
122
123            let bitmap = page
124                .render_with_config(&PdfRenderConfig::new().set_target_width(target_width))
125                .map_err(|e| PdfError::RenderFailed {
126                    page: page_index as usize,
127                    reason: e.to_string(),
128                })?;
129
130            // Convert to RGBA8 image
131            let width = bitmap.width().cast_unsigned();
132            let height = bitmap.height().cast_unsigned();
133            let rgba_data = bitmap.as_rgba_bytes();
134
135            let img =
136                image::RgbaImage::from_raw(width, height, rgba_data.clone()).ok_or_else(|| {
137                    PdfError::RenderFailed {
138                        page: page_index as usize,
139                        reason: "invalid bitmap dimensions".to_string(),
140                    }
141                })?;
142
143            // Build metadata
144            let mut metadata = HashMap::new();
145            metadata.insert("width".to_string(), width.to_string());
146            metadata.insert("height".to_string(), height.to_string());
147            metadata.insert("format".to_string(), "Png".to_string());
148            metadata.insert("page_index".to_string(), page_index.to_string());
149
150            images.push(CoverMedia {
151                kind: CoverMediaKind::PngImage,
152                data: Bytes::from(img.into_raw()),
153                metadata,
154            });
155        }
156
157        Ok(images)
158    }
159
160    #[expect(
161        clippy::too_many_lines,
162        reason = "PDF reconstruction logic is inherently complex"
163    )]
164    fn rebuild_pdf_from_images(
165        &self,
166        images: Vec<CoverMedia>,
167        _original: &CoverMedia,
168    ) -> Result<CoverMedia, PdfError> {
169        // Create a new PDF document
170        let mut doc = Document::with_version("1.7");
171
172        // Add each image as a page
173        for (page_index, img_media) in images.iter().enumerate() {
174            // Parse dimensions from metadata
175            let width: u32 = img_media
176                .metadata
177                .get("width")
178                .ok_or_else(|| PdfError::RebuildFailed {
179                    reason: "missing width metadata".to_string(),
180                })?
181                .parse()
182                .map_err(|e: std::num::ParseIntError| PdfError::RebuildFailed {
183                    reason: e.to_string(),
184                })?;
185
186            let height: u32 = img_media
187                .metadata
188                .get("height")
189                .ok_or_else(|| PdfError::RebuildFailed {
190                    reason: "missing height metadata".to_string(),
191                })?
192                .parse()
193                .map_err(|e: std::num::ParseIntError| PdfError::RebuildFailed {
194                    reason: e.to_string(),
195                })?;
196
197            // Convert RGBA data to PNG bytes
198            let img = image::RgbaImage::from_raw(width, height, img_media.data.to_vec())
199                .ok_or_else(|| PdfError::RebuildFailed {
200                    reason: "invalid image dimensions or data length".to_string(),
201                })?;
202
203            let dynamic_img = DynamicImage::ImageRgba8(img);
204            let mut png_bytes = Vec::new();
205            dynamic_img
206                .write_to(&mut std::io::Cursor::new(&mut png_bytes), ImageFormat::Png)
207                .map_err(|e| PdfError::RebuildFailed {
208                    reason: e.to_string(),
209                })?;
210
211            // Create a page with the image dimensions (convert pixels to points: 72 DPI)
212            #[expect(clippy::cast_precision_loss, reason = "image dimensions to PDF points")]
213            let page_width = width as f32 * 72.0 / f32::from(self.dpi);
214            #[expect(clippy::cast_precision_loss, reason = "image dimensions to PDF points")]
215            let page_height = height as f32 * 72.0 / f32::from(self.dpi);
216
217            let page_id = doc.new_object_id();
218            let page = doc.add_object(lopdf::dictionary! {
219                "Type" => "Page",
220                "MediaBox" => vec![0.into(), 0.into(), page_width.into(), page_height.into()],
221                "Contents" => Object::Reference((page_id.0 + 1, 0)),
222                "Resources" => lopdf::dictionary! {
223                    "XObject" => lopdf::dictionary! {
224                        "Image1" => Object::Reference((page_id.0 + 2, 0)),
225                    },
226                },
227            });
228
229            // Create content stream that displays the image
230            let content = format!("q\n{page_width} 0 0 {page_height} 0 0 cm\n/Image1 Do\nQ");
231            let content_id = doc.add_object(lopdf::Stream::new(
232                lopdf::dictionary! {},
233                content.into_bytes(),
234            ));
235
236            // Add the PNG image as an XObject
237            let image_id = doc.add_object(lopdf::Stream::new(
238                lopdf::dictionary! {
239                    "Type" => "XObject",
240                    "Subtype" => "Image",
241                    "Width" => i64::from(width),
242                    "Height" => i64::from(height),
243                    "ColorSpace" => "DeviceRGB",
244                    "BitsPerComponent" => 8,
245                    "Filter" => "FlateDecode",
246                },
247                png_bytes,
248            ));
249
250            // Verify object IDs match what we referenced
251            assert_eq!(page, (page_id.0, 0));
252            assert_eq!(content_id, (page_id.0 + 1, 0));
253            assert_eq!(image_id, (page_id.0 + 2, 0));
254
255            // Add page to pages collection
256            if doc.catalog().is_err() {
257                // Create catalog and pages root
258                let pages_obj_id = doc.new_object_id();
259                let catalog_id = doc.add_object(lopdf::dictionary! {
260                    "Type" => "Catalog",
261                    "Pages" => Object::Reference(pages_obj_id),
262                });
263                doc.trailer.set("Root", Object::Reference(catalog_id));
264
265                doc.objects.insert(
266                    pages_obj_id,
267                    lopdf::Object::Dictionary(lopdf::dictionary! {
268                        "Type" => "Pages",
269                        "Kids" => vec![Object::Reference(page)],
270                        "Count" => 1,
271                    }),
272                );
273            } else {
274                // Add to existing pages
275                if let Ok(pages_ref) = doc.catalog().and_then(|c| c.get(b"Pages"))
276                    && let Ok(pages_obj_id) = pages_ref.as_reference()
277                    && let Ok(pages_dict) = doc.get_object_mut(pages_obj_id)
278                    && let Object::Dictionary(dict) = pages_dict
279                {
280                    // Get current kids array
281                    let mut kids = if let Ok(Object::Array(arr)) = dict.get(b"Kids") {
282                        arr.clone()
283                    } else {
284                        vec![]
285                    };
286                    kids.push(Object::Reference(page));
287
288                    dict.set("Kids", Object::Array(kids));
289                    #[expect(clippy::cast_possible_wrap, reason = "page count fits in i64")]
290                    dict.set("Count", (page_index + 1) as i64);
291                }
292            }
293        }
294
295        // Serialize to bytes
296        let mut pdf_bytes = Vec::new();
297        doc.save_to(&mut BufWriter::new(&mut pdf_bytes))
298            .map_err(|e| PdfError::RebuildFailed {
299                reason: e.to_string(),
300            })?;
301
302        // Build metadata
303        let mut metadata = HashMap::new();
304        metadata.insert(KEY_PAGE_COUNT.to_string(), images.len().to_string());
305
306        Ok(CoverMedia {
307            kind: CoverMediaKind::PdfDocument,
308            data: Bytes::from(pdf_bytes),
309            metadata,
310        })
311    }
312
313    fn embed_in_content_stream(
314        &self,
315        pdf: CoverMedia,
316        payload: &Payload,
317    ) -> Result<CoverMedia, PdfError> {
318        // Load PDF from bytes
319        let mut doc = Document::load_from(&pdf.data[..]).map_err(|e| PdfError::ParseFailed {
320            reason: e.to_string(),
321        })?;
322
323        // Convert payload to bits
324        let payload_bits: Vec<u8> = payload
325            .as_bytes()
326            .iter()
327            .flat_map(|byte| (0..8).rev().map(move |i| (byte >> i) & 1))
328            .collect();
329
330        let mut bit_index = 0;
331
332        // Iterate through all objects to find content streams
333        let object_ids: Vec<_> = doc.objects.keys().copied().collect();
334        for obj_id in object_ids {
335            if bit_index >= payload_bits.len() {
336                break;
337            }
338
339            if let Ok(obj) = doc.get_object_mut(obj_id)
340                && let Object::Stream(stream) = obj
341            {
342                // Parse content stream
343                let content = String::from_utf8_lossy(&stream.content);
344                let mut modified_content = String::new();
345                let mut tokens: Vec<&str> = content.split_whitespace().collect();
346
347                for token in &mut tokens {
348                    if bit_index >= payload_bits.len() {
349                        modified_content.push_str(token);
350                        modified_content.push(' ');
351                        continue;
352                    }
353
354                    // Check if token is a number
355                    if let Ok(mut num) = token.parse::<i32>() {
356                        // Embed bit in LSB — bit_index < payload_bits.len() guaranteed by guard above
357                        if let Some(&bit) = payload_bits.get(bit_index) {
358                            if bit == 1 {
359                                num |= 1; // Set LSB
360                            } else {
361                                num &= !1; // Clear LSB
362                            }
363                        }
364                        modified_content.push_str(&num.to_string());
365                        bit_index += 1;
366                    } else {
367                        modified_content.push_str(token);
368                    }
369                    modified_content.push(' ');
370                }
371
372                // Update stream content
373                stream.set_content(modified_content.trim().as_bytes().to_vec());
374            }
375        }
376
377        if bit_index < payload_bits.len() {
378            return Err(PdfError::EmbedFailed {
379                reason: format!(
380                    "insufficient capacity: embedded {bit_index}/{} bits",
381                    payload_bits.len()
382                ),
383            });
384        }
385
386        // Serialize modified PDF
387        let mut pdf_bytes = Vec::new();
388        doc.save_to(&mut pdf_bytes)
389            .map_err(|e| PdfError::EmbedFailed {
390                reason: e.to_string(),
391            })?;
392
393        Ok(CoverMedia {
394            kind: pdf.kind,
395            data: Bytes::from(pdf_bytes),
396            metadata: pdf.metadata,
397        })
398    }
399
400    fn extract_from_content_stream(&self, pdf: &CoverMedia) -> Result<Payload, PdfError> {
401        // Load PDF from bytes
402        let doc = Document::load_from(&pdf.data[..]).map_err(|e| PdfError::ParseFailed {
403            reason: e.to_string(),
404        })?;
405
406        let mut extracted_bits = Vec::new();
407
408        // Iterate through all objects to find content streams
409        for obj in doc.objects.values() {
410            if let Object::Stream(stream) = obj {
411                // Parse content stream
412                let content = String::from_utf8_lossy(&stream.content);
413                let tokens: Vec<&str> = content.split_whitespace().collect();
414
415                for token in tokens {
416                    // Check if token is a number
417                    if let Ok(num) = token.parse::<i32>() {
418                        // Extract LSB
419                        #[expect(clippy::cast_sign_loss, reason = "LSB is always 0 or 1")]
420                        extracted_bits.push((num & 1) as u8);
421                    }
422                }
423            }
424        }
425
426        // Convert bits to bytes
427        if extracted_bits.is_empty() {
428            return Err(PdfError::ExtractFailed {
429                reason: "no numeric values found in content streams".to_string(),
430            });
431        }
432
433        let mut payload_bytes = Vec::new();
434        for chunk in extracted_bits.chunks(8) {
435            if chunk.len() == 8 {
436                let mut byte = 0u8;
437                for (i, bit) in chunk.iter().enumerate() {
438                    byte |= bit << (7 - i);
439                }
440                payload_bytes.push(byte);
441            }
442        }
443
444        Ok(Payload::from_bytes(payload_bytes))
445    }
446
447    fn embed_in_metadata(
448        &self,
449        pdf: CoverMedia,
450        payload: &Payload,
451    ) -> Result<CoverMedia, PdfError> {
452        // Load PDF from bytes
453        let mut doc = Document::load_from(&pdf.data[..]).map_err(|e| PdfError::ParseFailed {
454            reason: e.to_string(),
455        })?;
456
457        // Base64-encode payload
458        let encoded = general_purpose::STANDARD.encode(payload.as_bytes());
459
460        // Create XMP metadata with custom field
461        let xmp_content = format!(
462            r#"<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>
463<x:xmpmeta xmlns:x="adobe:ns:meta/">
464  <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
465    <rdf:Description rdf:about=""
466      xmlns:sf="http://shadowforge.org/ns/1.0/">
467      <sf:HiddenData>{encoded}</sf:HiddenData>
468    </rdf:Description>
469  </rdf:RDF>
470</x:xmpmeta>
471<?xpacket end="w"?>"#
472        );
473
474        // Create metadata stream
475        let metadata_id = doc.add_object(lopdf::Stream::new(
476            lopdf::dictionary! {
477                "Type" => "Metadata",
478                "Subtype" => "XML",
479            },
480            xmp_content.into_bytes(),
481        ));
482
483        // Add metadata reference to catalog
484        if let Ok(catalog) = doc.catalog_mut() {
485            catalog.set("Metadata", Object::Reference(metadata_id));
486        } else {
487            return Err(PdfError::EmbedFailed {
488                reason: "failed to access catalog".to_string(),
489            });
490        }
491
492        // Serialize modified PDF
493        let mut pdf_bytes = Vec::new();
494        doc.save_to(&mut pdf_bytes)
495            .map_err(|e| PdfError::EmbedFailed {
496                reason: e.to_string(),
497            })?;
498
499        Ok(CoverMedia {
500            kind: pdf.kind,
501            data: Bytes::from(pdf_bytes),
502            metadata: pdf.metadata,
503        })
504    }
505
506    fn extract_from_metadata(&self, pdf: &CoverMedia) -> Result<Payload, PdfError> {
507        // Load PDF from bytes
508        let doc = Document::load_from(&pdf.data[..]).map_err(|e| PdfError::ParseFailed {
509            reason: e.to_string(),
510        })?;
511
512        // Get catalog
513        let catalog = doc.catalog().map_err(|e| PdfError::ExtractFailed {
514            reason: format!("failed to access catalog: {e}"),
515        })?;
516
517        // Get metadata reference
518        let metadata_ref = catalog
519            .get(b"Metadata")
520            .map_err(|_| PdfError::ExtractFailed {
521                reason: "no metadata found in catalog".to_string(),
522            })?
523            .as_reference()
524            .map_err(|_| PdfError::ExtractFailed {
525                reason: "metadata is not a reference".to_string(),
526            })?;
527
528        // Get metadata stream
529        let metadata_obj = doc
530            .get_object(metadata_ref)
531            .map_err(|e| PdfError::ExtractFailed {
532                reason: format!("failed to get metadata object: {e}"),
533            })?;
534
535        let metadata_stream = metadata_obj
536            .as_stream()
537            .map_err(|_| PdfError::ExtractFailed {
538                reason: "metadata is not a stream".to_string(),
539            })?;
540
541        // Parse XMP content
542        let xmp_content = String::from_utf8_lossy(&metadata_stream.content);
543
544        // Extract base64 data from <sf:HiddenData> tag
545        let start_tag = "<sf:HiddenData>";
546        let end_tag = "</sf:HiddenData>";
547
548        let start_idx = xmp_content
549            .find(start_tag)
550            .ok_or_else(|| PdfError::ExtractFailed {
551                reason: "no sf:HiddenData tag found".to_string(),
552            })?
553            .strict_add(start_tag.len());
554
555        let end_idx = xmp_content
556            .find(end_tag)
557            .ok_or_else(|| PdfError::ExtractFailed {
558                reason: "no closing sf:HiddenData tag found".to_string(),
559            })?;
560
561        let encoded_data = &xmp_content[start_idx..end_idx];
562
563        // Decode base64
564        let decoded = general_purpose::STANDARD
565            .decode(encoded_data.trim())
566            .map_err(|e| PdfError::ExtractFailed {
567                reason: format!("base64 decode failed: {e}"),
568            })?;
569
570        Ok(Payload::from_bytes(decoded))
571    }
572}
573
574// ─── Tests ────────────────────────────────────────────────────────────────────
575
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    type TestResult = Result<(), Box<dyn std::error::Error>>;

    /// Content stream for pages that need no drawing operators.
    const EMPTY_STREAM: &[u8] = b"";

    /// Builds a PDF 1.7 document with one 612x792 page per entry of
    /// `page_streams`, using each entry as that page's content stream.
    ///
    /// Shared by every test so the fixture is defined in one place.
    fn build_pdf(page_streams: &[&[u8]]) -> Result<Document, Box<dyn std::error::Error>> {
        let mut doc = Document::with_version("1.7");
        let catalog_pages = doc.new_object_id();
        let mut kids = Vec::with_capacity(page_streams.len());

        for stream_bytes in page_streams {
            let page_id = doc.new_object_id();
            let content_id =
                doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, stream_bytes.to_vec()));
            doc.objects.insert(
                page_id,
                Object::Dictionary(lopdf::dictionary! {
                    "Type" => "Page",
                    "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
                    "Contents" => Object::Reference(content_id),
                }),
            );
            kids.push(Object::Reference(page_id));
        }

        let page_total = i64::try_from(kids.len())?;
        doc.objects.insert(
            catalog_pages,
            Object::Dictionary(lopdf::dictionary! {
                "Type" => "Pages",
                "Kids" => kids,
                "Count" => page_total,
            }),
        );

        let catalog_id = doc.add_object(lopdf::dictionary! {
            "Type" => "Catalog",
            "Pages" => Object::Reference(catalog_pages),
        });
        doc.trailer.set("Root", Object::Reference(catalog_id));

        Ok(doc)
    }

    #[test]
    fn test_load_minimal_pdf() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("minimal.pdf");

        // Minimal valid PDF with one page.
        build_pdf(&[EMPTY_STREAM])?.save(&path)?;

        let media = processor.load_pdf(&path)?;
        assert_eq!(media.kind, CoverMediaKind::PdfDocument);
        assert_eq!(media.metadata.get(KEY_PAGE_COUNT), Some(&"1".to_string()));
        Ok(())
    }

    #[test]
    #[ignore = "requires pdfium system library"]
    fn test_render_pages_returns_correct_count() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("two_page.pdf");

        build_pdf(&[EMPTY_STREAM, EMPTY_STREAM])?.save(&path)?;

        let media = processor.load_pdf(&path)?;
        let images = processor.render_pages_to_images(&media)?;
        assert_eq!(images.len(), 2);
        Ok(())
    }

    #[test]
    #[ignore = "requires pdfium system library"]
    fn test_rebuild_pdf_roundtrip() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("original.pdf");

        build_pdf(&[EMPTY_STREAM, EMPTY_STREAM])?.save(&path)?;

        // Load, render, rebuild.
        let original = processor.load_pdf(&path)?;
        let images = processor.render_pages_to_images(&original)?;
        let rebuilt = processor.rebuild_pdf_from_images(images, &original)?;

        // Save and reload to check that the page count survives.
        let rebuilt_path = dir.path().join("rebuilt.pdf");
        processor.save_pdf(&rebuilt, &rebuilt_path)?;
        let reloaded = processor.load_pdf(&rebuilt_path)?;

        assert_eq!(
            reloaded.metadata.get(KEY_PAGE_COUNT),
            original.metadata.get(KEY_PAGE_COUNT)
        );
        Ok(())
    }

    #[test]
    #[ignore = "lopdf requires actual encrypted content, not just Encrypt trailer"]
    fn test_encrypted_pdf_error() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("encrypted.pdf");

        let mut doc = build_pdf(&[EMPTY_STREAM])?;

        // Attach an encryption dictionary through the trailer.
        let encrypt_id = doc.add_object(lopdf::dictionary! {
            "Filter" => "Standard",
            "V" => 1,
            "R" => 2,
        });
        doc.trailer.set("Encrypt", Object::Reference(encrypt_id));

        doc.save(&path)?;

        let result = processor.load_pdf(&path);
        assert!(matches!(result, Err(PdfError::Encrypted)));
        Ok(())
    }

    #[test]
    fn test_content_stream_lsb_roundtrip() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("test.pdf");

        // Content stream with enough integer tokens to carry one byte.
        let content: &[u8] = b"BT\n/F1 12 Tf\n100 700 Td\n(Hello) Tj\n200 650 Td\n(World) Tj\n50 600 Td\n(Test) Tj\n150 550 Td\n(PDF) Tj\nET\n1 0 0 1 0 0 cm\n";
        build_pdf(&[content])?.save(&path)?;

        // Embed a payload small enough for the limited capacity.
        let original = processor.load_pdf(&path)?;
        let payload = Payload::from_bytes(vec![0xAB]); // 1 byte = 8 bits (need 8+ numbers)
        let stego = processor.embed_in_content_stream(original, &payload)?;

        // The stego PDF must still load.
        let stego_path = dir.path().join("stego.pdf");
        processor.save_pdf(&stego, &stego_path)?;
        let reloaded = processor.load_pdf(&stego_path)?;

        let extracted = processor.extract_from_content_stream(&reloaded)?;
        assert_eq!(extracted.as_bytes(), payload.as_bytes());
        Ok(())
    }

    #[test]
    fn test_metadata_embed_roundtrip() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("test.pdf");

        build_pdf(&[EMPTY_STREAM])?.save(&path)?;

        let original = processor.load_pdf(&path)?;
        let payload = Payload::from_bytes(vec![0u8; 128]); // 128-byte payload
        let stego = processor.embed_in_metadata(original, &payload)?;

        // The stego PDF must still load.
        let stego_path = dir.path().join("stego.pdf");
        processor.save_pdf(&stego, &stego_path)?;
        let reloaded = processor.load_pdf(&stego_path)?;

        let extracted = processor.extract_from_metadata(&reloaded)?;
        assert_eq!(extracted.as_bytes(), payload.as_bytes());
        Ok(())
    }
}
929}