Skip to main content

shadowforge_lib/adapters/
pdf.rs

1//! PDF processing adapter using lopdf and pdfium-render.
2
3use std::collections::HashMap;
4use std::io::BufWriter;
5use std::path::Path;
6
7use base64::Engine;
8use base64::engine::general_purpose;
9use bytes::Bytes;
10use image::{DynamicImage, ImageFormat};
11use lopdf::{Document, Object, dictionary};
12use pdfium_render::prelude::*;
13
14use crate::domain::analysis::estimate_capacity;
15use crate::domain::errors::{PdfError, StegoError};
16use crate::domain::ports::{EmbedTechnique, ExtractTechnique, PdfProcessor};
17use crate::domain::types::{Capacity, CoverMedia, CoverMediaKind, Payload, StegoTechnique};
18
/// Metadata key under which a PDF cover's page count is stored.
const KEY_PAGE_COUNT: &str = "page_count";
/// Rasterisation resolution (dots per inch) used when none is supplied.
const DEFAULT_DPI: u16 = 150;

/// PDF processor implementation using lopdf and pdfium-render.
///
/// Handles PDF loading/saving, page rasterisation, and PDF reconstruction.
#[derive(Debug)]
pub struct PdfProcessorImpl {
    /// DPI for page rasterisation. Never zero: both constructors guarantee it.
    dpi: u16,
}

impl Default for PdfProcessorImpl {
    /// Builds a processor that rasterises at [`DEFAULT_DPI`].
    fn default() -> Self {
        Self { dpi: DEFAULT_DPI }
    }
}

impl PdfProcessorImpl {
    /// Create a new PDF processor with the given DPI.
    ///
    /// A `dpi` of zero cannot describe a raster resolution (pixel/point
    /// conversions divide by it), so it is replaced by [`DEFAULT_DPI`].
    #[must_use]
    pub const fn new(dpi: u16) -> Self {
        let dpi = if dpi == 0 { DEFAULT_DPI } else { dpi };
        Self { dpi }
    }
}
45
46impl PdfProcessor for PdfProcessorImpl {
47    fn load_pdf(&self, path: &Path) -> Result<CoverMedia, PdfError> {
48        // Load PDF document
49        let doc = Document::load(path).map_err(|e| PdfError::ParseFailed {
50            reason: e.to_string(),
51        })?;
52
53        // Check if encrypted
54        if doc.is_encrypted() {
55            return Err(PdfError::Encrypted);
56        }
57
58        // Count pages
59        let page_count = doc.get_pages().len();
60
61        // Read raw bytes
62        let bytes = std::fs::read(path).map_err(|e| PdfError::IoError {
63            reason: e.to_string(),
64        })?;
65
66        // Build metadata
67        let mut metadata = HashMap::new();
68        metadata.insert(KEY_PAGE_COUNT.to_string(), page_count.to_string());
69
70        Ok(CoverMedia {
71            kind: CoverMediaKind::PdfDocument,
72            data: Bytes::from(bytes),
73            metadata,
74        })
75    }
76
77    fn save_pdf(&self, media: &CoverMedia, path: &Path) -> Result<(), PdfError> {
78        // Write raw PDF bytes to file
79        std::fs::write(path, &media.data).map_err(|e| PdfError::IoError {
80            reason: e.to_string(),
81        })?;
82
83        Ok(())
84    }
85
86    fn render_pages_to_images(&self, pdf: &CoverMedia) -> Result<Vec<CoverMedia>, PdfError> {
87        // Initialize pdfium library
88        let pdfium = Pdfium::new(
89            Pdfium::bind_to_library(Pdfium::pdfium_platform_library_name_at_path("./"))
90                .or_else(|_| Pdfium::bind_to_system_library())
91                .map_err(|e| PdfError::RenderFailed {
92                    page: 0,
93                    reason: format!("Failed to load pdfium library: {e}"),
94                })?,
95        );
96
97        // Load PDF from bytes
98        let document = pdfium
99            .load_pdf_from_byte_vec(pdf.data.to_vec(), None)
100            .map_err(|e| PdfError::ParseFailed {
101                reason: e.to_string(),
102            })?;
103
104        let page_count = document.pages().len();
105        let mut images = Vec::with_capacity(page_count as usize);
106
107        // Render each page
108        for page_index in 0..page_count {
109            let page = document
110                .pages()
111                .get(page_index)
112                .map_err(|e| PdfError::RenderFailed {
113                    page: page_index as usize,
114                    reason: e.to_string(),
115                })?;
116
117            // Render to bitmap
118            #[expect(
119                clippy::cast_possible_truncation,
120                reason = "DPI calculation for render"
121            )]
122            let target_width = (page.width().value * f32::from(self.dpi) / 72.0) as i32;
123
124            let bitmap = page
125                .render_with_config(&PdfRenderConfig::new().set_target_width(target_width))
126                .map_err(|e| PdfError::RenderFailed {
127                    page: page_index as usize,
128                    reason: e.to_string(),
129                })?;
130
131            // Convert to RGBA8 image
132            let width = bitmap.width().cast_unsigned();
133            let height = bitmap.height().cast_unsigned();
134            let rgba_data = bitmap.as_rgba_bytes();
135
136            let img =
137                image::RgbaImage::from_raw(width, height, rgba_data.clone()).ok_or_else(|| {
138                    PdfError::RenderFailed {
139                        page: page_index as usize,
140                        reason: "invalid bitmap dimensions".to_string(),
141                    }
142                })?;
143
144            // Build metadata
145            let mut metadata = HashMap::new();
146            metadata.insert("width".to_string(), width.to_string());
147            metadata.insert("height".to_string(), height.to_string());
148            metadata.insert("format".to_string(), "Png".to_string());
149            metadata.insert("page_index".to_string(), page_index.to_string());
150
151            images.push(CoverMedia {
152                kind: CoverMediaKind::PngImage,
153                data: Bytes::from(img.into_raw()),
154                metadata,
155            });
156        }
157
158        Ok(images)
159    }
160
161    #[expect(
162        clippy::too_many_lines,
163        reason = "PDF reconstruction logic is inherently complex"
164    )]
165    fn rebuild_pdf_from_images(
166        &self,
167        images: Vec<CoverMedia>,
168        _original: &CoverMedia,
169    ) -> Result<CoverMedia, PdfError> {
170        // Create a new PDF document
171        let mut doc = Document::with_version("1.7");
172
173        // Add each image as a page
174        for (page_index, img_media) in images.iter().enumerate() {
175            // Parse dimensions from metadata
176            let width: u32 = img_media
177                .metadata
178                .get("width")
179                .ok_or_else(|| PdfError::RebuildFailed {
180                    reason: "missing width metadata".to_string(),
181                })?
182                .parse()
183                .map_err(|e: std::num::ParseIntError| PdfError::RebuildFailed {
184                    reason: e.to_string(),
185                })?;
186
187            let height: u32 = img_media
188                .metadata
189                .get("height")
190                .ok_or_else(|| PdfError::RebuildFailed {
191                    reason: "missing height metadata".to_string(),
192                })?
193                .parse()
194                .map_err(|e: std::num::ParseIntError| PdfError::RebuildFailed {
195                    reason: e.to_string(),
196                })?;
197
198            // Convert RGBA data to PNG bytes
199            let img = image::RgbaImage::from_raw(width, height, img_media.data.to_vec())
200                .ok_or_else(|| PdfError::RebuildFailed {
201                    reason: "invalid image dimensions or data length".to_string(),
202                })?;
203
204            let dynamic_img = DynamicImage::ImageRgba8(img);
205            let mut png_bytes = Vec::new();
206            dynamic_img
207                .write_to(&mut std::io::Cursor::new(&mut png_bytes), ImageFormat::Png)
208                .map_err(|e| PdfError::RebuildFailed {
209                    reason: e.to_string(),
210                })?;
211
212            // Create a page with the image dimensions (convert pixels to points: 72 DPI)
213            #[expect(clippy::cast_precision_loss, reason = "image dimensions to PDF points")]
214            let page_width = width as f32 * 72.0 / f32::from(self.dpi);
215            #[expect(clippy::cast_precision_loss, reason = "image dimensions to PDF points")]
216            let page_height = height as f32 * 72.0 / f32::from(self.dpi);
217
218            let page_id = doc.new_object_id();
219            let page = doc.add_object(lopdf::dictionary! {
220                "Type" => "Page",
221                "MediaBox" => vec![0.into(), 0.into(), page_width.into(), page_height.into()],
222                "Contents" => Object::Reference((page_id.0 + 1, 0)),
223                "Resources" => lopdf::dictionary! {
224                    "XObject" => lopdf::dictionary! {
225                        "Image1" => Object::Reference((page_id.0 + 2, 0)),
226                    },
227                },
228            });
229
230            // Create content stream that displays the image
231            let content = format!("q\n{page_width} 0 0 {page_height} 0 0 cm\n/Image1 Do\nQ");
232            let content_id = doc.add_object(lopdf::Stream::new(
233                lopdf::dictionary! {},
234                content.into_bytes(),
235            ));
236
237            // Add the PNG image as an XObject
238            let image_id = doc.add_object(lopdf::Stream::new(
239                lopdf::dictionary! {
240                    "Type" => "XObject",
241                    "Subtype" => "Image",
242                    "Width" => i64::from(width),
243                    "Height" => i64::from(height),
244                    "ColorSpace" => "DeviceRGB",
245                    "BitsPerComponent" => 8,
246                    "Filter" => "FlateDecode",
247                },
248                png_bytes,
249            ));
250
251            // Verify object IDs match what we referenced
252            assert_eq!(page, (page_id.0, 0));
253            assert_eq!(content_id, (page_id.0 + 1, 0));
254            assert_eq!(image_id, (page_id.0 + 2, 0));
255
256            // Add page to pages collection
257            if doc.catalog().is_err() {
258                // Create catalog and pages root
259                let pages_obj_id = doc.new_object_id();
260                let catalog_id = doc.add_object(lopdf::dictionary! {
261                    "Type" => "Catalog",
262                    "Pages" => Object::Reference(pages_obj_id),
263                });
264                doc.trailer.set("Root", Object::Reference(catalog_id));
265
266                doc.objects.insert(
267                    pages_obj_id,
268                    lopdf::Object::Dictionary(lopdf::dictionary! {
269                        "Type" => "Pages",
270                        "Kids" => vec![Object::Reference(page)],
271                        "Count" => 1,
272                    }),
273                );
274            } else {
275                // Add to existing pages
276                if let Ok(pages_ref) = doc.catalog().and_then(|c| c.get(b"Pages"))
277                    && let Ok(pages_obj_id) = pages_ref.as_reference()
278                    && let Ok(pages_dict) = doc.get_object_mut(pages_obj_id)
279                    && let Object::Dictionary(dict) = pages_dict
280                {
281                    // Get current kids array
282                    let mut kids = if let Ok(Object::Array(arr)) = dict.get(b"Kids") {
283                        arr.clone()
284                    } else {
285                        vec![]
286                    };
287                    kids.push(Object::Reference(page));
288
289                    dict.set("Kids", Object::Array(kids));
290                    #[expect(clippy::cast_possible_wrap, reason = "page count fits in i64")]
291                    dict.set("Count", (page_index + 1) as i64);
292                }
293            }
294        }
295
296        // Serialize to bytes
297        let mut pdf_bytes = Vec::new();
298        doc.save_to(&mut BufWriter::new(&mut pdf_bytes))
299            .map_err(|e| PdfError::RebuildFailed {
300                reason: e.to_string(),
301            })?;
302
303        // Build metadata
304        let mut metadata = HashMap::new();
305        metadata.insert(KEY_PAGE_COUNT.to_string(), images.len().to_string());
306
307        Ok(CoverMedia {
308            kind: CoverMediaKind::PdfDocument,
309            data: Bytes::from(pdf_bytes),
310            metadata,
311        })
312    }
313
314    fn embed_in_content_stream(
315        &self,
316        pdf: CoverMedia,
317        payload: &Payload,
318    ) -> Result<CoverMedia, PdfError> {
319        // Load PDF from bytes
320        let mut doc = Document::load_from(&pdf.data[..]).map_err(|e| PdfError::ParseFailed {
321            reason: e.to_string(),
322        })?;
323
324        // Convert payload to bits
325        let payload_bits: Vec<u8> = payload
326            .as_bytes()
327            .iter()
328            .flat_map(|byte| (0..8).rev().map(move |i| (byte >> i) & 1))
329            .collect();
330
331        let mut bit_index = 0;
332
333        // Iterate through all objects to find content streams
334        let object_ids: Vec<_> = doc.objects.keys().copied().collect();
335        for obj_id in object_ids {
336            if bit_index >= payload_bits.len() {
337                break;
338            }
339
340            if let Ok(obj) = doc.get_object_mut(obj_id)
341                && let Object::Stream(stream) = obj
342            {
343                // Parse content stream
344                let content = String::from_utf8_lossy(&stream.content);
345                let mut modified_content = String::new();
346                let mut tokens: Vec<&str> = content.split_whitespace().collect();
347
348                for token in &mut tokens {
349                    if bit_index >= payload_bits.len() {
350                        modified_content.push_str(token);
351                        modified_content.push(' ');
352                        continue;
353                    }
354
355                    // Check if token is a number
356                    if let Ok(mut num) = token.parse::<i32>() {
357                        // Embed bit in LSB — bit_index < payload_bits.len() guaranteed by guard above
358                        if let Some(&bit) = payload_bits.get(bit_index) {
359                            if bit == 1 {
360                                num |= 1; // Set LSB
361                            } else {
362                                num &= !1; // Clear LSB
363                            }
364                        }
365                        modified_content.push_str(&num.to_string());
366                        bit_index += 1;
367                    } else {
368                        modified_content.push_str(token);
369                    }
370                    modified_content.push(' ');
371                }
372
373                // Update stream content
374                stream.set_content(modified_content.trim().as_bytes().to_vec());
375            }
376        }
377
378        if bit_index < payload_bits.len() {
379            return Err(PdfError::EmbedFailed {
380                reason: format!(
381                    "insufficient capacity: embedded {bit_index}/{} bits",
382                    payload_bits.len()
383                ),
384            });
385        }
386
387        // Serialize modified PDF
388        let mut pdf_bytes = Vec::new();
389        doc.save_to(&mut pdf_bytes)
390            .map_err(|e| PdfError::EmbedFailed {
391                reason: e.to_string(),
392            })?;
393
394        Ok(CoverMedia {
395            kind: pdf.kind,
396            data: Bytes::from(pdf_bytes),
397            metadata: pdf.metadata,
398        })
399    }
400
401    fn extract_from_content_stream(&self, pdf: &CoverMedia) -> Result<Payload, PdfError> {
402        // Load PDF from bytes
403        let doc = Document::load_from(&pdf.data[..]).map_err(|e| PdfError::ParseFailed {
404            reason: e.to_string(),
405        })?;
406
407        let mut extracted_bits = Vec::new();
408
409        // Iterate through all objects to find content streams
410        for obj in doc.objects.values() {
411            if let Object::Stream(stream) = obj {
412                // Parse content stream
413                let content = String::from_utf8_lossy(&stream.content);
414                let tokens: Vec<&str> = content.split_whitespace().collect();
415
416                for token in tokens {
417                    // Check if token is a number
418                    if let Ok(num) = token.parse::<i32>() {
419                        // Extract LSB
420                        #[expect(clippy::cast_sign_loss, reason = "LSB is always 0 or 1")]
421                        extracted_bits.push((num & 1) as u8);
422                    }
423                }
424            }
425        }
426
427        // Convert bits to bytes
428        if extracted_bits.is_empty() {
429            return Err(PdfError::ExtractFailed {
430                reason: "no numeric values found in content streams".to_string(),
431            });
432        }
433
434        let mut payload_bytes = Vec::new();
435        for chunk in extracted_bits.chunks(8) {
436            if chunk.len() == 8 {
437                let mut byte = 0u8;
438                for (i, bit) in chunk.iter().enumerate() {
439                    byte |= bit << (7 - i);
440                }
441                payload_bytes.push(byte);
442            }
443        }
444
445        Ok(Payload::from_bytes(payload_bytes))
446    }
447
448    fn embed_in_metadata(
449        &self,
450        pdf: CoverMedia,
451        payload: &Payload,
452    ) -> Result<CoverMedia, PdfError> {
453        // Load PDF from bytes
454        let mut doc = Document::load_from(&pdf.data[..]).map_err(|e| PdfError::ParseFailed {
455            reason: e.to_string(),
456        })?;
457
458        // Base64-encode payload
459        let encoded = general_purpose::STANDARD.encode(payload.as_bytes());
460
461        // Create XMP metadata with custom field
462        let xmp_content = format!(
463            r#"<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>
464<x:xmpmeta xmlns:x="adobe:ns:meta/">
465  <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
466    <rdf:Description rdf:about=""
467      xmlns:sf="http://shadowforge.org/ns/1.0/">
468      <sf:HiddenData>{encoded}</sf:HiddenData>
469    </rdf:Description>
470  </rdf:RDF>
471</x:xmpmeta>
472<?xpacket end="w"?>"#
473        );
474
475        // Create metadata stream
476        let metadata_id = doc.add_object(lopdf::Stream::new(
477            lopdf::dictionary! {
478                "Type" => "Metadata",
479                "Subtype" => "XML",
480            },
481            xmp_content.into_bytes(),
482        ));
483
484        // Add metadata reference to catalog
485        if let Ok(catalog) = doc.catalog_mut() {
486            catalog.set("Metadata", Object::Reference(metadata_id));
487        } else {
488            return Err(PdfError::EmbedFailed {
489                reason: "failed to access catalog".to_string(),
490            });
491        }
492
493        // Serialize modified PDF
494        let mut pdf_bytes = Vec::new();
495        doc.save_to(&mut pdf_bytes)
496            .map_err(|e| PdfError::EmbedFailed {
497                reason: e.to_string(),
498            })?;
499
500        Ok(CoverMedia {
501            kind: pdf.kind,
502            data: Bytes::from(pdf_bytes),
503            metadata: pdf.metadata,
504        })
505    }
506
507    fn extract_from_metadata(&self, pdf: &CoverMedia) -> Result<Payload, PdfError> {
508        // Load PDF from bytes
509        let doc = Document::load_from(&pdf.data[..]).map_err(|e| PdfError::ParseFailed {
510            reason: e.to_string(),
511        })?;
512
513        // Get catalog
514        let catalog = doc.catalog().map_err(|e| PdfError::ExtractFailed {
515            reason: format!("failed to access catalog: {e}"),
516        })?;
517
518        // Get metadata reference
519        let metadata_ref = catalog
520            .get(b"Metadata")
521            .map_err(|_| PdfError::ExtractFailed {
522                reason: "no metadata found in catalog".to_string(),
523            })?
524            .as_reference()
525            .map_err(|_| PdfError::ExtractFailed {
526                reason: "metadata is not a reference".to_string(),
527            })?;
528
529        // Get metadata stream
530        let metadata_obj = doc
531            .get_object(metadata_ref)
532            .map_err(|e| PdfError::ExtractFailed {
533                reason: format!("failed to get metadata object: {e}"),
534            })?;
535
536        let metadata_stream = metadata_obj
537            .as_stream()
538            .map_err(|_| PdfError::ExtractFailed {
539                reason: "metadata is not a stream".to_string(),
540            })?;
541
542        // Parse XMP content
543        let xmp_content = String::from_utf8_lossy(&metadata_stream.content);
544
545        // Extract base64 data from <sf:HiddenData> tag
546        let start_tag = "<sf:HiddenData>";
547        let end_tag = "</sf:HiddenData>";
548
549        let start_idx = xmp_content
550            .find(start_tag)
551            .ok_or_else(|| PdfError::ExtractFailed {
552                reason: "no sf:HiddenData tag found".to_string(),
553            })?
554            .strict_add(start_tag.len());
555
556        let end_idx = xmp_content
557            .find(end_tag)
558            .ok_or_else(|| PdfError::ExtractFailed {
559                reason: "no closing sf:HiddenData tag found".to_string(),
560            })?;
561
562        let encoded_data = &xmp_content[start_idx..end_idx];
563
564        // Decode base64
565        let decoded = general_purpose::STANDARD
566            .decode(encoded_data.trim())
567            .map_err(|e| PdfError::ExtractFailed {
568                reason: format!("base64 decode failed: {e}"),
569            })?;
570
571        Ok(Payload::from_bytes(decoded))
572    }
573}
574
575fn ensure_pdf_cover(cover: &CoverMedia, technique: StegoTechnique) -> Result<Capacity, StegoError> {
576    if cover.kind != CoverMediaKind::PdfDocument {
577        return Err(StegoError::UnsupportedCoverType {
578            reason: format!("{technique:?} requires a PDF cover"),
579        });
580    }
581
582    Ok(Capacity {
583        bytes: estimate_capacity(cover, technique),
584        technique,
585    })
586}
587
588fn map_pdf_error(error: PdfError) -> StegoError {
589    match error {
590        PdfError::Encrypted => StegoError::UnsupportedCoverType {
591            reason: "encrypted PDF documents are not supported".to_string(),
592        },
593        PdfError::ExtractFailed { .. } => StegoError::NoPayloadFound,
594        PdfError::RenderFailed { page, reason } => StegoError::MalformedCoverData {
595            reason: format!("pdf render failed on page {page}: {reason}"),
596        },
597        PdfError::ParseFailed { reason }
598        | PdfError::RebuildFailed { reason }
599        | PdfError::EmbedFailed { reason }
600        | PdfError::IoError { reason } => StegoError::MalformedCoverData {
601            reason: format!("pdf processing failed: {reason}"),
602        },
603    }
604}
605
606/// Stego adapter that embeds payloads in PDF content streams.
607#[derive(Debug, Default)]
608pub struct PdfContentStreamStego {
609    processor: PdfProcessorImpl,
610}
611
612impl PdfContentStreamStego {
613    /// Create a content-stream PDF stego adapter with default processor settings.
614    #[must_use]
615    pub fn new() -> Self {
616        Self::default()
617    }
618}
619
620impl EmbedTechnique for PdfContentStreamStego {
621    fn technique(&self) -> StegoTechnique {
622        StegoTechnique::PdfContentStream
623    }
624
625    fn capacity(&self, cover: &CoverMedia) -> Result<Capacity, StegoError> {
626        ensure_pdf_cover(cover, <Self as EmbedTechnique>::technique(self))
627    }
628
629    fn embed(&self, cover: CoverMedia, payload: &Payload) -> Result<CoverMedia, StegoError> {
630        ensure_pdf_cover(&cover, <Self as EmbedTechnique>::technique(self))?;
631        self.processor
632            .embed_in_content_stream(cover, payload)
633            .map_err(map_pdf_error)
634    }
635}
636
637impl ExtractTechnique for PdfContentStreamStego {
638    fn technique(&self) -> StegoTechnique {
639        StegoTechnique::PdfContentStream
640    }
641
642    fn extract(&self, stego: &CoverMedia) -> Result<Payload, StegoError> {
643        ensure_pdf_cover(stego, <Self as ExtractTechnique>::technique(self))?;
644        self.processor
645            .extract_from_content_stream(stego)
646            .map_err(map_pdf_error)
647    }
648}
649
650/// Stego adapter that embeds payloads in PDF metadata fields.
651#[derive(Debug, Default)]
652pub struct PdfMetadataStego {
653    processor: PdfProcessorImpl,
654}
655
656impl PdfMetadataStego {
657    /// Create a metadata PDF stego adapter with default processor settings.
658    #[must_use]
659    pub fn new() -> Self {
660        Self::default()
661    }
662}
663
664impl EmbedTechnique for PdfMetadataStego {
665    fn technique(&self) -> StegoTechnique {
666        StegoTechnique::PdfMetadata
667    }
668
669    fn capacity(&self, cover: &CoverMedia) -> Result<Capacity, StegoError> {
670        ensure_pdf_cover(cover, <Self as EmbedTechnique>::technique(self))
671    }
672
673    fn embed(&self, cover: CoverMedia, payload: &Payload) -> Result<CoverMedia, StegoError> {
674        ensure_pdf_cover(&cover, <Self as EmbedTechnique>::technique(self))?;
675        self.processor
676            .embed_in_metadata(cover, payload)
677            .map_err(map_pdf_error)
678    }
679}
680
681impl ExtractTechnique for PdfMetadataStego {
682    fn technique(&self) -> StegoTechnique {
683        StegoTechnique::PdfMetadata
684    }
685
686    fn extract(&self, stego: &CoverMedia) -> Result<Payload, StegoError> {
687        ensure_pdf_cover(stego, <Self as ExtractTechnique>::technique(self))?;
688        self.processor
689            .extract_from_metadata(stego)
690            .map_err(map_pdf_error)
691    }
692}
693
694// ─── Tests ────────────────────────────────────────────────────────────────────
695
696#[cfg(test)]
697mod tests {
698    use super::*;
699    use tempfile::tempdir;
700
701    type TestResult = Result<(), Box<dyn std::error::Error>>;
702
703    #[test]
704    fn test_load_minimal_pdf() -> TestResult {
705        let processor = PdfProcessorImpl::default();
706        let dir = tempdir()?;
707        let path = dir.path().join("minimal.pdf");
708
709        // Create a minimal valid PDF with one page
710        let mut doc = Document::with_version("1.7");
711        let catalog_pages = doc.new_object_id();
712        let first_page = doc.new_object_id();
713
714        doc.objects.insert(
715            first_page,
716            Object::Dictionary(lopdf::dictionary! {
717                "Type" => "Page",
718                "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
719                "Contents" => Object::Reference((first_page.0 + 1, 0)),
720            }),
721        );
722
723        doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, b"".to_vec()));
724
725        doc.objects.insert(
726            catalog_pages,
727            Object::Dictionary(lopdf::dictionary! {
728                "Type" => "Pages",
729                "Kids" => vec![Object::Reference(first_page)],
730                "Count" => 1,
731            }),
732        );
733
734        let catalog_id = doc.add_object(lopdf::dictionary! {
735            "Type" => "Catalog",
736            "Pages" => Object::Reference(catalog_pages),
737        });
738
739        doc.trailer.set("Root", Object::Reference(catalog_id));
740        doc.save(&path)?;
741
742        // Load it
743        let media = processor.load_pdf(&path)?;
744        assert_eq!(media.kind, CoverMediaKind::PdfDocument);
745        assert_eq!(media.metadata.get(KEY_PAGE_COUNT), Some(&"1".to_string()));
746        Ok(())
747    }
748
749    #[test]
750    #[ignore = "requires pdfium system library"]
751    fn test_render_pages_returns_correct_count() -> TestResult {
752        let processor = PdfProcessorImpl::default();
753        let dir = tempdir()?;
754        let path = dir.path().join("two_page.pdf");
755
756        // Create a 2-page PDF
757        let mut doc = Document::with_version("1.7");
758        let catalog_pages = doc.new_object_id();
759
760        let page1_id = doc.new_object_id();
761        doc.objects.insert(
762            page1_id,
763            Object::Dictionary(lopdf::dictionary! {
764                "Type" => "Page",
765                "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
766                "Contents" => Object::Reference((page1_id.0 + 1, 0)),
767            }),
768        );
769        doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, b"".to_vec()));
770
771        let page2_id = doc.new_object_id();
772        doc.objects.insert(
773            page2_id,
774            Object::Dictionary(lopdf::dictionary! {
775                "Type" => "Page",
776                "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
777                "Contents" => Object::Reference((page2_id.0 + 1, 0)),
778            }),
779        );
780        doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, b"".to_vec()));
781
782        doc.objects.insert(
783            catalog_pages,
784            Object::Dictionary(lopdf::dictionary! {
785                "Type" => "Pages",
786                "Kids" => vec![
787                    Object::Reference(page1_id),
788                    Object::Reference(page2_id),
789                ],
790                "Count" => 2,
791            }),
792        );
793
794        let catalog_id = doc.add_object(lopdf::dictionary! {
795            "Type" => "Catalog",
796            "Pages" => Object::Reference(catalog_pages),
797        });
798
799        doc.trailer.set("Root", Object::Reference(catalog_id));
800        doc.save(&path)?;
801
802        // Load and render
803        let media = processor.load_pdf(&path)?;
804        let images = processor.render_pages_to_images(&media)?;
805        assert_eq!(images.len(), 2);
806        Ok(())
807    }
808
809    #[test]
810    #[ignore = "requires pdfium system library"]
811    fn test_rebuild_pdf_roundtrip() -> TestResult {
812        let processor = PdfProcessorImpl::default();
813        let dir = tempdir()?;
814        let path = dir.path().join("original.pdf");
815
816        // Create a 2-page PDF
817        let mut doc = Document::with_version("1.7");
818        let catalog_pages = doc.new_object_id();
819
820        let page1_id = doc.new_object_id();
821        doc.objects.insert(
822            page1_id,
823            Object::Dictionary(lopdf::dictionary! {
824                "Type" => "Page",
825                "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
826                "Contents" => Object::Reference((page1_id.0 + 1, 0)),
827            }),
828        );
829        doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, b"".to_vec()));
830
831        let page2_id = doc.new_object_id();
832        doc.objects.insert(
833            page2_id,
834            Object::Dictionary(lopdf::dictionary! {
835                "Type" => "Page",
836                "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
837                "Contents" => Object::Reference((page2_id.0 + 1, 0)),
838            }),
839        );
840        doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, b"".to_vec()));
841
842        doc.objects.insert(
843            catalog_pages,
844            Object::Dictionary(lopdf::dictionary! {
845                "Type" => "Pages",
846                "Kids" => vec![
847                    Object::Reference(page1_id),
848                    Object::Reference(page2_id),
849                ],
850                "Count" => 2,
851            }),
852        );
853
854        let catalog_id = doc.add_object(lopdf::dictionary! {
855            "Type" => "Catalog",
856            "Pages" => Object::Reference(catalog_pages),
857        });
858
859        doc.trailer.set("Root", Object::Reference(catalog_id));
860        doc.save(&path)?;
861
862        // Load, render, rebuild, and reload
863        let original = processor.load_pdf(&path)?;
864        let images = processor.render_pages_to_images(&original)?;
865        let rebuilt = processor.rebuild_pdf_from_images(images, &original)?;
866
867        // Save and reload to verify
868        let rebuilt_path = dir.path().join("rebuilt.pdf");
869        processor.save_pdf(&rebuilt, &rebuilt_path)?;
870        let reloaded = processor.load_pdf(&rebuilt_path)?;
871
872        assert_eq!(
873            reloaded.metadata.get(KEY_PAGE_COUNT),
874            original.metadata.get(KEY_PAGE_COUNT)
875        );
876        Ok(())
877    }
878
879    #[test]
880    #[ignore = "lopdf requires actual encrypted content, not just Encrypt trailer"]
881    fn test_encrypted_pdf_error() -> TestResult {
882        let processor = PdfProcessorImpl::default();
883        let dir = tempdir()?;
884        let path = dir.path().join("encrypted.pdf");
885
886        // Create an encrypted PDF
887        let mut doc = Document::with_version("1.7");
888        let catalog_pages = doc.new_object_id();
889        let first_page = doc.new_object_id();
890
891        doc.objects.insert(
892            first_page,
893            Object::Dictionary(lopdf::dictionary! {
894                "Type" => "Page",
895                "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
896                "Contents" => Object::Reference((first_page.0 + 1, 0)),
897            }),
898        );
899
900        doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, b"".to_vec()));
901
902        doc.objects.insert(
903            catalog_pages,
904            Object::Dictionary(lopdf::dictionary! {
905                "Type" => "Pages",
906                "Kids" => vec![Object::Reference(first_page)],
907                "Count" => 1,
908            }),
909        );
910
911        let catalog_id = doc.add_object(lopdf::dictionary! {
912            "Type" => "Catalog",
913            "Pages" => Object::Reference(catalog_pages),
914        });
915
916        doc.trailer.set("Root", Object::Reference(catalog_id));
917
918        // Add encryption dictionary
919        doc.trailer
920            .set("Encrypt", Object::Reference((doc.max_id + 1, 0)));
921        doc.objects.insert(
922            (doc.max_id + 1, 0),
923            Object::Dictionary(lopdf::dictionary! {
924                "Filter" => "Standard",
925                "V" => 1,
926                "R" => 2,
927            }),
928        );
929
930        doc.save(&path)?;
931
932        // Try to load it
933        let result = processor.load_pdf(&path);
934        assert!(matches!(result, Err(PdfError::Encrypted)));
935        Ok(())
936    }
937
938    #[test]
939    fn test_content_stream_lsb_roundtrip() -> TestResult {
940        let processor = PdfProcessorImpl::default();
941        let dir = tempdir()?;
942        let path = dir.path().join("test.pdf");
943
944        // Create a test PDF with content stream
945        let mut doc = Document::with_version("1.7");
946        let catalog_pages = doc.new_object_id();
947        let first_page = doc.new_object_id();
948
949        doc.objects.insert(
950            first_page,
951            Object::Dictionary(lopdf::dictionary! {
952                "Type" => "Page",
953                "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
954                "Contents" => Object::Reference((first_page.0 + 1, 0)),
955            }),
956        );
957
958        // Content stream with many numeric values for capacity
959        let content = b"BT\n/F1 12 Tf\n100 700 Td\n(Hello) Tj\n200 650 Td\n(World) Tj\n50 600 Td\n(Test) Tj\n150 550 Td\n(PDF) Tj\nET\n1 0 0 1 0 0 cm\n";
960        doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, content.to_vec()));
961
962        doc.objects.insert(
963            catalog_pages,
964            Object::Dictionary(lopdf::dictionary! {
965                "Type" => "Pages",
966                "Kids" => vec![Object::Reference(first_page)],
967                "Count" => 1,
968            }),
969        );
970
971        let catalog_id = doc.add_object(lopdf::dictionary! {
972            "Type" => "Catalog",
973            "Pages" => Object::Reference(catalog_pages),
974        });
975
976        doc.trailer.set("Root", Object::Reference(catalog_id));
977        doc.save(&path)?;
978
979        // Load and embed payload (very small to fit limited capacity)
980        let original = processor.load_pdf(&path)?;
981        let payload = Payload::from_bytes(vec![0xAB]); // 1 byte = 8 bits (need 8+ numbers)
982        let stego = processor.embed_in_content_stream(original, &payload)?;
983
984        // Verify PDF is still parseable
985        let stego_path = dir.path().join("stego.pdf");
986        processor.save_pdf(&stego, &stego_path)?;
987        let reloaded = processor.load_pdf(&stego_path)?;
988
989        // Extract and verify
990        let extracted = processor.extract_from_content_stream(&reloaded)?;
991        assert_eq!(extracted.as_bytes(), payload.as_bytes());
992        Ok(())
993    }
994
995    #[test]
996    fn test_metadata_embed_roundtrip() -> TestResult {
997        let processor = PdfProcessorImpl::default();
998        let dir = tempdir()?;
999        let path = dir.path().join("test.pdf");
1000
1001        // Create a minimal test PDF
1002        let mut doc = Document::with_version("1.7");
1003        let catalog_pages = doc.new_object_id();
1004        let first_page = doc.new_object_id();
1005
1006        doc.objects.insert(
1007            first_page,
1008            Object::Dictionary(lopdf::dictionary! {
1009                "Type" => "Page",
1010                "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
1011                "Contents" => Object::Reference((first_page.0 + 1, 0)),
1012            }),
1013        );
1014
1015        doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, b"".to_vec()));
1016
1017        doc.objects.insert(
1018            catalog_pages,
1019            Object::Dictionary(lopdf::dictionary! {
1020                "Type" => "Pages",
1021                "Kids" => vec![Object::Reference(first_page)],
1022                "Count" => 1,
1023            }),
1024        );
1025
1026        let catalog_id = doc.add_object(lopdf::dictionary! {
1027            "Type" => "Catalog",
1028            "Pages" => Object::Reference(catalog_pages),
1029        });
1030
1031        doc.trailer.set("Root", Object::Reference(catalog_id));
1032        doc.save(&path)?;
1033
1034        // Load and embed payload
1035        let original = processor.load_pdf(&path)?;
1036        let payload = Payload::from_bytes(vec![0u8; 128]); // 128-byte payload
1037        let stego = processor.embed_in_metadata(original, &payload)?;
1038
1039        // Verify PDF is still parseable
1040        let stego_path = dir.path().join("stego.pdf");
1041        processor.save_pdf(&stego, &stego_path)?;
1042        let reloaded = processor.load_pdf(&stego_path)?;
1043
1044        // Extract and verify
1045        let extracted = processor.extract_from_metadata(&reloaded)?;
1046        assert_eq!(extracted.as_bytes(), payload.as_bytes());
1047        Ok(())
1048    }
1049}