Skip to main content

shadowforge_lib/adapters/
pdf.rs

1//! PDF processing adapter using lopdf and pdfium-render.
2
3use std::collections::HashMap;
4use std::env;
5use std::io::BufWriter;
6use std::path::Path;
7
8use base64::Engine;
9use base64::engine::general_purpose;
10use bytes::Bytes;
11use image::{DynamicImage, ImageFormat};
12use lopdf::{Document, Object, dictionary};
13use pdfium_render::prelude::*;
14
15use crate::domain::analysis::estimate_capacity;
16use crate::domain::errors::{PdfError, StegoError};
17use crate::domain::ports::{EmbedTechnique, ExtractTechnique, PdfProcessor};
18use crate::domain::types::{Capacity, CoverMedia, CoverMediaKind, Payload, StegoTechnique};
19
// Metadata keys
/// Key under which the number of pages is stored in a PDF cover's metadata map.
const KEY_PAGE_COUNT: &str = "page_count";
/// Rasterisation resolution (dots per inch) used by `PdfProcessorImpl::default()`.
const DEFAULT_DPI: u16 = 150;
23
/// PDF processor implementation using lopdf and pdfium-render.
///
/// Handles PDF loading/saving, page rasterisation, and PDF reconstruction.
/// The configured DPI is used both when rendering pages to bitmaps and when
/// converting pixel dimensions back to PDF points during reconstruction.
#[derive(Debug)]
pub struct PdfProcessorImpl {
    /// DPI for page rasterisation.
    dpi: u16,
}
32
33impl Default for PdfProcessorImpl {
34    fn default() -> Self {
35        Self { dpi: DEFAULT_DPI }
36    }
37}
38
39impl PdfProcessorImpl {
40    /// Create a new PDF processor with the given DPI.
41    #[must_use]
42    pub const fn new(dpi: u16) -> Self {
43        Self { dpi }
44    }
45
46    fn bind_pdfium() -> Result<Pdfium, PdfError> {
47        let mut bind_errors = Vec::new();
48
49        // 1. Try explicit env var override (highest priority)
50        if let Some(pdfium_dir) = env::var_os("PDFIUM_DYNAMIC_LIB_PATH") {
51            let library_path = Pdfium::pdfium_platform_library_name_at_path(&pdfium_dir);
52            match Pdfium::bind_to_library(library_path) {
53                Ok(bindings) => return Ok(Pdfium::new(bindings)),
54                Err(error) => bind_errors.push(format!(
55                    "PDFIUM_DYNAMIC_LIB_PATH={}: {error}",
56                    Path::new(&pdfium_dir).display()
57                )),
58            }
59        }
60
61        // 2. Try system library (searched via the OS dynamic linker)
62        match Pdfium::bind_to_system_library() {
63            Ok(bindings) => return Ok(Pdfium::new(bindings)),
64            Err(error) => {
65                bind_errors.push(format!("system library: {error}"));
66            }
67        }
68
69        // 3. Try local directory (e.g., ./)
70        let local_library = Pdfium::pdfium_platform_library_name_at_path("./");
71        match Pdfium::bind_to_library(local_library) {
72            Ok(bindings) => return Ok(Pdfium::new(bindings)),
73            Err(error) => bind_errors.push(format!("./: {error}")),
74        }
75
76        // 4. Fail with helpful error — use PdfError::BindFailed so it's not confused with a page render failure
77        Err(PdfError::BindFailed {
78            reason: format!(
79                "Failed to load pdfium library. Binding attempts: {}. \
80                 Download a prebuilt binary from https://github.com/bblanchon/pdfium-binaries/, \
81                 set PDFIUM_DYNAMIC_LIB_PATH, or disable the 'pdf' feature with --no-default-features --features corpus,adaptive.",
82                bind_errors.join("; ")
83            ),
84        })
85    }
86}
87
88impl PdfProcessor for PdfProcessorImpl {
89    fn load_pdf(&self, path: &Path) -> Result<CoverMedia, PdfError> {
90        // Load PDF document
91        let doc = Document::load(path).map_err(|e| PdfError::ParseFailed {
92            reason: e.to_string(),
93        })?;
94
95        // Check if encrypted
96        if doc.is_encrypted() {
97            return Err(PdfError::Encrypted);
98        }
99
100        // Count pages
101        let page_count = doc.get_pages().len();
102
103        // Read raw bytes
104        let bytes = std::fs::read(path).map_err(|e| PdfError::IoError {
105            reason: e.to_string(),
106        })?;
107
108        // Build metadata
109        let mut metadata = HashMap::new();
110        metadata.insert(KEY_PAGE_COUNT.to_string(), page_count.to_string());
111
112        Ok(CoverMedia {
113            kind: CoverMediaKind::PdfDocument,
114            data: Bytes::from(bytes),
115            metadata,
116        })
117    }
118
119    fn save_pdf(&self, media: &CoverMedia, path: &Path) -> Result<(), PdfError> {
120        // Write raw PDF bytes to file
121        std::fs::write(path, &media.data).map_err(|e| PdfError::IoError {
122            reason: e.to_string(),
123        })?;
124
125        Ok(())
126    }
127
128    fn render_pages_to_images(&self, pdf: &CoverMedia) -> Result<Vec<CoverMedia>, PdfError> {
129        // Initialize pdfium library using the CI-provided path when available.
130        let pdfium = Self::bind_pdfium()?;
131
132        // Load PDF from bytes
133        let document = pdfium
134            .load_pdf_from_byte_vec(pdf.data.to_vec(), None)
135            .map_err(|e| PdfError::ParseFailed {
136                reason: e.to_string(),
137            })?;
138
139        let page_count = document.pages().len();
140        let mut images = Vec::with_capacity(page_count as usize);
141
142        // Render each page
143        for page_index in 0..page_count {
144            let page = document
145                .pages()
146                .get(page_index)
147                .map_err(|e| PdfError::RenderFailed {
148                    page: page_index as usize,
149                    reason: e.to_string(),
150                })?;
151
152            // Render to bitmap
153            #[expect(
154                clippy::cast_possible_truncation,
155                reason = "DPI calculation for render"
156            )]
157            let target_width = (page.width().value * f32::from(self.dpi) / 72.0) as i32;
158
159            let bitmap = page
160                .render_with_config(&PdfRenderConfig::new().set_target_width(target_width))
161                .map_err(|e| PdfError::RenderFailed {
162                    page: page_index as usize,
163                    reason: e.to_string(),
164                })?;
165
166            // Convert to RGBA8 image
167            let width = bitmap.width().cast_unsigned();
168            let height = bitmap.height().cast_unsigned();
169            let rgba_data = bitmap.as_rgba_bytes();
170
171            let img =
172                image::RgbaImage::from_raw(width, height, rgba_data.clone()).ok_or_else(|| {
173                    PdfError::RenderFailed {
174                        page: page_index as usize,
175                        reason: "invalid bitmap dimensions".to_string(),
176                    }
177                })?;
178
179            // Build metadata
180            let mut metadata = HashMap::new();
181            metadata.insert("width".to_string(), width.to_string());
182            metadata.insert("height".to_string(), height.to_string());
183            metadata.insert("format".to_string(), "Png".to_string());
184            metadata.insert("page_index".to_string(), page_index.to_string());
185
186            images.push(CoverMedia {
187                kind: CoverMediaKind::PngImage,
188                data: Bytes::from(img.into_raw()),
189                metadata,
190            });
191        }
192
193        Ok(images)
194    }
195
196    #[expect(
197        clippy::too_many_lines,
198        reason = "PDF reconstruction logic is inherently complex"
199    )]
200    fn rebuild_pdf_from_images(
201        &self,
202        images: Vec<CoverMedia>,
203        _original: &CoverMedia,
204    ) -> Result<CoverMedia, PdfError> {
205        // Create a new PDF document
206        let mut doc = Document::with_version("1.7");
207
208        // Add each image as a page
209        for (page_index, img_media) in images.iter().enumerate() {
210            // Parse dimensions from metadata
211            let width: u32 = img_media
212                .metadata
213                .get("width")
214                .ok_or_else(|| PdfError::RebuildFailed {
215                    reason: "missing width metadata".to_string(),
216                })?
217                .parse()
218                .map_err(|e: std::num::ParseIntError| PdfError::RebuildFailed {
219                    reason: e.to_string(),
220                })?;
221
222            let height: u32 = img_media
223                .metadata
224                .get("height")
225                .ok_or_else(|| PdfError::RebuildFailed {
226                    reason: "missing height metadata".to_string(),
227                })?
228                .parse()
229                .map_err(|e: std::num::ParseIntError| PdfError::RebuildFailed {
230                    reason: e.to_string(),
231                })?;
232
233            // Convert RGBA data to PNG bytes
234            let img = image::RgbaImage::from_raw(width, height, img_media.data.to_vec())
235                .ok_or_else(|| PdfError::RebuildFailed {
236                    reason: "invalid image dimensions or data length".to_string(),
237                })?;
238
239            let dynamic_img = DynamicImage::ImageRgba8(img);
240            let mut png_bytes = Vec::new();
241            dynamic_img
242                .write_to(&mut std::io::Cursor::new(&mut png_bytes), ImageFormat::Png)
243                .map_err(|e| PdfError::RebuildFailed {
244                    reason: e.to_string(),
245                })?;
246
247            // Create a page with the image dimensions (convert pixels to points: 72 DPI)
248            #[expect(clippy::cast_precision_loss, reason = "image dimensions to PDF points")]
249            let page_width = width as f32 * 72.0 / f32::from(self.dpi);
250            #[expect(clippy::cast_precision_loss, reason = "image dimensions to PDF points")]
251            let page_height = height as f32 * 72.0 / f32::from(self.dpi);
252
253            let page_id = doc.new_object_id();
254            let page = doc.add_object(lopdf::dictionary! {
255                "Type" => "Page",
256                "MediaBox" => vec![0.into(), 0.into(), page_width.into(), page_height.into()],
257                "Contents" => Object::Reference((page_id.0 + 1, 0)),
258                "Resources" => lopdf::dictionary! {
259                    "XObject" => lopdf::dictionary! {
260                        "Image1" => Object::Reference((page_id.0 + 2, 0)),
261                    },
262                },
263            });
264
265            // Create content stream that displays the image
266            let content = format!("q\n{page_width} 0 0 {page_height} 0 0 cm\n/Image1 Do\nQ");
267            let content_id = doc.add_object(lopdf::Stream::new(
268                lopdf::dictionary! {},
269                content.into_bytes(),
270            ));
271
272            // Add the PNG image as an XObject
273            let image_id = doc.add_object(lopdf::Stream::new(
274                lopdf::dictionary! {
275                    "Type" => "XObject",
276                    "Subtype" => "Image",
277                    "Width" => i64::from(width),
278                    "Height" => i64::from(height),
279                    "ColorSpace" => "DeviceRGB",
280                    "BitsPerComponent" => 8,
281                    "Filter" => "FlateDecode",
282                },
283                png_bytes,
284            ));
285
286            // Verify object IDs match what we referenced
287            assert_eq!(page, (page_id.0, 0));
288            assert_eq!(content_id, (page_id.0 + 1, 0));
289            assert_eq!(image_id, (page_id.0 + 2, 0));
290
291            // Add page to pages collection
292            if doc.catalog().is_err() {
293                // Create catalog and pages root
294                let pages_obj_id = doc.new_object_id();
295                let catalog_id = doc.add_object(lopdf::dictionary! {
296                    "Type" => "Catalog",
297                    "Pages" => Object::Reference(pages_obj_id),
298                });
299                doc.trailer.set("Root", Object::Reference(catalog_id));
300
301                doc.objects.insert(
302                    pages_obj_id,
303                    lopdf::Object::Dictionary(lopdf::dictionary! {
304                        "Type" => "Pages",
305                        "Kids" => vec![Object::Reference(page)],
306                        "Count" => 1,
307                    }),
308                );
309            } else {
310                // Add to existing pages
311                if let Ok(pages_ref) = doc.catalog().and_then(|c| c.get(b"Pages"))
312                    && let Ok(pages_obj_id) = pages_ref.as_reference()
313                    && let Ok(pages_dict) = doc.get_object_mut(pages_obj_id)
314                    && let Object::Dictionary(dict) = pages_dict
315                {
316                    // Get current kids array
317                    let mut kids = if let Ok(Object::Array(arr)) = dict.get(b"Kids") {
318                        arr.clone()
319                    } else {
320                        vec![]
321                    };
322                    kids.push(Object::Reference(page));
323
324                    dict.set("Kids", Object::Array(kids));
325                    #[expect(clippy::cast_possible_wrap, reason = "page count fits in i64")]
326                    dict.set("Count", (page_index + 1) as i64);
327                }
328            }
329        }
330
331        // Serialize to bytes
332        let mut pdf_bytes = Vec::new();
333        doc.save_to(&mut BufWriter::new(&mut pdf_bytes))
334            .map_err(|e| PdfError::RebuildFailed {
335                reason: e.to_string(),
336            })?;
337
338        // Build metadata
339        let mut metadata = HashMap::new();
340        metadata.insert(KEY_PAGE_COUNT.to_string(), images.len().to_string());
341
342        Ok(CoverMedia {
343            kind: CoverMediaKind::PdfDocument,
344            data: Bytes::from(pdf_bytes),
345            metadata,
346        })
347    }
348
349    fn embed_in_content_stream(
350        &self,
351        pdf: CoverMedia,
352        payload: &Payload,
353    ) -> Result<CoverMedia, PdfError> {
354        // Load PDF from bytes
355        let mut doc = Document::load_from(&pdf.data[..]).map_err(|e| PdfError::ParseFailed {
356            reason: e.to_string(),
357        })?;
358
359        // Convert payload to bits
360        let payload_bits: Vec<u8> = payload
361            .as_bytes()
362            .iter()
363            .flat_map(|byte| (0..8).rev().map(move |i| (byte >> i) & 1))
364            .collect();
365
366        let mut bit_index = 0;
367
368        // Iterate through all objects to find content streams
369        let object_ids: Vec<_> = doc.objects.keys().copied().collect();
370        for obj_id in object_ids {
371            if bit_index >= payload_bits.len() {
372                break;
373            }
374
375            if let Ok(obj) = doc.get_object_mut(obj_id)
376                && let Object::Stream(stream) = obj
377            {
378                // Parse content stream
379                let content = String::from_utf8_lossy(&stream.content);
380                let mut modified_content = String::new();
381                let mut tokens: Vec<&str> = content.split_whitespace().collect();
382
383                for token in &mut tokens {
384                    if bit_index >= payload_bits.len() {
385                        modified_content.push_str(token);
386                        modified_content.push(' ');
387                        continue;
388                    }
389
390                    // Check if token is a number
391                    if let Ok(mut num) = token.parse::<i32>() {
392                        // Embed bit in LSB — bit_index < payload_bits.len() guaranteed by guard above
393                        if let Some(&bit) = payload_bits.get(bit_index) {
394                            if bit == 1 {
395                                num |= 1; // Set LSB
396                            } else {
397                                num &= !1; // Clear LSB
398                            }
399                        }
400                        modified_content.push_str(&num.to_string());
401                        bit_index += 1;
402                    } else {
403                        modified_content.push_str(token);
404                    }
405                    modified_content.push(' ');
406                }
407
408                // Update stream content
409                stream.set_content(modified_content.trim().as_bytes().to_vec());
410            }
411        }
412
413        if bit_index < payload_bits.len() {
414            return Err(PdfError::EmbedFailed {
415                reason: format!(
416                    "insufficient capacity: embedded {bit_index}/{} bits",
417                    payload_bits.len()
418                ),
419            });
420        }
421
422        // Serialize modified PDF
423        let mut pdf_bytes = Vec::new();
424        doc.save_to(&mut pdf_bytes)
425            .map_err(|e| PdfError::EmbedFailed {
426                reason: e.to_string(),
427            })?;
428
429        Ok(CoverMedia {
430            kind: pdf.kind,
431            data: Bytes::from(pdf_bytes),
432            metadata: pdf.metadata,
433        })
434    }
435
436    fn extract_from_content_stream(&self, pdf: &CoverMedia) -> Result<Payload, PdfError> {
437        // Load PDF from bytes
438        let doc = Document::load_from(&pdf.data[..]).map_err(|e| PdfError::ParseFailed {
439            reason: e.to_string(),
440        })?;
441
442        let mut extracted_bits = Vec::new();
443
444        // Iterate through all objects to find content streams
445        for obj in doc.objects.values() {
446            if let Object::Stream(stream) = obj {
447                // Parse content stream
448                let content = String::from_utf8_lossy(&stream.content);
449                let tokens: Vec<&str> = content.split_whitespace().collect();
450
451                for token in tokens {
452                    // Check if token is a number
453                    if let Ok(num) = token.parse::<i32>() {
454                        // Extract LSB
455                        #[expect(clippy::cast_sign_loss, reason = "LSB is always 0 or 1")]
456                        extracted_bits.push((num & 1) as u8);
457                    }
458                }
459            }
460        }
461
462        // Convert bits to bytes
463        if extracted_bits.is_empty() {
464            return Err(PdfError::ExtractFailed {
465                reason: "no numeric values found in content streams".to_string(),
466            });
467        }
468
469        let mut payload_bytes = Vec::new();
470        for chunk in extracted_bits.chunks(8) {
471            if chunk.len() == 8 {
472                let mut byte = 0u8;
473                for (i, bit) in chunk.iter().enumerate() {
474                    byte |= bit << (7 - i);
475                }
476                payload_bytes.push(byte);
477            }
478        }
479
480        Ok(Payload::from_bytes(payload_bytes))
481    }
482
483    fn embed_in_metadata(
484        &self,
485        pdf: CoverMedia,
486        payload: &Payload,
487    ) -> Result<CoverMedia, PdfError> {
488        // Load PDF from bytes
489        let mut doc = Document::load_from(&pdf.data[..]).map_err(|e| PdfError::ParseFailed {
490            reason: e.to_string(),
491        })?;
492
493        // Base64-encode payload
494        let encoded = general_purpose::STANDARD.encode(payload.as_bytes());
495
496        // Create XMP metadata with custom field
497        let xmp_content = format!(
498            r#"<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>
499<x:xmpmeta xmlns:x="adobe:ns:meta/">
500  <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
501    <rdf:Description rdf:about=""
502      xmlns:sf="http://shadowforge.org/ns/1.0/">
503      <sf:HiddenData>{encoded}</sf:HiddenData>
504    </rdf:Description>
505  </rdf:RDF>
506</x:xmpmeta>
507<?xpacket end="w"?>"#
508        );
509
510        // Create metadata stream
511        let metadata_id = doc.add_object(lopdf::Stream::new(
512            lopdf::dictionary! {
513                "Type" => "Metadata",
514                "Subtype" => "XML",
515            },
516            xmp_content.into_bytes(),
517        ));
518
519        // Add metadata reference to catalog
520        if let Ok(catalog) = doc.catalog_mut() {
521            catalog.set("Metadata", Object::Reference(metadata_id));
522        } else {
523            return Err(PdfError::EmbedFailed {
524                reason: "failed to access catalog".to_string(),
525            });
526        }
527
528        // Serialize modified PDF
529        let mut pdf_bytes = Vec::new();
530        doc.save_to(&mut pdf_bytes)
531            .map_err(|e| PdfError::EmbedFailed {
532                reason: e.to_string(),
533            })?;
534
535        Ok(CoverMedia {
536            kind: pdf.kind,
537            data: Bytes::from(pdf_bytes),
538            metadata: pdf.metadata,
539        })
540    }
541
542    fn extract_from_metadata(&self, pdf: &CoverMedia) -> Result<Payload, PdfError> {
543        // Load PDF from bytes
544        let doc = Document::load_from(&pdf.data[..]).map_err(|e| PdfError::ParseFailed {
545            reason: e.to_string(),
546        })?;
547
548        // Get catalog
549        let catalog = doc.catalog().map_err(|e| PdfError::ExtractFailed {
550            reason: format!("failed to access catalog: {e}"),
551        })?;
552
553        // Get metadata reference
554        let metadata_ref = catalog
555            .get(b"Metadata")
556            .map_err(|_| PdfError::ExtractFailed {
557                reason: "no metadata found in catalog".to_string(),
558            })?
559            .as_reference()
560            .map_err(|_| PdfError::ExtractFailed {
561                reason: "metadata is not a reference".to_string(),
562            })?;
563
564        // Get metadata stream
565        let metadata_obj = doc
566            .get_object(metadata_ref)
567            .map_err(|e| PdfError::ExtractFailed {
568                reason: format!("failed to get metadata object: {e}"),
569            })?;
570
571        let metadata_stream = metadata_obj
572            .as_stream()
573            .map_err(|_| PdfError::ExtractFailed {
574                reason: "metadata is not a stream".to_string(),
575            })?;
576
577        // Parse XMP content
578        let xmp_content = String::from_utf8_lossy(&metadata_stream.content);
579
580        // Extract base64 data from <sf:HiddenData> tag
581        let start_tag = "<sf:HiddenData>";
582        let end_tag = "</sf:HiddenData>";
583
584        let start_idx = xmp_content
585            .find(start_tag)
586            .ok_or_else(|| PdfError::ExtractFailed {
587                reason: "no sf:HiddenData tag found".to_string(),
588            })?
589            .strict_add(start_tag.len());
590
591        let end_idx = xmp_content
592            .find(end_tag)
593            .ok_or_else(|| PdfError::ExtractFailed {
594                reason: "no closing sf:HiddenData tag found".to_string(),
595            })?;
596
597        let encoded_data = &xmp_content[start_idx..end_idx];
598
599        // Decode base64
600        let decoded = general_purpose::STANDARD
601            .decode(encoded_data.trim())
602            .map_err(|e| PdfError::ExtractFailed {
603                reason: format!("base64 decode failed: {e}"),
604            })?;
605
606        Ok(Payload::from_bytes(decoded))
607    }
608}
609
610fn ensure_pdf_cover(cover: &CoverMedia, technique: StegoTechnique) -> Result<Capacity, StegoError> {
611    if cover.kind != CoverMediaKind::PdfDocument {
612        return Err(StegoError::UnsupportedCoverType {
613            reason: format!("{technique:?} requires a PDF cover"),
614        });
615    }
616
617    Ok(Capacity {
618        bytes: estimate_capacity(cover, technique),
619        technique,
620    })
621}
622
623fn map_pdf_error(error: PdfError) -> StegoError {
624    match error {
625        PdfError::Encrypted => StegoError::UnsupportedCoverType {
626            reason: "encrypted PDF documents are not supported".to_string(),
627        },
628        PdfError::ExtractFailed { .. } => StegoError::NoPayloadFound,
629        PdfError::RenderFailed { page, reason } => StegoError::MalformedCoverData {
630            reason: format!("pdf render failed on page {page}: {reason}"),
631        },
632        PdfError::ParseFailed { reason }
633        | PdfError::RebuildFailed { reason }
634        | PdfError::EmbedFailed { reason }
635        | PdfError::IoError { reason } => StegoError::MalformedCoverData {
636            reason: format!("pdf processing failed: {reason}"),
637        },
638        PdfError::BindFailed { reason } => StegoError::UnsupportedCoverType {
639            reason: format!("pdfium library is not available: {reason}"),
640        },
641    }
642}
643
/// Stego adapter that embeds payloads in PDF content streams.
///
/// Delegates the actual embedding and extraction to a [`PdfProcessorImpl`].
#[derive(Debug, Default)]
pub struct PdfContentStreamStego {
    /// Processor that performs the content-stream embedding and extraction.
    processor: PdfProcessorImpl,
}
649
650impl PdfContentStreamStego {
651    /// Create a content-stream PDF stego adapter with default processor settings.
652    #[must_use]
653    pub fn new() -> Self {
654        Self::default()
655    }
656}
657
658impl EmbedTechnique for PdfContentStreamStego {
659    fn technique(&self) -> StegoTechnique {
660        StegoTechnique::PdfContentStream
661    }
662
663    fn capacity(&self, cover: &CoverMedia) -> Result<Capacity, StegoError> {
664        ensure_pdf_cover(cover, <Self as EmbedTechnique>::technique(self))
665    }
666
667    fn embed(&self, cover: CoverMedia, payload: &Payload) -> Result<CoverMedia, StegoError> {
668        ensure_pdf_cover(&cover, <Self as EmbedTechnique>::technique(self))?;
669        self.processor
670            .embed_in_content_stream(cover, payload)
671            .map_err(map_pdf_error)
672    }
673}
674
675impl ExtractTechnique for PdfContentStreamStego {
676    fn technique(&self) -> StegoTechnique {
677        StegoTechnique::PdfContentStream
678    }
679
680    fn extract(&self, stego: &CoverMedia) -> Result<Payload, StegoError> {
681        ensure_pdf_cover(stego, <Self as ExtractTechnique>::technique(self))?;
682        self.processor
683            .extract_from_content_stream(stego)
684            .map_err(map_pdf_error)
685    }
686}
687
/// Stego adapter that embeds payloads in PDF metadata fields.
///
/// Delegates the actual embedding and extraction to a [`PdfProcessorImpl`].
#[derive(Debug, Default)]
pub struct PdfMetadataStego {
    /// Processor that performs the metadata embedding and extraction.
    processor: PdfProcessorImpl,
}
693
694impl PdfMetadataStego {
695    /// Create a metadata PDF stego adapter with default processor settings.
696    #[must_use]
697    pub fn new() -> Self {
698        Self::default()
699    }
700}
701
702impl EmbedTechnique for PdfMetadataStego {
703    fn technique(&self) -> StegoTechnique {
704        StegoTechnique::PdfMetadata
705    }
706
707    fn capacity(&self, cover: &CoverMedia) -> Result<Capacity, StegoError> {
708        ensure_pdf_cover(cover, <Self as EmbedTechnique>::technique(self))
709    }
710
711    fn embed(&self, cover: CoverMedia, payload: &Payload) -> Result<CoverMedia, StegoError> {
712        ensure_pdf_cover(&cover, <Self as EmbedTechnique>::technique(self))?;
713        self.processor
714            .embed_in_metadata(cover, payload)
715            .map_err(map_pdf_error)
716    }
717}
718
719impl ExtractTechnique for PdfMetadataStego {
720    fn technique(&self) -> StegoTechnique {
721        StegoTechnique::PdfMetadata
722    }
723
724    fn extract(&self, stego: &CoverMedia) -> Result<Payload, StegoError> {
725        ensure_pdf_cover(stego, <Self as ExtractTechnique>::technique(self))?;
726        self.processor
727            .extract_from_metadata(stego)
728            .map_err(map_pdf_error)
729    }
730}
731
732// ─── Tests ────────────────────────────────────────────────────────────────────
733
734#[cfg(test)]
735mod tests {
736    use super::*;
737    use tempfile::tempdir;
738
739    type TestResult = Result<(), Box<dyn std::error::Error>>;
740
    /// Loading a hand-built one-page PDF yields a `PdfDocument` cover whose
    /// metadata reports a page count of 1.
    #[test]
    fn test_load_minimal_pdf() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("minimal.pdf");

        // Create a minimal valid PDF with one page
        let mut doc = Document::with_version("1.7");
        // Ids are reserved first so the page tree and page can be inserted under them later.
        let catalog_pages = doc.new_object_id();
        let first_page = doc.new_object_id();

        doc.objects.insert(
            first_page,
            Object::Dictionary(lopdf::dictionary! {
                "Type" => "Page",
                "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
                "Contents" => Object::Reference((first_page.0 + 1, 0)),
            }),
        );

        // Empty content stream; the page's `Contents` entry above assumes this
        // object receives the id directly after the page's — statement order matters.
        doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, b"".to_vec()));

        doc.objects.insert(
            catalog_pages,
            Object::Dictionary(lopdf::dictionary! {
                "Type" => "Pages",
                "Kids" => vec![Object::Reference(first_page)],
                "Count" => 1,
            }),
        );

        let catalog_id = doc.add_object(lopdf::dictionary! {
            "Type" => "Catalog",
            "Pages" => Object::Reference(catalog_pages),
        });

        doc.trailer.set("Root", Object::Reference(catalog_id));
        doc.save(&path)?;

        // Load it
        let media = processor.load_pdf(&path)?;
        assert_eq!(media.kind, CoverMediaKind::PdfDocument);
        assert_eq!(media.metadata.get(KEY_PAGE_COUNT), Some(&"1".to_string()));
        Ok(())
    }
786
    /// Rendering a hand-built two-page PDF produces exactly two images.
    ///
    /// Ignored by default because it needs a pdfium library at run time.
    #[test]
    #[ignore = "requires pdfium system library"]
    fn test_render_pages_returns_correct_count() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("two_page.pdf");

        // Create a 2-page PDF
        let mut doc = Document::with_version("1.7");
        // Reserved now, inserted once both pages exist.
        let catalog_pages = doc.new_object_id();

        let page1_id = doc.new_object_id();
        doc.objects.insert(
            page1_id,
            Object::Dictionary(lopdf::dictionary! {
                "Type" => "Page",
                "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
                "Contents" => Object::Reference((page1_id.0 + 1, 0)),
            }),
        );
        // Empty content stream for page 1; `Contents` above assumes it gets the next id.
        doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, b"".to_vec()));

        let page2_id = doc.new_object_id();
        doc.objects.insert(
            page2_id,
            Object::Dictionary(lopdf::dictionary! {
                "Type" => "Page",
                "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
                "Contents" => Object::Reference((page2_id.0 + 1, 0)),
            }),
        );
        // Empty content stream for page 2, same id assumption as above.
        doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, b"".to_vec()));

        doc.objects.insert(
            catalog_pages,
            Object::Dictionary(lopdf::dictionary! {
                "Type" => "Pages",
                "Kids" => vec![
                    Object::Reference(page1_id),
                    Object::Reference(page2_id),
                ],
                "Count" => 2,
            }),
        );

        let catalog_id = doc.add_object(lopdf::dictionary! {
            "Type" => "Catalog",
            "Pages" => Object::Reference(catalog_pages),
        });

        doc.trailer.set("Root", Object::Reference(catalog_id));
        doc.save(&path)?;

        // Load and render
        let media = processor.load_pdf(&path)?;
        let images = processor.render_pages_to_images(&media)?;
        assert_eq!(images.len(), 2);
        Ok(())
    }
846
847    #[test]
848    #[ignore = "requires pdfium system library"]
849    fn test_rebuild_pdf_roundtrip() -> TestResult {
850        let processor = PdfProcessorImpl::default();
851        let dir = tempdir()?;
852        let path = dir.path().join("original.pdf");
853
854        // Create a 2-page PDF
855        let mut doc = Document::with_version("1.7");
856        let catalog_pages = doc.new_object_id();
857
858        let page1_id = doc.new_object_id();
859        doc.objects.insert(
860            page1_id,
861            Object::Dictionary(lopdf::dictionary! {
862                "Type" => "Page",
863                "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
864                "Contents" => Object::Reference((page1_id.0 + 1, 0)),
865            }),
866        );
867        doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, b"".to_vec()));
868
869        let page2_id = doc.new_object_id();
870        doc.objects.insert(
871            page2_id,
872            Object::Dictionary(lopdf::dictionary! {
873                "Type" => "Page",
874                "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
875                "Contents" => Object::Reference((page2_id.0 + 1, 0)),
876            }),
877        );
878        doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, b"".to_vec()));
879
880        doc.objects.insert(
881            catalog_pages,
882            Object::Dictionary(lopdf::dictionary! {
883                "Type" => "Pages",
884                "Kids" => vec![
885                    Object::Reference(page1_id),
886                    Object::Reference(page2_id),
887                ],
888                "Count" => 2,
889            }),
890        );
891
892        let catalog_id = doc.add_object(lopdf::dictionary! {
893            "Type" => "Catalog",
894            "Pages" => Object::Reference(catalog_pages),
895        });
896
897        doc.trailer.set("Root", Object::Reference(catalog_id));
898        doc.save(&path)?;
899
900        // Load, render, rebuild, and reload
901        let original = processor.load_pdf(&path)?;
902        let images = processor.render_pages_to_images(&original)?;
903        let rebuilt = processor.rebuild_pdf_from_images(images, &original)?;
904
905        // Save and reload to verify
906        let rebuilt_path = dir.path().join("rebuilt.pdf");
907        processor.save_pdf(&rebuilt, &rebuilt_path)?;
908        let reloaded = processor.load_pdf(&rebuilt_path)?;
909
910        assert_eq!(
911            reloaded.metadata.get(KEY_PAGE_COUNT),
912            original.metadata.get(KEY_PAGE_COUNT)
913        );
914        Ok(())
915    }
916
917    #[test]
918    #[ignore = "lopdf requires actual encrypted content, not just Encrypt trailer"]
919    fn test_encrypted_pdf_error() -> TestResult {
920        let processor = PdfProcessorImpl::default();
921        let dir = tempdir()?;
922        let path = dir.path().join("encrypted.pdf");
923
924        // Create an encrypted PDF
925        let mut doc = Document::with_version("1.7");
926        let catalog_pages = doc.new_object_id();
927        let first_page = doc.new_object_id();
928
929        doc.objects.insert(
930            first_page,
931            Object::Dictionary(lopdf::dictionary! {
932                "Type" => "Page",
933                "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
934                "Contents" => Object::Reference((first_page.0 + 1, 0)),
935            }),
936        );
937
938        doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, b"".to_vec()));
939
940        doc.objects.insert(
941            catalog_pages,
942            Object::Dictionary(lopdf::dictionary! {
943                "Type" => "Pages",
944                "Kids" => vec![Object::Reference(first_page)],
945                "Count" => 1,
946            }),
947        );
948
949        let catalog_id = doc.add_object(lopdf::dictionary! {
950            "Type" => "Catalog",
951            "Pages" => Object::Reference(catalog_pages),
952        });
953
954        doc.trailer.set("Root", Object::Reference(catalog_id));
955
956        // Add encryption dictionary
957        doc.trailer
958            .set("Encrypt", Object::Reference((doc.max_id + 1, 0)));
959        doc.objects.insert(
960            (doc.max_id + 1, 0),
961            Object::Dictionary(lopdf::dictionary! {
962                "Filter" => "Standard",
963                "V" => 1,
964                "R" => 2,
965            }),
966        );
967
968        doc.save(&path)?;
969
970        // Try to load it
971        let result = processor.load_pdf(&path);
972        assert!(matches!(result, Err(PdfError::Encrypted)));
973        Ok(())
974    }
975
976    #[test]
977    fn test_content_stream_lsb_roundtrip() -> TestResult {
978        let processor = PdfProcessorImpl::default();
979        let dir = tempdir()?;
980        let path = dir.path().join("test.pdf");
981
982        // Create a test PDF with content stream
983        let mut doc = Document::with_version("1.7");
984        let catalog_pages = doc.new_object_id();
985        let first_page = doc.new_object_id();
986
987        doc.objects.insert(
988            first_page,
989            Object::Dictionary(lopdf::dictionary! {
990                "Type" => "Page",
991                "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
992                "Contents" => Object::Reference((first_page.0 + 1, 0)),
993            }),
994        );
995
996        // Content stream with many numeric values for capacity
997        let content = b"BT\n/F1 12 Tf\n100 700 Td\n(Hello) Tj\n200 650 Td\n(World) Tj\n50 600 Td\n(Test) Tj\n150 550 Td\n(PDF) Tj\nET\n1 0 0 1 0 0 cm\n";
998        doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, content.to_vec()));
999
1000        doc.objects.insert(
1001            catalog_pages,
1002            Object::Dictionary(lopdf::dictionary! {
1003                "Type" => "Pages",
1004                "Kids" => vec![Object::Reference(first_page)],
1005                "Count" => 1,
1006            }),
1007        );
1008
1009        let catalog_id = doc.add_object(lopdf::dictionary! {
1010            "Type" => "Catalog",
1011            "Pages" => Object::Reference(catalog_pages),
1012        });
1013
1014        doc.trailer.set("Root", Object::Reference(catalog_id));
1015        doc.save(&path)?;
1016
1017        // Load and embed payload (very small to fit limited capacity)
1018        let original = processor.load_pdf(&path)?;
1019        let payload = Payload::from_bytes(vec![0xAB]); // 1 byte = 8 bits (need 8+ numbers)
1020        let stego = processor.embed_in_content_stream(original, &payload)?;
1021
1022        // Verify PDF is still parseable
1023        let stego_path = dir.path().join("stego.pdf");
1024        processor.save_pdf(&stego, &stego_path)?;
1025        let reloaded = processor.load_pdf(&stego_path)?;
1026
1027        // Extract and verify
1028        let extracted = processor.extract_from_content_stream(&reloaded)?;
1029        assert_eq!(extracted.as_bytes(), payload.as_bytes());
1030        Ok(())
1031    }
1032
1033    #[test]
1034    fn test_metadata_embed_roundtrip() -> TestResult {
1035        let processor = PdfProcessorImpl::default();
1036        let dir = tempdir()?;
1037        let path = dir.path().join("test.pdf");
1038
1039        // Create a minimal test PDF
1040        let mut doc = Document::with_version("1.7");
1041        let catalog_pages = doc.new_object_id();
1042        let first_page = doc.new_object_id();
1043
1044        doc.objects.insert(
1045            first_page,
1046            Object::Dictionary(lopdf::dictionary! {
1047                "Type" => "Page",
1048                "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
1049                "Contents" => Object::Reference((first_page.0 + 1, 0)),
1050            }),
1051        );
1052
1053        doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, b"".to_vec()));
1054
1055        doc.objects.insert(
1056            catalog_pages,
1057            Object::Dictionary(lopdf::dictionary! {
1058                "Type" => "Pages",
1059                "Kids" => vec![Object::Reference(first_page)],
1060                "Count" => 1,
1061            }),
1062        );
1063
1064        let catalog_id = doc.add_object(lopdf::dictionary! {
1065            "Type" => "Catalog",
1066            "Pages" => Object::Reference(catalog_pages),
1067        });
1068
1069        doc.trailer.set("Root", Object::Reference(catalog_id));
1070        doc.save(&path)?;
1071
1072        // Load and embed payload
1073        let original = processor.load_pdf(&path)?;
1074        let payload = Payload::from_bytes(vec![0u8; 128]); // 128-byte payload
1075        let stego = processor.embed_in_metadata(original, &payload)?;
1076
1077        // Verify PDF is still parseable
1078        let stego_path = dir.path().join("stego.pdf");
1079        processor.save_pdf(&stego, &stego_path)?;
1080        let reloaded = processor.load_pdf(&stego_path)?;
1081
1082        // Extract and verify
1083        let extracted = processor.extract_from_metadata(&reloaded)?;
1084        assert_eq!(extracted.as_bytes(), payload.as_bytes());
1085        Ok(())
1086    }
1087}