// shadowforge_lib/adapters/pdf.rs
1//! PDF processing adapter using lopdf and pdfium-render.
2
3use std::collections::HashMap;
4use std::env;
5use std::io::BufWriter;
6use std::path::Path;
7
8use base64::Engine;
9use base64::engine::general_purpose;
10use bytes::Bytes;
11use image::{DynamicImage, ImageFormat};
12use lopdf::{Document, Object, dictionary};
13use pdfium_render::prelude::*;
14
15use crate::domain::analysis::estimate_capacity;
16use crate::domain::errors::{PdfError, StegoError};
17use crate::domain::ports::{EmbedTechnique, ExtractTechnique, PdfProcessor};
18use crate::domain::types::{Capacity, CoverMedia, CoverMediaKind, Payload, StegoTechnique};
19
// Metadata keys
/// Metadata key under which the page count is stored on loaded PDF media.
const KEY_PAGE_COUNT: &str = "page_count";
/// Default rasterisation resolution in dots per inch (PDF points are 1/72 inch).
const DEFAULT_DPI: u16 = 150;
23
/// PDF processor implementation using lopdf and pdfium-render.
///
/// Handles PDF loading/saving, page rasterisation, and PDF reconstruction.
/// lopdf is used for structural edits (objects, streams, metadata); pdfium
/// is used only for rasterising pages to bitmaps.
#[derive(Debug)]
pub struct PdfProcessorImpl {
    /// DPI for page rasterisation.
    dpi: u16,
}
32
33impl Default for PdfProcessorImpl {
34    fn default() -> Self {
35        Self { dpi: DEFAULT_DPI }
36    }
37}
38
39impl PdfProcessorImpl {
40    /// Create a new PDF processor with the given DPI.
41    #[must_use]
42    pub const fn new(dpi: u16) -> Self {
43        Self { dpi }
44    }
45
46    fn bind_pdfium() -> Result<Pdfium, PdfError> {
47        let mut bind_errors = Vec::new();
48
49        if let Some(pdfium_dir) = env::var_os("PDFIUM_DYNAMIC_LIB_PATH") {
50            let library_path = Pdfium::pdfium_platform_library_name_at_path(&pdfium_dir);
51            match Pdfium::bind_to_library(library_path) {
52                Ok(bindings) => return Ok(Pdfium::new(bindings)),
53                Err(error) => bind_errors.push(format!(
54                    "PDFIUM_DYNAMIC_LIB_PATH={}: {error}",
55                    Path::new(&pdfium_dir).display()
56                )),
57            }
58        }
59
60        let local_library = Pdfium::pdfium_platform_library_name_at_path("./");
61        match Pdfium::bind_to_library(local_library) {
62            Ok(bindings) => return Ok(Pdfium::new(bindings)),
63            Err(error) => bind_errors.push(format!("./: {error}")),
64        }
65
66        match Pdfium::bind_to_system_library() {
67            Ok(bindings) => Ok(Pdfium::new(bindings)),
68            Err(error) => {
69                bind_errors.push(format!("system library: {error}"));
70                Err(PdfError::RenderFailed {
71                    page: 0,
72                    reason: format!(
73                        "Failed to load pdfium library. Tried {}",
74                        bind_errors.join(", ")
75                    ),
76                })
77            }
78        }
79    }
80}
81
82impl PdfProcessor for PdfProcessorImpl {
83    fn load_pdf(&self, path: &Path) -> Result<CoverMedia, PdfError> {
84        // Load PDF document
85        let doc = Document::load(path).map_err(|e| PdfError::ParseFailed {
86            reason: e.to_string(),
87        })?;
88
89        // Check if encrypted
90        if doc.is_encrypted() {
91            return Err(PdfError::Encrypted);
92        }
93
94        // Count pages
95        let page_count = doc.get_pages().len();
96
97        // Read raw bytes
98        let bytes = std::fs::read(path).map_err(|e| PdfError::IoError {
99            reason: e.to_string(),
100        })?;
101
102        // Build metadata
103        let mut metadata = HashMap::new();
104        metadata.insert(KEY_PAGE_COUNT.to_string(), page_count.to_string());
105
106        Ok(CoverMedia {
107            kind: CoverMediaKind::PdfDocument,
108            data: Bytes::from(bytes),
109            metadata,
110        })
111    }
112
113    fn save_pdf(&self, media: &CoverMedia, path: &Path) -> Result<(), PdfError> {
114        // Write raw PDF bytes to file
115        std::fs::write(path, &media.data).map_err(|e| PdfError::IoError {
116            reason: e.to_string(),
117        })?;
118
119        Ok(())
120    }
121
122    fn render_pages_to_images(&self, pdf: &CoverMedia) -> Result<Vec<CoverMedia>, PdfError> {
123        // Initialize pdfium library using the CI-provided path when available.
124        let pdfium = Self::bind_pdfium()?;
125
126        // Load PDF from bytes
127        let document = pdfium
128            .load_pdf_from_byte_vec(pdf.data.to_vec(), None)
129            .map_err(|e| PdfError::ParseFailed {
130                reason: e.to_string(),
131            })?;
132
133        let page_count = document.pages().len();
134        let mut images = Vec::with_capacity(page_count as usize);
135
136        // Render each page
137        for page_index in 0..page_count {
138            let page = document
139                .pages()
140                .get(page_index)
141                .map_err(|e| PdfError::RenderFailed {
142                    page: page_index as usize,
143                    reason: e.to_string(),
144                })?;
145
146            // Render to bitmap
147            #[expect(
148                clippy::cast_possible_truncation,
149                reason = "DPI calculation for render"
150            )]
151            let target_width = (page.width().value * f32::from(self.dpi) / 72.0) as i32;
152
153            let bitmap = page
154                .render_with_config(&PdfRenderConfig::new().set_target_width(target_width))
155                .map_err(|e| PdfError::RenderFailed {
156                    page: page_index as usize,
157                    reason: e.to_string(),
158                })?;
159
160            // Convert to RGBA8 image
161            let width = bitmap.width().cast_unsigned();
162            let height = bitmap.height().cast_unsigned();
163            let rgba_data = bitmap.as_rgba_bytes();
164
165            let img =
166                image::RgbaImage::from_raw(width, height, rgba_data.clone()).ok_or_else(|| {
167                    PdfError::RenderFailed {
168                        page: page_index as usize,
169                        reason: "invalid bitmap dimensions".to_string(),
170                    }
171                })?;
172
173            // Build metadata
174            let mut metadata = HashMap::new();
175            metadata.insert("width".to_string(), width.to_string());
176            metadata.insert("height".to_string(), height.to_string());
177            metadata.insert("format".to_string(), "Png".to_string());
178            metadata.insert("page_index".to_string(), page_index.to_string());
179
180            images.push(CoverMedia {
181                kind: CoverMediaKind::PngImage,
182                data: Bytes::from(img.into_raw()),
183                metadata,
184            });
185        }
186
187        Ok(images)
188    }
189
190    #[expect(
191        clippy::too_many_lines,
192        reason = "PDF reconstruction logic is inherently complex"
193    )]
194    fn rebuild_pdf_from_images(
195        &self,
196        images: Vec<CoverMedia>,
197        _original: &CoverMedia,
198    ) -> Result<CoverMedia, PdfError> {
199        // Create a new PDF document
200        let mut doc = Document::with_version("1.7");
201
202        // Add each image as a page
203        for (page_index, img_media) in images.iter().enumerate() {
204            // Parse dimensions from metadata
205            let width: u32 = img_media
206                .metadata
207                .get("width")
208                .ok_or_else(|| PdfError::RebuildFailed {
209                    reason: "missing width metadata".to_string(),
210                })?
211                .parse()
212                .map_err(|e: std::num::ParseIntError| PdfError::RebuildFailed {
213                    reason: e.to_string(),
214                })?;
215
216            let height: u32 = img_media
217                .metadata
218                .get("height")
219                .ok_or_else(|| PdfError::RebuildFailed {
220                    reason: "missing height metadata".to_string(),
221                })?
222                .parse()
223                .map_err(|e: std::num::ParseIntError| PdfError::RebuildFailed {
224                    reason: e.to_string(),
225                })?;
226
227            // Convert RGBA data to PNG bytes
228            let img = image::RgbaImage::from_raw(width, height, img_media.data.to_vec())
229                .ok_or_else(|| PdfError::RebuildFailed {
230                    reason: "invalid image dimensions or data length".to_string(),
231                })?;
232
233            let dynamic_img = DynamicImage::ImageRgba8(img);
234            let mut png_bytes = Vec::new();
235            dynamic_img
236                .write_to(&mut std::io::Cursor::new(&mut png_bytes), ImageFormat::Png)
237                .map_err(|e| PdfError::RebuildFailed {
238                    reason: e.to_string(),
239                })?;
240
241            // Create a page with the image dimensions (convert pixels to points: 72 DPI)
242            #[expect(clippy::cast_precision_loss, reason = "image dimensions to PDF points")]
243            let page_width = width as f32 * 72.0 / f32::from(self.dpi);
244            #[expect(clippy::cast_precision_loss, reason = "image dimensions to PDF points")]
245            let page_height = height as f32 * 72.0 / f32::from(self.dpi);
246
247            let page_id = doc.new_object_id();
248            let page = doc.add_object(lopdf::dictionary! {
249                "Type" => "Page",
250                "MediaBox" => vec![0.into(), 0.into(), page_width.into(), page_height.into()],
251                "Contents" => Object::Reference((page_id.0 + 1, 0)),
252                "Resources" => lopdf::dictionary! {
253                    "XObject" => lopdf::dictionary! {
254                        "Image1" => Object::Reference((page_id.0 + 2, 0)),
255                    },
256                },
257            });
258
259            // Create content stream that displays the image
260            let content = format!("q\n{page_width} 0 0 {page_height} 0 0 cm\n/Image1 Do\nQ");
261            let content_id = doc.add_object(lopdf::Stream::new(
262                lopdf::dictionary! {},
263                content.into_bytes(),
264            ));
265
266            // Add the PNG image as an XObject
267            let image_id = doc.add_object(lopdf::Stream::new(
268                lopdf::dictionary! {
269                    "Type" => "XObject",
270                    "Subtype" => "Image",
271                    "Width" => i64::from(width),
272                    "Height" => i64::from(height),
273                    "ColorSpace" => "DeviceRGB",
274                    "BitsPerComponent" => 8,
275                    "Filter" => "FlateDecode",
276                },
277                png_bytes,
278            ));
279
280            // Verify object IDs match what we referenced
281            assert_eq!(page, (page_id.0, 0));
282            assert_eq!(content_id, (page_id.0 + 1, 0));
283            assert_eq!(image_id, (page_id.0 + 2, 0));
284
285            // Add page to pages collection
286            if doc.catalog().is_err() {
287                // Create catalog and pages root
288                let pages_obj_id = doc.new_object_id();
289                let catalog_id = doc.add_object(lopdf::dictionary! {
290                    "Type" => "Catalog",
291                    "Pages" => Object::Reference(pages_obj_id),
292                });
293                doc.trailer.set("Root", Object::Reference(catalog_id));
294
295                doc.objects.insert(
296                    pages_obj_id,
297                    lopdf::Object::Dictionary(lopdf::dictionary! {
298                        "Type" => "Pages",
299                        "Kids" => vec![Object::Reference(page)],
300                        "Count" => 1,
301                    }),
302                );
303            } else {
304                // Add to existing pages
305                if let Ok(pages_ref) = doc.catalog().and_then(|c| c.get(b"Pages"))
306                    && let Ok(pages_obj_id) = pages_ref.as_reference()
307                    && let Ok(pages_dict) = doc.get_object_mut(pages_obj_id)
308                    && let Object::Dictionary(dict) = pages_dict
309                {
310                    // Get current kids array
311                    let mut kids = if let Ok(Object::Array(arr)) = dict.get(b"Kids") {
312                        arr.clone()
313                    } else {
314                        vec![]
315                    };
316                    kids.push(Object::Reference(page));
317
318                    dict.set("Kids", Object::Array(kids));
319                    #[expect(clippy::cast_possible_wrap, reason = "page count fits in i64")]
320                    dict.set("Count", (page_index + 1) as i64);
321                }
322            }
323        }
324
325        // Serialize to bytes
326        let mut pdf_bytes = Vec::new();
327        doc.save_to(&mut BufWriter::new(&mut pdf_bytes))
328            .map_err(|e| PdfError::RebuildFailed {
329                reason: e.to_string(),
330            })?;
331
332        // Build metadata
333        let mut metadata = HashMap::new();
334        metadata.insert(KEY_PAGE_COUNT.to_string(), images.len().to_string());
335
336        Ok(CoverMedia {
337            kind: CoverMediaKind::PdfDocument,
338            data: Bytes::from(pdf_bytes),
339            metadata,
340        })
341    }
342
    /// Embed `payload` bit-by-bit into the least-significant bits of integer
    /// tokens found in the document's stream objects.
    ///
    /// Bits are taken MSB-first from each payload byte and written into
    /// integer tokens in document object order; `extract_from_content_stream`
    /// reads them back in the same order.
    ///
    /// NOTE(review): stream bytes are decoded with `from_utf8_lossy` on their
    /// *raw* (possibly FlateDecode-compressed) content, so a compressed
    /// content stream would be rewritten as corrupted data — presumably the
    /// covers used here carry uncompressed streams; confirm upstream.
    ///
    /// # Errors
    /// Returns [`PdfError::ParseFailed`] for unparseable input and
    /// [`PdfError::EmbedFailed`] when the document lacks enough integer
    /// tokens to hold every payload bit, or when re-serialisation fails.
    fn embed_in_content_stream(
        &self,
        pdf: CoverMedia,
        payload: &Payload,
    ) -> Result<CoverMedia, PdfError> {
        // Load PDF from bytes
        let mut doc = Document::load_from(&pdf.data[..]).map_err(|e| PdfError::ParseFailed {
            reason: e.to_string(),
        })?;

        // Convert payload to bits (MSB-first within each byte)
        let payload_bits: Vec<u8> = payload
            .as_bytes()
            .iter()
            .flat_map(|byte| (0..8).rev().map(move |i| (byte >> i) & 1))
            .collect();

        let mut bit_index = 0;

        // Iterate through all objects to find content streams.
        // Ids are snapshotted first so the document can be mutably borrowed
        // inside the loop.
        let object_ids: Vec<_> = doc.objects.keys().copied().collect();
        for obj_id in object_ids {
            if bit_index >= payload_bits.len() {
                break;
            }

            if let Ok(obj) = doc.get_object_mut(obj_id)
                && let Object::Stream(stream) = obj
            {
                // Parse content stream
                let content = String::from_utf8_lossy(&stream.content);
                let mut modified_content = String::new();
                let mut tokens: Vec<&str> = content.split_whitespace().collect();

                for token in &mut tokens {
                    if bit_index >= payload_bits.len() {
                        // Payload exhausted: copy remaining tokens through unchanged.
                        modified_content.push_str(token);
                        modified_content.push(' ');
                        continue;
                    }

                    // Check if token is a number
                    if let Ok(mut num) = token.parse::<i32>() {
                        // Embed bit in LSB — bit_index < payload_bits.len() guaranteed by guard above
                        if let Some(&bit) = payload_bits.get(bit_index) {
                            if bit == 1 {
                                num |= 1; // Set LSB
                            } else {
                                num &= !1; // Clear LSB
                            }
                        }
                        modified_content.push_str(&num.to_string());
                        bit_index += 1;
                    } else {
                        modified_content.push_str(token);
                    }
                    modified_content.push(' ');
                }

                // Update stream content. Tokens are re-joined with single
                // spaces, so the stream's original whitespace layout is not
                // preserved (the numeric values, and thus the bits, are).
                stream.set_content(modified_content.trim().as_bytes().to_vec());
            }
        }

        // Fail loudly if the document did not have room for the whole payload.
        if bit_index < payload_bits.len() {
            return Err(PdfError::EmbedFailed {
                reason: format!(
                    "insufficient capacity: embedded {bit_index}/{} bits",
                    payload_bits.len()
                ),
            });
        }

        // Serialize modified PDF
        let mut pdf_bytes = Vec::new();
        doc.save_to(&mut pdf_bytes)
            .map_err(|e| PdfError::EmbedFailed {
                reason: e.to_string(),
            })?;

        Ok(CoverMedia {
            kind: pdf.kind,
            data: Bytes::from(pdf_bytes),
            metadata: pdf.metadata,
        })
    }
429
430    fn extract_from_content_stream(&self, pdf: &CoverMedia) -> Result<Payload, PdfError> {
431        // Load PDF from bytes
432        let doc = Document::load_from(&pdf.data[..]).map_err(|e| PdfError::ParseFailed {
433            reason: e.to_string(),
434        })?;
435
436        let mut extracted_bits = Vec::new();
437
438        // Iterate through all objects to find content streams
439        for obj in doc.objects.values() {
440            if let Object::Stream(stream) = obj {
441                // Parse content stream
442                let content = String::from_utf8_lossy(&stream.content);
443                let tokens: Vec<&str> = content.split_whitespace().collect();
444
445                for token in tokens {
446                    // Check if token is a number
447                    if let Ok(num) = token.parse::<i32>() {
448                        // Extract LSB
449                        #[expect(clippy::cast_sign_loss, reason = "LSB is always 0 or 1")]
450                        extracted_bits.push((num & 1) as u8);
451                    }
452                }
453            }
454        }
455
456        // Convert bits to bytes
457        if extracted_bits.is_empty() {
458            return Err(PdfError::ExtractFailed {
459                reason: "no numeric values found in content streams".to_string(),
460            });
461        }
462
463        let mut payload_bytes = Vec::new();
464        for chunk in extracted_bits.chunks(8) {
465            if chunk.len() == 8 {
466                let mut byte = 0u8;
467                for (i, bit) in chunk.iter().enumerate() {
468                    byte |= bit << (7 - i);
469                }
470                payload_bytes.push(byte);
471            }
472        }
473
474        Ok(Payload::from_bytes(payload_bytes))
475    }
476
477    fn embed_in_metadata(
478        &self,
479        pdf: CoverMedia,
480        payload: &Payload,
481    ) -> Result<CoverMedia, PdfError> {
482        // Load PDF from bytes
483        let mut doc = Document::load_from(&pdf.data[..]).map_err(|e| PdfError::ParseFailed {
484            reason: e.to_string(),
485        })?;
486
487        // Base64-encode payload
488        let encoded = general_purpose::STANDARD.encode(payload.as_bytes());
489
490        // Create XMP metadata with custom field
491        let xmp_content = format!(
492            r#"<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>
493<x:xmpmeta xmlns:x="adobe:ns:meta/">
494  <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
495    <rdf:Description rdf:about=""
496      xmlns:sf="http://shadowforge.org/ns/1.0/">
497      <sf:HiddenData>{encoded}</sf:HiddenData>
498    </rdf:Description>
499  </rdf:RDF>
500</x:xmpmeta>
501<?xpacket end="w"?>"#
502        );
503
504        // Create metadata stream
505        let metadata_id = doc.add_object(lopdf::Stream::new(
506            lopdf::dictionary! {
507                "Type" => "Metadata",
508                "Subtype" => "XML",
509            },
510            xmp_content.into_bytes(),
511        ));
512
513        // Add metadata reference to catalog
514        if let Ok(catalog) = doc.catalog_mut() {
515            catalog.set("Metadata", Object::Reference(metadata_id));
516        } else {
517            return Err(PdfError::EmbedFailed {
518                reason: "failed to access catalog".to_string(),
519            });
520        }
521
522        // Serialize modified PDF
523        let mut pdf_bytes = Vec::new();
524        doc.save_to(&mut pdf_bytes)
525            .map_err(|e| PdfError::EmbedFailed {
526                reason: e.to_string(),
527            })?;
528
529        Ok(CoverMedia {
530            kind: pdf.kind,
531            data: Bytes::from(pdf_bytes),
532            metadata: pdf.metadata,
533        })
534    }
535
536    fn extract_from_metadata(&self, pdf: &CoverMedia) -> Result<Payload, PdfError> {
537        // Load PDF from bytes
538        let doc = Document::load_from(&pdf.data[..]).map_err(|e| PdfError::ParseFailed {
539            reason: e.to_string(),
540        })?;
541
542        // Get catalog
543        let catalog = doc.catalog().map_err(|e| PdfError::ExtractFailed {
544            reason: format!("failed to access catalog: {e}"),
545        })?;
546
547        // Get metadata reference
548        let metadata_ref = catalog
549            .get(b"Metadata")
550            .map_err(|_| PdfError::ExtractFailed {
551                reason: "no metadata found in catalog".to_string(),
552            })?
553            .as_reference()
554            .map_err(|_| PdfError::ExtractFailed {
555                reason: "metadata is not a reference".to_string(),
556            })?;
557
558        // Get metadata stream
559        let metadata_obj = doc
560            .get_object(metadata_ref)
561            .map_err(|e| PdfError::ExtractFailed {
562                reason: format!("failed to get metadata object: {e}"),
563            })?;
564
565        let metadata_stream = metadata_obj
566            .as_stream()
567            .map_err(|_| PdfError::ExtractFailed {
568                reason: "metadata is not a stream".to_string(),
569            })?;
570
571        // Parse XMP content
572        let xmp_content = String::from_utf8_lossy(&metadata_stream.content);
573
574        // Extract base64 data from <sf:HiddenData> tag
575        let start_tag = "<sf:HiddenData>";
576        let end_tag = "</sf:HiddenData>";
577
578        let start_idx = xmp_content
579            .find(start_tag)
580            .ok_or_else(|| PdfError::ExtractFailed {
581                reason: "no sf:HiddenData tag found".to_string(),
582            })?
583            .strict_add(start_tag.len());
584
585        let end_idx = xmp_content
586            .find(end_tag)
587            .ok_or_else(|| PdfError::ExtractFailed {
588                reason: "no closing sf:HiddenData tag found".to_string(),
589            })?;
590
591        let encoded_data = &xmp_content[start_idx..end_idx];
592
593        // Decode base64
594        let decoded = general_purpose::STANDARD
595            .decode(encoded_data.trim())
596            .map_err(|e| PdfError::ExtractFailed {
597                reason: format!("base64 decode failed: {e}"),
598            })?;
599
600        Ok(Payload::from_bytes(decoded))
601    }
602}
603
604fn ensure_pdf_cover(cover: &CoverMedia, technique: StegoTechnique) -> Result<Capacity, StegoError> {
605    if cover.kind != CoverMediaKind::PdfDocument {
606        return Err(StegoError::UnsupportedCoverType {
607            reason: format!("{technique:?} requires a PDF cover"),
608        });
609    }
610
611    Ok(Capacity {
612        bytes: estimate_capacity(cover, technique),
613        technique,
614    })
615}
616
/// Translate low-level [`PdfError`]s into the domain-level [`StegoError`]s
/// exposed by the embed/extract trait implementations.
fn map_pdf_error(error: PdfError) -> StegoError {
    match error {
        // Encrypted covers are a rejected input class, not corrupt data.
        PdfError::Encrypted => StegoError::UnsupportedCoverType {
            reason: "encrypted PDF documents are not supported".to_string(),
        },
        // Any extraction failure is surfaced as "no payload present".
        PdfError::ExtractFailed { .. } => StegoError::NoPayloadFound,
        PdfError::RenderFailed { page, reason } => StegoError::MalformedCoverData {
            reason: format!("pdf render failed on page {page}: {reason}"),
        },
        // Remaining failure modes collapse into a generic malformed-cover error.
        PdfError::ParseFailed { reason }
        | PdfError::RebuildFailed { reason }
        | PdfError::EmbedFailed { reason }
        | PdfError::IoError { reason } => StegoError::MalformedCoverData {
            reason: format!("pdf processing failed: {reason}"),
        },
    }
}
634
/// Stego adapter that embeds payloads in PDF content streams.
///
/// Wraps a [`PdfProcessorImpl`] and exposes the content-stream technique
/// through the embed/extract ports.
#[derive(Debug, Default)]
pub struct PdfContentStreamStego {
    // Underlying PDF processor (default settings).
    processor: PdfProcessorImpl,
}
640
641impl PdfContentStreamStego {
642    /// Create a content-stream PDF stego adapter with default processor settings.
643    #[must_use]
644    pub fn new() -> Self {
645        Self::default()
646    }
647}
648
649impl EmbedTechnique for PdfContentStreamStego {
650    fn technique(&self) -> StegoTechnique {
651        StegoTechnique::PdfContentStream
652    }
653
654    fn capacity(&self, cover: &CoverMedia) -> Result<Capacity, StegoError> {
655        ensure_pdf_cover(cover, <Self as EmbedTechnique>::technique(self))
656    }
657
658    fn embed(&self, cover: CoverMedia, payload: &Payload) -> Result<CoverMedia, StegoError> {
659        ensure_pdf_cover(&cover, <Self as EmbedTechnique>::technique(self))?;
660        self.processor
661            .embed_in_content_stream(cover, payload)
662            .map_err(map_pdf_error)
663    }
664}
665
666impl ExtractTechnique for PdfContentStreamStego {
667    fn technique(&self) -> StegoTechnique {
668        StegoTechnique::PdfContentStream
669    }
670
671    fn extract(&self, stego: &CoverMedia) -> Result<Payload, StegoError> {
672        ensure_pdf_cover(stego, <Self as ExtractTechnique>::technique(self))?;
673        self.processor
674            .extract_from_content_stream(stego)
675            .map_err(map_pdf_error)
676    }
677}
678
/// Stego adapter that embeds payloads in PDF metadata fields.
///
/// Wraps a [`PdfProcessorImpl`] and exposes the XMP-metadata technique
/// through the embed/extract ports.
#[derive(Debug, Default)]
pub struct PdfMetadataStego {
    // Underlying PDF processor (default settings).
    processor: PdfProcessorImpl,
}
684
685impl PdfMetadataStego {
686    /// Create a metadata PDF stego adapter with default processor settings.
687    #[must_use]
688    pub fn new() -> Self {
689        Self::default()
690    }
691}
692
693impl EmbedTechnique for PdfMetadataStego {
694    fn technique(&self) -> StegoTechnique {
695        StegoTechnique::PdfMetadata
696    }
697
698    fn capacity(&self, cover: &CoverMedia) -> Result<Capacity, StegoError> {
699        ensure_pdf_cover(cover, <Self as EmbedTechnique>::technique(self))
700    }
701
702    fn embed(&self, cover: CoverMedia, payload: &Payload) -> Result<CoverMedia, StegoError> {
703        ensure_pdf_cover(&cover, <Self as EmbedTechnique>::technique(self))?;
704        self.processor
705            .embed_in_metadata(cover, payload)
706            .map_err(map_pdf_error)
707    }
708}
709
710impl ExtractTechnique for PdfMetadataStego {
711    fn technique(&self) -> StegoTechnique {
712        StegoTechnique::PdfMetadata
713    }
714
715    fn extract(&self, stego: &CoverMedia) -> Result<Payload, StegoError> {
716        ensure_pdf_cover(stego, <Self as ExtractTechnique>::technique(self))?;
717        self.processor
718            .extract_from_metadata(stego)
719            .map_err(map_pdf_error)
720    }
721}
722
723// ─── Tests ────────────────────────────────────────────────────────────────────
724
725#[cfg(test)]
726mod tests {
727    use super::*;
728    use tempfile::tempdir;
729
730    type TestResult = Result<(), Box<dyn std::error::Error>>;
731
    // Builds a one-page PDF by hand with lopdf, then verifies `load_pdf`
    // classifies it as a PDF cover and records a page count of 1.
    #[test]
    fn test_load_minimal_pdf() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("minimal.pdf");

        // Create a minimal valid PDF with one page
        let mut doc = Document::with_version("1.7");
        let catalog_pages = doc.new_object_id();
        let first_page = doc.new_object_id();

        // `first_page` is reserved before the content stream, so the
        // `add_object` below receives id `first_page + 1`, matching the
        // `Contents` reference written here.
        doc.objects.insert(
            first_page,
            Object::Dictionary(lopdf::dictionary! {
                "Type" => "Page",
                "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
                "Contents" => Object::Reference((first_page.0 + 1, 0)),
            }),
        );

        // Empty content stream for the page.
        doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, b"".to_vec()));

        doc.objects.insert(
            catalog_pages,
            Object::Dictionary(lopdf::dictionary! {
                "Type" => "Pages",
                "Kids" => vec![Object::Reference(first_page)],
                "Count" => 1,
            }),
        );

        let catalog_id = doc.add_object(lopdf::dictionary! {
            "Type" => "Catalog",
            "Pages" => Object::Reference(catalog_pages),
        });

        doc.trailer.set("Root", Object::Reference(catalog_id));
        doc.save(&path)?;

        // Load it
        let media = processor.load_pdf(&path)?;
        assert_eq!(media.kind, CoverMediaKind::PdfDocument);
        assert_eq!(media.metadata.get(KEY_PAGE_COUNT), Some(&"1".to_string()));
        Ok(())
    }
777
    // Builds a two-page PDF and checks `render_pages_to_images` yields one
    // image per page. Ignored by default because it needs a pdfium library
    // on the host.
    #[test]
    #[ignore = "requires pdfium system library"]
    fn test_render_pages_returns_correct_count() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("two_page.pdf");

        // Create a 2-page PDF
        let mut doc = Document::with_version("1.7");
        let catalog_pages = doc.new_object_id();

        // Page 1: each page id is reserved just before its (empty) content
        // stream is added, so `page.0 + 1` points at that stream.
        let page1_id = doc.new_object_id();
        doc.objects.insert(
            page1_id,
            Object::Dictionary(lopdf::dictionary! {
                "Type" => "Page",
                "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
                "Contents" => Object::Reference((page1_id.0 + 1, 0)),
            }),
        );
        doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, b"".to_vec()));

        // Page 2: same pattern.
        let page2_id = doc.new_object_id();
        doc.objects.insert(
            page2_id,
            Object::Dictionary(lopdf::dictionary! {
                "Type" => "Page",
                "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
                "Contents" => Object::Reference((page2_id.0 + 1, 0)),
            }),
        );
        doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, b"".to_vec()));

        doc.objects.insert(
            catalog_pages,
            Object::Dictionary(lopdf::dictionary! {
                "Type" => "Pages",
                "Kids" => vec![
                    Object::Reference(page1_id),
                    Object::Reference(page2_id),
                ],
                "Count" => 2,
            }),
        );

        let catalog_id = doc.add_object(lopdf::dictionary! {
            "Type" => "Catalog",
            "Pages" => Object::Reference(catalog_pages),
        });

        doc.trailer.set("Root", Object::Reference(catalog_id));
        doc.save(&path)?;

        // Load and render
        let media = processor.load_pdf(&path)?;
        let images = processor.render_pages_to_images(&media)?;
        assert_eq!(images.len(), 2);
        Ok(())
    }
837
838    #[test]
839    #[ignore = "requires pdfium system library"]
840    fn test_rebuild_pdf_roundtrip() -> TestResult {
841        let processor = PdfProcessorImpl::default();
842        let dir = tempdir()?;
843        let path = dir.path().join("original.pdf");
844
845        // Create a 2-page PDF
846        let mut doc = Document::with_version("1.7");
847        let catalog_pages = doc.new_object_id();
848
849        let page1_id = doc.new_object_id();
850        doc.objects.insert(
851            page1_id,
852            Object::Dictionary(lopdf::dictionary! {
853                "Type" => "Page",
854                "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
855                "Contents" => Object::Reference((page1_id.0 + 1, 0)),
856            }),
857        );
858        doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, b"".to_vec()));
859
860        let page2_id = doc.new_object_id();
861        doc.objects.insert(
862            page2_id,
863            Object::Dictionary(lopdf::dictionary! {
864                "Type" => "Page",
865                "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
866                "Contents" => Object::Reference((page2_id.0 + 1, 0)),
867            }),
868        );
869        doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, b"".to_vec()));
870
871        doc.objects.insert(
872            catalog_pages,
873            Object::Dictionary(lopdf::dictionary! {
874                "Type" => "Pages",
875                "Kids" => vec![
876                    Object::Reference(page1_id),
877                    Object::Reference(page2_id),
878                ],
879                "Count" => 2,
880            }),
881        );
882
883        let catalog_id = doc.add_object(lopdf::dictionary! {
884            "Type" => "Catalog",
885            "Pages" => Object::Reference(catalog_pages),
886        });
887
888        doc.trailer.set("Root", Object::Reference(catalog_id));
889        doc.save(&path)?;
890
891        // Load, render, rebuild, and reload
892        let original = processor.load_pdf(&path)?;
893        let images = processor.render_pages_to_images(&original)?;
894        let rebuilt = processor.rebuild_pdf_from_images(images, &original)?;
895
896        // Save and reload to verify
897        let rebuilt_path = dir.path().join("rebuilt.pdf");
898        processor.save_pdf(&rebuilt, &rebuilt_path)?;
899        let reloaded = processor.load_pdf(&rebuilt_path)?;
900
901        assert_eq!(
902            reloaded.metadata.get(KEY_PAGE_COUNT),
903            original.metadata.get(KEY_PAGE_COUNT)
904        );
905        Ok(())
906    }
907
908    #[test]
909    #[ignore = "lopdf requires actual encrypted content, not just Encrypt trailer"]
910    fn test_encrypted_pdf_error() -> TestResult {
911        let processor = PdfProcessorImpl::default();
912        let dir = tempdir()?;
913        let path = dir.path().join("encrypted.pdf");
914
915        // Create an encrypted PDF
916        let mut doc = Document::with_version("1.7");
917        let catalog_pages = doc.new_object_id();
918        let first_page = doc.new_object_id();
919
920        doc.objects.insert(
921            first_page,
922            Object::Dictionary(lopdf::dictionary! {
923                "Type" => "Page",
924                "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
925                "Contents" => Object::Reference((first_page.0 + 1, 0)),
926            }),
927        );
928
929        doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, b"".to_vec()));
930
931        doc.objects.insert(
932            catalog_pages,
933            Object::Dictionary(lopdf::dictionary! {
934                "Type" => "Pages",
935                "Kids" => vec![Object::Reference(first_page)],
936                "Count" => 1,
937            }),
938        );
939
940        let catalog_id = doc.add_object(lopdf::dictionary! {
941            "Type" => "Catalog",
942            "Pages" => Object::Reference(catalog_pages),
943        });
944
945        doc.trailer.set("Root", Object::Reference(catalog_id));
946
947        // Add encryption dictionary
948        doc.trailer
949            .set("Encrypt", Object::Reference((doc.max_id + 1, 0)));
950        doc.objects.insert(
951            (doc.max_id + 1, 0),
952            Object::Dictionary(lopdf::dictionary! {
953                "Filter" => "Standard",
954                "V" => 1,
955                "R" => 2,
956            }),
957        );
958
959        doc.save(&path)?;
960
961        // Try to load it
962        let result = processor.load_pdf(&path);
963        assert!(matches!(result, Err(PdfError::Encrypted)));
964        Ok(())
965    }
966
967    #[test]
968    fn test_content_stream_lsb_roundtrip() -> TestResult {
969        let processor = PdfProcessorImpl::default();
970        let dir = tempdir()?;
971        let path = dir.path().join("test.pdf");
972
973        // Create a test PDF with content stream
974        let mut doc = Document::with_version("1.7");
975        let catalog_pages = doc.new_object_id();
976        let first_page = doc.new_object_id();
977
978        doc.objects.insert(
979            first_page,
980            Object::Dictionary(lopdf::dictionary! {
981                "Type" => "Page",
982                "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
983                "Contents" => Object::Reference((first_page.0 + 1, 0)),
984            }),
985        );
986
987        // Content stream with many numeric values for capacity
988        let content = b"BT\n/F1 12 Tf\n100 700 Td\n(Hello) Tj\n200 650 Td\n(World) Tj\n50 600 Td\n(Test) Tj\n150 550 Td\n(PDF) Tj\nET\n1 0 0 1 0 0 cm\n";
989        doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, content.to_vec()));
990
991        doc.objects.insert(
992            catalog_pages,
993            Object::Dictionary(lopdf::dictionary! {
994                "Type" => "Pages",
995                "Kids" => vec![Object::Reference(first_page)],
996                "Count" => 1,
997            }),
998        );
999
1000        let catalog_id = doc.add_object(lopdf::dictionary! {
1001            "Type" => "Catalog",
1002            "Pages" => Object::Reference(catalog_pages),
1003        });
1004
1005        doc.trailer.set("Root", Object::Reference(catalog_id));
1006        doc.save(&path)?;
1007
1008        // Load and embed payload (very small to fit limited capacity)
1009        let original = processor.load_pdf(&path)?;
1010        let payload = Payload::from_bytes(vec![0xAB]); // 1 byte = 8 bits (need 8+ numbers)
1011        let stego = processor.embed_in_content_stream(original, &payload)?;
1012
1013        // Verify PDF is still parseable
1014        let stego_path = dir.path().join("stego.pdf");
1015        processor.save_pdf(&stego, &stego_path)?;
1016        let reloaded = processor.load_pdf(&stego_path)?;
1017
1018        // Extract and verify
1019        let extracted = processor.extract_from_content_stream(&reloaded)?;
1020        assert_eq!(extracted.as_bytes(), payload.as_bytes());
1021        Ok(())
1022    }
1023
1024    #[test]
1025    fn test_metadata_embed_roundtrip() -> TestResult {
1026        let processor = PdfProcessorImpl::default();
1027        let dir = tempdir()?;
1028        let path = dir.path().join("test.pdf");
1029
1030        // Create a minimal test PDF
1031        let mut doc = Document::with_version("1.7");
1032        let catalog_pages = doc.new_object_id();
1033        let first_page = doc.new_object_id();
1034
1035        doc.objects.insert(
1036            first_page,
1037            Object::Dictionary(lopdf::dictionary! {
1038                "Type" => "Page",
1039                "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
1040                "Contents" => Object::Reference((first_page.0 + 1, 0)),
1041            }),
1042        );
1043
1044        doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, b"".to_vec()));
1045
1046        doc.objects.insert(
1047            catalog_pages,
1048            Object::Dictionary(lopdf::dictionary! {
1049                "Type" => "Pages",
1050                "Kids" => vec![Object::Reference(first_page)],
1051                "Count" => 1,
1052            }),
1053        );
1054
1055        let catalog_id = doc.add_object(lopdf::dictionary! {
1056            "Type" => "Catalog",
1057            "Pages" => Object::Reference(catalog_pages),
1058        });
1059
1060        doc.trailer.set("Root", Object::Reference(catalog_id));
1061        doc.save(&path)?;
1062
1063        // Load and embed payload
1064        let original = processor.load_pdf(&path)?;
1065        let payload = Payload::from_bytes(vec![0u8; 128]); // 128-byte payload
1066        let stego = processor.embed_in_metadata(original, &payload)?;
1067
1068        // Verify PDF is still parseable
1069        let stego_path = dir.path().join("stego.pdf");
1070        processor.save_pdf(&stego, &stego_path)?;
1071        let reloaded = processor.load_pdf(&stego_path)?;
1072
1073        // Extract and verify
1074        let extracted = processor.extract_from_metadata(&reloaded)?;
1075        assert_eq!(extracted.as_bytes(), payload.as_bytes());
1076        Ok(())
1077    }
1078}