1use std::collections::HashMap;
4use std::env;
5use std::io::BufWriter;
6use std::path::Path;
7
8use base64::Engine;
9use base64::engine::general_purpose;
10use bytes::Bytes;
11use image::{DynamicImage, ImageFormat};
12use lopdf::{Document, Object, dictionary};
13use pdfium_render::prelude::*;
14
15use crate::domain::analysis::estimate_capacity;
16use crate::domain::errors::{PdfError, StegoError};
17use crate::domain::ports::{EmbedTechnique, ExtractTechnique, PdfProcessor};
18use crate::domain::types::{Capacity, CoverMedia, CoverMediaKind, Payload, StegoTechnique};
19
20const KEY_PAGE_COUNT: &str = "page_count";
22const DEFAULT_DPI: u16 = 150;
23
/// Concrete [`PdfProcessor`] backed by `lopdf` (parsing/writing) and
/// `pdfium` (page rasterization).
#[derive(Debug)]
pub struct PdfProcessorImpl {
    // Render resolution in dots-per-inch used when rasterizing pages and
    // when converting pixel dimensions back to PDF points (72 pt/inch).
    dpi: u16,
}
32
33impl Default for PdfProcessorImpl {
34 fn default() -> Self {
35 Self { dpi: DEFAULT_DPI }
36 }
37}
38
39impl PdfProcessorImpl {
40 #[must_use]
42 pub const fn new(dpi: u16) -> Self {
43 Self { dpi }
44 }
45
46 fn bind_pdfium() -> Result<Pdfium, PdfError> {
47 let mut bind_errors = Vec::new();
48
49 if let Some(pdfium_dir) = env::var_os("PDFIUM_DYNAMIC_LIB_PATH") {
50 let library_path = Pdfium::pdfium_platform_library_name_at_path(&pdfium_dir);
51 match Pdfium::bind_to_library(library_path) {
52 Ok(bindings) => return Ok(Pdfium::new(bindings)),
53 Err(error) => bind_errors.push(format!(
54 "PDFIUM_DYNAMIC_LIB_PATH={}: {error}",
55 Path::new(&pdfium_dir).display()
56 )),
57 }
58 }
59
60 let local_library = Pdfium::pdfium_platform_library_name_at_path("./");
61 match Pdfium::bind_to_library(local_library) {
62 Ok(bindings) => return Ok(Pdfium::new(bindings)),
63 Err(error) => bind_errors.push(format!("./: {error}")),
64 }
65
66 match Pdfium::bind_to_system_library() {
67 Ok(bindings) => Ok(Pdfium::new(bindings)),
68 Err(error) => {
69 bind_errors.push(format!("system library: {error}"));
70 Err(PdfError::RenderFailed {
71 page: 0,
72 reason: format!(
73 "Failed to load pdfium library. Tried {}",
74 bind_errors.join(", ")
75 ),
76 })
77 }
78 }
79 }
80}
81
impl PdfProcessor for PdfProcessorImpl {
    /// Loads a PDF from disk into a [`CoverMedia`], rejecting encrypted files.
    ///
    /// The document is parsed once to validate it and count pages, then the
    /// file is re-read as raw bytes so the cover carries the original,
    /// unmodified file content (not a lopdf re-serialization).
    fn load_pdf(&self, path: &Path) -> Result<CoverMedia, PdfError> {
        let doc = Document::load(path).map_err(|e| PdfError::ParseFailed {
            reason: e.to_string(),
        })?;

        // Encrypted documents cannot be rewritten safely; refuse early.
        if doc.is_encrypted() {
            return Err(PdfError::Encrypted);
        }

        let page_count = doc.get_pages().len();

        // Second read: keep the pristine on-disk bytes as the cover data.
        let bytes = std::fs::read(path).map_err(|e| PdfError::IoError {
            reason: e.to_string(),
        })?;

        let mut metadata = HashMap::new();
        metadata.insert(KEY_PAGE_COUNT.to_string(), page_count.to_string());

        Ok(CoverMedia {
            kind: CoverMediaKind::PdfDocument,
            data: Bytes::from(bytes),
            metadata,
        })
    }

    /// Writes the cover's bytes to `path` verbatim.
    fn save_pdf(&self, media: &CoverMedia, path: &Path) -> Result<(), PdfError> {
        std::fs::write(path, &media.data).map_err(|e| PdfError::IoError {
            reason: e.to_string(),
        })?;

        Ok(())
    }

    /// Rasterizes every page of `pdf` via pdfium at `self.dpi`.
    ///
    /// NOTE(review): each returned item's `data` holds *raw RGBA pixels*
    /// (from `RgbaImage::into_raw`), even though `kind` is `PngImage` and
    /// metadata says `format: "Png"` — consumers (e.g.
    /// `rebuild_pdf_from_images`) must treat it as raw pixels; confirm the
    /// kind/format labels are intentional.
    fn render_pages_to_images(&self, pdf: &CoverMedia) -> Result<Vec<CoverMedia>, PdfError> {
        let pdfium = Self::bind_pdfium()?;

        let document = pdfium
            .load_pdf_from_byte_vec(pdf.data.to_vec(), None)
            .map_err(|e| PdfError::ParseFailed {
                reason: e.to_string(),
            })?;

        let page_count = document.pages().len();
        let mut images = Vec::with_capacity(page_count as usize);

        for page_index in 0..page_count {
            let page = document
                .pages()
                .get(page_index)
                .map_err(|e| PdfError::RenderFailed {
                    page: page_index as usize,
                    reason: e.to_string(),
                })?;

            // Page width is in PDF points (1/72 inch); scale to target DPI.
            #[expect(
                clippy::cast_possible_truncation,
                reason = "DPI calculation for render"
            )]
            let target_width = (page.width().value * f32::from(self.dpi) / 72.0) as i32;

            let bitmap = page
                .render_with_config(&PdfRenderConfig::new().set_target_width(target_width))
                .map_err(|e| PdfError::RenderFailed {
                    page: page_index as usize,
                    reason: e.to_string(),
                })?;

            let width = bitmap.width().cast_unsigned();
            let height = bitmap.height().cast_unsigned();
            let rgba_data = bitmap.as_rgba_bytes();

            // NOTE(review): the clone() here looks redundant — rgba_data is
            // not used afterwards; consider passing it by value.
            let img =
                image::RgbaImage::from_raw(width, height, rgba_data.clone()).ok_or_else(|| {
                    PdfError::RenderFailed {
                        page: page_index as usize,
                        reason: "invalid bitmap dimensions".to_string(),
                    }
                })?;

            // Record geometry so the raw pixel buffer can be reconstructed
            // later without re-parsing the PDF.
            let mut metadata = HashMap::new();
            metadata.insert("width".to_string(), width.to_string());
            metadata.insert("height".to_string(), height.to_string());
            metadata.insert("format".to_string(), "Png".to_string());
            metadata.insert("page_index".to_string(), page_index.to_string());

            images.push(CoverMedia {
                kind: CoverMediaKind::PngImage,
                data: Bytes::from(img.into_raw()),
                metadata,
            });
        }

        Ok(images)
    }

    /// Reassembles a PDF from rendered page images, one page per image.
    ///
    /// Page geometry is derived from each image's `width`/`height` metadata
    /// and `self.dpi`; `_original` is currently unused.
    ///
    /// NOTE(review): two suspect spots to verify against lopdf / the PDF spec:
    /// 1. The `assert_eq!` calls below assume `add_object` returns exactly
    ///    `page_id`, `page_id + 1`, `page_id + 2` after a `new_object_id`
    ///    call — if lopdf advances `max_id` in *both* `new_object_id` and
    ///    `add_object`, these asserts will panic rather than return
    ///    `RebuildFailed`. The roundtrip test exercising this path is
    ///    `#[ignore]`d, so this is untested.
    /// 2. The image XObject stores PNG-*encoded* bytes but declares
    ///    `Filter => FlateDecode` with `ColorSpace => DeviceRGB`; standard
    ///    viewers expect zlib-compressed raw samples there, not a PNG file.
    #[expect(
        clippy::too_many_lines,
        reason = "PDF reconstruction logic is inherently complex"
    )]
    fn rebuild_pdf_from_images(
        &self,
        images: Vec<CoverMedia>,
        _original: &CoverMedia,
    ) -> Result<CoverMedia, PdfError> {
        let mut doc = Document::with_version("1.7");

        for (page_index, img_media) in images.iter().enumerate() {
            // Pixel dimensions come from the metadata written by
            // render_pages_to_images; missing/invalid entries are fatal.
            let width: u32 = img_media
                .metadata
                .get("width")
                .ok_or_else(|| PdfError::RebuildFailed {
                    reason: "missing width metadata".to_string(),
                })?
                .parse()
                .map_err(|e: std::num::ParseIntError| PdfError::RebuildFailed {
                    reason: e.to_string(),
                })?;

            let height: u32 = img_media
                .metadata
                .get("height")
                .ok_or_else(|| PdfError::RebuildFailed {
                    reason: "missing height metadata".to_string(),
                })?
                .parse()
                .map_err(|e: std::num::ParseIntError| PdfError::RebuildFailed {
                    reason: e.to_string(),
                })?;

            // data is expected to be raw RGBA pixels of width*height*4 bytes.
            let img = image::RgbaImage::from_raw(width, height, img_media.data.to_vec())
                .ok_or_else(|| PdfError::RebuildFailed {
                    reason: "invalid image dimensions or data length".to_string(),
                })?;

            // Re-encode the raw pixels as PNG for embedding in the stream.
            let dynamic_img = DynamicImage::ImageRgba8(img);
            let mut png_bytes = Vec::new();
            dynamic_img
                .write_to(&mut std::io::Cursor::new(&mut png_bytes), ImageFormat::Png)
                .map_err(|e| PdfError::RebuildFailed {
                    reason: e.to_string(),
                })?;

            // Convert pixel size back to PDF points at the render DPI so the
            // rebuilt page matches the original physical dimensions.
            #[expect(clippy::cast_precision_loss, reason = "image dimensions to PDF points")]
            let page_width = width as f32 * 72.0 / f32::from(self.dpi);
            #[expect(clippy::cast_precision_loss, reason = "image dimensions to PDF points")]
            let page_height = height as f32 * 72.0 / f32::from(self.dpi);

            // Object layout assumption: page, then contents (+1), then
            // image XObject (+2) — see the NOTE(review) in the doc comment.
            let page_id = doc.new_object_id();
            let page = doc.add_object(lopdf::dictionary! {
                "Type" => "Page",
                "MediaBox" => vec![0.into(), 0.into(), page_width.into(), page_height.into()],
                "Contents" => Object::Reference((page_id.0 + 1, 0)),
                "Resources" => lopdf::dictionary! {
                    "XObject" => lopdf::dictionary! {
                        "Image1" => Object::Reference((page_id.0 + 2, 0)),
                    },
                },
            });

            // Content stream: scale the unit image square to the full page.
            let content = format!("q\n{page_width} 0 0 {page_height} 0 0 cm\n/Image1 Do\nQ");
            let content_id = doc.add_object(lopdf::Stream::new(
                lopdf::dictionary! {},
                content.into_bytes(),
            ));

            let image_id = doc.add_object(lopdf::Stream::new(
                lopdf::dictionary! {
                    "Type" => "XObject",
                    "Subtype" => "Image",
                    "Width" => i64::from(width),
                    "Height" => i64::from(height),
                    "ColorSpace" => "DeviceRGB",
                    "BitsPerComponent" => 8,
                    "Filter" => "FlateDecode",
                },
                png_bytes,
            ));

            // Invariant checks for the +1/+2 reference arithmetic above;
            // panic (not Err) on violation — see NOTE(review).
            assert_eq!(page, (page_id.0, 0));
            assert_eq!(content_id, (page_id.0 + 1, 0));
            assert_eq!(image_id, (page_id.0 + 2, 0));

            if doc.catalog().is_err() {
                // First page: create the catalog and the page tree root.
                let pages_obj_id = doc.new_object_id();
                let catalog_id = doc.add_object(lopdf::dictionary! {
                    "Type" => "Catalog",
                    "Pages" => Object::Reference(pages_obj_id),
                });
                doc.trailer.set("Root", Object::Reference(catalog_id));

                doc.objects.insert(
                    pages_obj_id,
                    lopdf::Object::Dictionary(lopdf::dictionary! {
                        "Type" => "Pages",
                        "Kids" => vec![Object::Reference(page)],
                        "Count" => 1,
                    }),
                );
            } else {
                // Subsequent pages: append to the existing Kids array and
                // refresh Count. Failures to navigate the tree are silently
                // skipped here (the page object still exists but would be
                // orphaned) — NOTE(review): consider surfacing that as an
                // error instead.
                if let Ok(pages_ref) = doc.catalog().and_then(|c| c.get(b"Pages"))
                    && let Ok(pages_obj_id) = pages_ref.as_reference()
                    && let Ok(pages_dict) = doc.get_object_mut(pages_obj_id)
                    && let Object::Dictionary(dict) = pages_dict
                {
                    let mut kids = if let Ok(Object::Array(arr)) = dict.get(b"Kids") {
                        arr.clone()
                    } else {
                        vec![]
                    };
                    kids.push(Object::Reference(page));

                    dict.set("Kids", Object::Array(kids));
                    #[expect(clippy::cast_possible_wrap, reason = "page count fits in i64")]
                    dict.set("Count", (page_index + 1) as i64);
                }
            }
        }

        let mut pdf_bytes = Vec::new();
        doc.save_to(&mut BufWriter::new(&mut pdf_bytes))
            .map_err(|e| PdfError::RebuildFailed {
                reason: e.to_string(),
            })?;

        let mut metadata = HashMap::new();
        metadata.insert(KEY_PAGE_COUNT.to_string(), images.len().to_string());

        Ok(CoverMedia {
            kind: CoverMediaKind::PdfDocument,
            data: Bytes::from(pdf_bytes),
            metadata,
        })
    }

    /// Embeds `payload` MSB-first into the least-significant bits of integer
    /// tokens found in every stream object of the document.
    ///
    /// Tokens that do not parse as `i32` (operators, names, reals like
    /// `0.5`) pass through unchanged and carry no payload bits. Returns
    /// `EmbedFailed` if the document holds fewer integer tokens than the
    /// payload has bits.
    ///
    /// NOTE(review): touched streams are re-serialized with single-space
    /// token separation, so original intra-stream whitespace is not
    /// preserved — confirm this layout normalization is acceptable for the
    /// stego threat model.
    fn embed_in_content_stream(
        &self,
        pdf: CoverMedia,
        payload: &Payload,
    ) -> Result<CoverMedia, PdfError> {
        let mut doc = Document::load_from(&pdf.data[..]).map_err(|e| PdfError::ParseFailed {
            reason: e.to_string(),
        })?;

        // Flatten the payload into individual bits, MSB first per byte.
        let payload_bits: Vec<u8> = payload
            .as_bytes()
            .iter()
            .flat_map(|byte| (0..8).rev().map(move |i| (byte >> i) & 1))
            .collect();

        let mut bit_index = 0;

        // Snapshot ids first: we need &mut access to objects inside the loop.
        let object_ids: Vec<_> = doc.objects.keys().copied().collect();
        for obj_id in object_ids {
            if bit_index >= payload_bits.len() {
                break;
            }

            if let Ok(obj) = doc.get_object_mut(obj_id)
                && let Object::Stream(stream) = obj
            {
                // Lossy decode: non-UTF-8 stream bytes become U+FFFD, which
                // won't parse as integers and thus pass through untouched.
                let content = String::from_utf8_lossy(&stream.content);
                let mut modified_content = String::new();
                let mut tokens: Vec<&str> = content.split_whitespace().collect();

                for token in &mut tokens {
                    // Payload exhausted: copy remaining tokens verbatim.
                    if bit_index >= payload_bits.len() {
                        modified_content.push_str(token);
                        modified_content.push(' ');
                        continue;
                    }

                    if let Ok(mut num) = token.parse::<i32>() {
                        // Force the LSB to the current payload bit.
                        if let Some(&bit) = payload_bits.get(bit_index) {
                            if bit == 1 {
                                num |= 1;
                            } else {
                                num &= !1;
                            }
                        }
                        modified_content.push_str(&num.to_string());
                        bit_index += 1;
                    } else {
                        modified_content.push_str(token);
                    }
                    modified_content.push(' ');
                }

                stream.set_content(modified_content.trim().as_bytes().to_vec());
            }
        }

        // Not enough integer tokens to carry every payload bit.
        if bit_index < payload_bits.len() {
            return Err(PdfError::EmbedFailed {
                reason: format!(
                    "insufficient capacity: embedded {bit_index}/{} bits",
                    payload_bits.len()
                ),
            });
        }

        let mut pdf_bytes = Vec::new();
        doc.save_to(&mut pdf_bytes)
            .map_err(|e| PdfError::EmbedFailed {
                reason: e.to_string(),
            })?;

        Ok(CoverMedia {
            kind: pdf.kind,
            data: Bytes::from(pdf_bytes),
            metadata: pdf.metadata,
        })
    }

    /// Harvests the LSB of every integer token in every stream object and
    /// packs the bits MSB-first into bytes.
    ///
    /// All integers are read, so the result is the embedded payload followed
    /// by the LSBs of any untouched trailing integers; a trailing partial
    /// byte (< 8 bits) is dropped. Presumably the caller delimits the real
    /// payload (e.g. via a length header inside it) — TODO confirm.
    fn extract_from_content_stream(&self, pdf: &CoverMedia) -> Result<Payload, PdfError> {
        let doc = Document::load_from(&pdf.data[..]).map_err(|e| PdfError::ParseFailed {
            reason: e.to_string(),
        })?;

        let mut extracted_bits = Vec::new();

        for obj in doc.objects.values() {
            if let Object::Stream(stream) = obj {
                let content = String::from_utf8_lossy(&stream.content);
                let tokens: Vec<&str> = content.split_whitespace().collect();

                for token in tokens {
                    if let Ok(num) = token.parse::<i32>() {
                        #[expect(clippy::cast_sign_loss, reason = "LSB is always 0 or 1")]
                        extracted_bits.push((num & 1) as u8);
                    }
                }
            }
        }

        if extracted_bits.is_empty() {
            return Err(PdfError::ExtractFailed {
                reason: "no numeric values found in content streams".to_string(),
            });
        }

        // Reassemble bytes MSB-first; incomplete final chunks are ignored.
        let mut payload_bytes = Vec::new();
        for chunk in extracted_bits.chunks(8) {
            if chunk.len() == 8 {
                let mut byte = 0u8;
                for (i, bit) in chunk.iter().enumerate() {
                    byte |= bit << (7 - i);
                }
                payload_bytes.push(byte);
            }
        }

        Ok(Payload::from_bytes(payload_bytes))
    }

    /// Embeds `payload` base64-encoded inside an XMP metadata stream that is
    /// attached to the document catalog as `/Metadata`.
    ///
    /// NOTE(review): any pre-existing `/Metadata` reference is overwritten
    /// (the old stream object remains in the file as an orphan) — confirm
    /// that is intended.
    fn embed_in_metadata(
        &self,
        pdf: CoverMedia,
        payload: &Payload,
    ) -> Result<CoverMedia, PdfError> {
        let mut doc = Document::load_from(&pdf.data[..]).map_err(|e| PdfError::ParseFailed {
            reason: e.to_string(),
        })?;

        let encoded = general_purpose::STANDARD.encode(payload.as_bytes());

        // Minimal XMP packet carrying the payload in a custom namespace.
        let xmp_content = format!(
            r#"<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>
<x:xmpmeta xmlns:x="adobe:ns:meta/">
 <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
  <rdf:Description rdf:about=""
    xmlns:sf="http://shadowforge.org/ns/1.0/">
   <sf:HiddenData>{encoded}</sf:HiddenData>
  </rdf:Description>
 </rdf:RDF>
</x:xmpmeta>
<?xpacket end="w"?>"#
        );

        let metadata_id = doc.add_object(lopdf::Stream::new(
            lopdf::dictionary! {
                "Type" => "Metadata",
                "Subtype" => "XML",
            },
            xmp_content.into_bytes(),
        ));

        if let Ok(catalog) = doc.catalog_mut() {
            catalog.set("Metadata", Object::Reference(metadata_id));
        } else {
            return Err(PdfError::EmbedFailed {
                reason: "failed to access catalog".to_string(),
            });
        }

        let mut pdf_bytes = Vec::new();
        doc.save_to(&mut pdf_bytes)
            .map_err(|e| PdfError::EmbedFailed {
                reason: e.to_string(),
            })?;

        Ok(CoverMedia {
            kind: pdf.kind,
            data: Bytes::from(pdf_bytes),
            metadata: pdf.metadata,
        })
    }

    /// Recovers a payload embedded by [`Self::embed_in_metadata`]: follows
    /// the catalog's `/Metadata` reference, locates the `<sf:HiddenData>`
    /// element in the XMP stream, and base64-decodes its content.
    fn extract_from_metadata(&self, pdf: &CoverMedia) -> Result<Payload, PdfError> {
        let doc = Document::load_from(&pdf.data[..]).map_err(|e| PdfError::ParseFailed {
            reason: e.to_string(),
        })?;

        let catalog = doc.catalog().map_err(|e| PdfError::ExtractFailed {
            reason: format!("failed to access catalog: {e}"),
        })?;

        let metadata_ref = catalog
            .get(b"Metadata")
            .map_err(|_| PdfError::ExtractFailed {
                reason: "no metadata found in catalog".to_string(),
            })?
            .as_reference()
            .map_err(|_| PdfError::ExtractFailed {
                reason: "metadata is not a reference".to_string(),
            })?;

        let metadata_obj = doc
            .get_object(metadata_ref)
            .map_err(|e| PdfError::ExtractFailed {
                reason: format!("failed to get metadata object: {e}"),
            })?;

        let metadata_stream = metadata_obj
            .as_stream()
            .map_err(|_| PdfError::ExtractFailed {
                reason: "metadata is not a stream".to_string(),
            })?;

        let xmp_content = String::from_utf8_lossy(&metadata_stream.content);

        // Naive substring search — sufficient because we wrote the XMP
        // packet ourselves in embed_in_metadata. NOTE(review): if the end
        // tag ever preceded the start tag the slice below would panic;
        // cannot happen with packets we produce, but it's not validated.
        let start_tag = "<sf:HiddenData>";
        let end_tag = "</sf:HiddenData>";

        let start_idx = xmp_content
            .find(start_tag)
            .ok_or_else(|| PdfError::ExtractFailed {
                reason: "no sf:HiddenData tag found".to_string(),
            })?
            .strict_add(start_tag.len());

        let end_idx = xmp_content
            .find(end_tag)
            .ok_or_else(|| PdfError::ExtractFailed {
                reason: "no closing sf:HiddenData tag found".to_string(),
            })?;

        let encoded_data = &xmp_content[start_idx..end_idx];

        let decoded = general_purpose::STANDARD
            .decode(encoded_data.trim())
            .map_err(|e| PdfError::ExtractFailed {
                reason: format!("base64 decode failed: {e}"),
            })?;

        Ok(Payload::from_bytes(decoded))
    }
}
603
604fn ensure_pdf_cover(cover: &CoverMedia, technique: StegoTechnique) -> Result<Capacity, StegoError> {
605 if cover.kind != CoverMediaKind::PdfDocument {
606 return Err(StegoError::UnsupportedCoverType {
607 reason: format!("{technique:?} requires a PDF cover"),
608 });
609 }
610
611 Ok(Capacity {
612 bytes: estimate_capacity(cover, technique),
613 technique,
614 })
615}
616
617fn map_pdf_error(error: PdfError) -> StegoError {
618 match error {
619 PdfError::Encrypted => StegoError::UnsupportedCoverType {
620 reason: "encrypted PDF documents are not supported".to_string(),
621 },
622 PdfError::ExtractFailed { .. } => StegoError::NoPayloadFound,
623 PdfError::RenderFailed { page, reason } => StegoError::MalformedCoverData {
624 reason: format!("pdf render failed on page {page}: {reason}"),
625 },
626 PdfError::ParseFailed { reason }
627 | PdfError::RebuildFailed { reason }
628 | PdfError::EmbedFailed { reason }
629 | PdfError::IoError { reason } => StegoError::MalformedCoverData {
630 reason: format!("pdf processing failed: {reason}"),
631 },
632 }
633}
634
/// Stego codec that hides data in the LSBs of integer operands inside PDF
/// content streams (see `embed_in_content_stream`).
#[derive(Debug, Default)]
pub struct PdfContentStreamStego {
    // Backing processor; default DPI is irrelevant for this technique.
    processor: PdfProcessorImpl,
}
640
641impl PdfContentStreamStego {
642 #[must_use]
644 pub fn new() -> Self {
645 Self::default()
646 }
647}
648
649impl EmbedTechnique for PdfContentStreamStego {
650 fn technique(&self) -> StegoTechnique {
651 StegoTechnique::PdfContentStream
652 }
653
654 fn capacity(&self, cover: &CoverMedia) -> Result<Capacity, StegoError> {
655 ensure_pdf_cover(cover, <Self as EmbedTechnique>::technique(self))
656 }
657
658 fn embed(&self, cover: CoverMedia, payload: &Payload) -> Result<CoverMedia, StegoError> {
659 ensure_pdf_cover(&cover, <Self as EmbedTechnique>::technique(self))?;
660 self.processor
661 .embed_in_content_stream(cover, payload)
662 .map_err(map_pdf_error)
663 }
664}
665
666impl ExtractTechnique for PdfContentStreamStego {
667 fn technique(&self) -> StegoTechnique {
668 StegoTechnique::PdfContentStream
669 }
670
671 fn extract(&self, stego: &CoverMedia) -> Result<Payload, StegoError> {
672 ensure_pdf_cover(stego, <Self as ExtractTechnique>::technique(self))?;
673 self.processor
674 .extract_from_content_stream(stego)
675 .map_err(map_pdf_error)
676 }
677}
678
/// Stego codec that hides data base64-encoded inside an XMP metadata
/// stream attached to the PDF catalog (see `embed_in_metadata`).
#[derive(Debug, Default)]
pub struct PdfMetadataStego {
    // Backing processor; default DPI is irrelevant for this technique.
    processor: PdfProcessorImpl,
}
684
685impl PdfMetadataStego {
686 #[must_use]
688 pub fn new() -> Self {
689 Self::default()
690 }
691}
692
693impl EmbedTechnique for PdfMetadataStego {
694 fn technique(&self) -> StegoTechnique {
695 StegoTechnique::PdfMetadata
696 }
697
698 fn capacity(&self, cover: &CoverMedia) -> Result<Capacity, StegoError> {
699 ensure_pdf_cover(cover, <Self as EmbedTechnique>::technique(self))
700 }
701
702 fn embed(&self, cover: CoverMedia, payload: &Payload) -> Result<CoverMedia, StegoError> {
703 ensure_pdf_cover(&cover, <Self as EmbedTechnique>::technique(self))?;
704 self.processor
705 .embed_in_metadata(cover, payload)
706 .map_err(map_pdf_error)
707 }
708}
709
710impl ExtractTechnique for PdfMetadataStego {
711 fn technique(&self) -> StegoTechnique {
712 StegoTechnique::PdfMetadata
713 }
714
715 fn extract(&self, stego: &CoverMedia) -> Result<Payload, StegoError> {
716 ensure_pdf_cover(stego, <Self as ExtractTechnique>::technique(self))?;
717 self.processor
718 .extract_from_metadata(stego)
719 .map_err(map_pdf_error)
720 }
721}
722
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    type TestResult = Result<(), Box<dyn std::error::Error>>;

    /// Builds a minimal one-page PDF on disk and checks that `load_pdf`
    /// reports the PDF cover kind and a page count of 1.
    #[test]
    fn test_load_minimal_pdf() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("minimal.pdf");

        let mut doc = Document::with_version("1.7");
        let catalog_pages = doc.new_object_id();
        let first_page = doc.new_object_id();

        doc.objects.insert(
            first_page,
            Object::Dictionary(lopdf::dictionary! {
                "Type" => "Page",
                "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
                "Contents" => Object::Reference((first_page.0 + 1, 0)),
            }),
        );

        // Empty content stream; add_object allocates the id referenced above.
        doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, b"".to_vec()));

        doc.objects.insert(
            catalog_pages,
            Object::Dictionary(lopdf::dictionary! {
                "Type" => "Pages",
                "Kids" => vec![Object::Reference(first_page)],
                "Count" => 1,
            }),
        );

        let catalog_id = doc.add_object(lopdf::dictionary! {
            "Type" => "Catalog",
            "Pages" => Object::Reference(catalog_pages),
        });

        doc.trailer.set("Root", Object::Reference(catalog_id));
        doc.save(&path)?;

        let media = processor.load_pdf(&path)?;
        assert_eq!(media.kind, CoverMediaKind::PdfDocument);
        assert_eq!(media.metadata.get(KEY_PAGE_COUNT), Some(&"1".to_string()));
        Ok(())
    }

    /// Renders a two-page PDF and expects one image per page.
    /// Ignored because it needs a pdfium shared library at runtime.
    #[test]
    #[ignore = "requires pdfium system library"]
    fn test_render_pages_returns_correct_count() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("two_page.pdf");

        let mut doc = Document::with_version("1.7");
        let catalog_pages = doc.new_object_id();

        let page1_id = doc.new_object_id();
        doc.objects.insert(
            page1_id,
            Object::Dictionary(lopdf::dictionary! {
                "Type" => "Page",
                "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
                "Contents" => Object::Reference((page1_id.0 + 1, 0)),
            }),
        );
        doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, b"".to_vec()));

        let page2_id = doc.new_object_id();
        doc.objects.insert(
            page2_id,
            Object::Dictionary(lopdf::dictionary! {
                "Type" => "Page",
                "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
                "Contents" => Object::Reference((page2_id.0 + 1, 0)),
            }),
        );
        doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, b"".to_vec()));

        doc.objects.insert(
            catalog_pages,
            Object::Dictionary(lopdf::dictionary! {
                "Type" => "Pages",
                "Kids" => vec![
                    Object::Reference(page1_id),
                    Object::Reference(page2_id),
                ],
                "Count" => 2,
            }),
        );

        let catalog_id = doc.add_object(lopdf::dictionary! {
            "Type" => "Catalog",
            "Pages" => Object::Reference(catalog_pages),
        });

        doc.trailer.set("Root", Object::Reference(catalog_id));
        doc.save(&path)?;

        let media = processor.load_pdf(&path)?;
        let images = processor.render_pages_to_images(&media)?;
        assert_eq!(images.len(), 2);
        Ok(())
    }

    /// Render -> rebuild -> reload roundtrip: page count must survive.
    /// Ignored because it needs a pdfium shared library at runtime.
    #[test]
    #[ignore = "requires pdfium system library"]
    fn test_rebuild_pdf_roundtrip() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("original.pdf");

        let mut doc = Document::with_version("1.7");
        let catalog_pages = doc.new_object_id();

        let page1_id = doc.new_object_id();
        doc.objects.insert(
            page1_id,
            Object::Dictionary(lopdf::dictionary! {
                "Type" => "Page",
                "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
                "Contents" => Object::Reference((page1_id.0 + 1, 0)),
            }),
        );
        doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, b"".to_vec()));

        let page2_id = doc.new_object_id();
        doc.objects.insert(
            page2_id,
            Object::Dictionary(lopdf::dictionary! {
                "Type" => "Page",
                "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
                "Contents" => Object::Reference((page2_id.0 + 1, 0)),
            }),
        );
        doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, b"".to_vec()));

        doc.objects.insert(
            catalog_pages,
            Object::Dictionary(lopdf::dictionary! {
                "Type" => "Pages",
                "Kids" => vec![
                    Object::Reference(page1_id),
                    Object::Reference(page2_id),
                ],
                "Count" => 2,
            }),
        );

        let catalog_id = doc.add_object(lopdf::dictionary! {
            "Type" => "Catalog",
            "Pages" => Object::Reference(catalog_pages),
        });

        doc.trailer.set("Root", Object::Reference(catalog_id));
        doc.save(&path)?;

        let original = processor.load_pdf(&path)?;
        let images = processor.render_pages_to_images(&original)?;
        let rebuilt = processor.rebuild_pdf_from_images(images, &original)?;

        let rebuilt_path = dir.path().join("rebuilt.pdf");
        processor.save_pdf(&rebuilt, &rebuilt_path)?;
        let reloaded = processor.load_pdf(&rebuilt_path)?;

        assert_eq!(
            reloaded.metadata.get(KEY_PAGE_COUNT),
            original.metadata.get(KEY_PAGE_COUNT)
        );
        Ok(())
    }

    /// Encrypted documents must be rejected with `PdfError::Encrypted`.
    /// Ignored: a bare Encrypt entry in the trailer is not enough for
    /// lopdf to consider the document encrypted.
    #[test]
    #[ignore = "lopdf requires actual encrypted content, not just Encrypt trailer"]
    fn test_encrypted_pdf_error() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("encrypted.pdf");

        let mut doc = Document::with_version("1.7");
        let catalog_pages = doc.new_object_id();
        let first_page = doc.new_object_id();

        doc.objects.insert(
            first_page,
            Object::Dictionary(lopdf::dictionary! {
                "Type" => "Page",
                "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
                "Contents" => Object::Reference((first_page.0 + 1, 0)),
            }),
        );

        doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, b"".to_vec()));

        doc.objects.insert(
            catalog_pages,
            Object::Dictionary(lopdf::dictionary! {
                "Type" => "Pages",
                "Kids" => vec![Object::Reference(first_page)],
                "Count" => 1,
            }),
        );

        let catalog_id = doc.add_object(lopdf::dictionary! {
            "Type" => "Catalog",
            "Pages" => Object::Reference(catalog_pages),
        });

        doc.trailer.set("Root", Object::Reference(catalog_id));

        // Fake an encryption dictionary referenced from the trailer.
        doc.trailer
            .set("Encrypt", Object::Reference((doc.max_id + 1, 0)));
        doc.objects.insert(
            (doc.max_id + 1, 0),
            Object::Dictionary(lopdf::dictionary! {
                "Filter" => "Standard",
                "V" => 1,
                "R" => 2,
            }),
        );

        doc.save(&path)?;

        let result = processor.load_pdf(&path);
        assert!(matches!(result, Err(PdfError::Encrypted)));
        Ok(())
    }

    /// Embeds one byte via content-stream LSBs, saves, reloads, and checks
    /// the extracted payload round-trips. The content stream is built with
    /// enough integer tokens (>= 8) to carry the payload.
    #[test]
    fn test_content_stream_lsb_roundtrip() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("test.pdf");

        let mut doc = Document::with_version("1.7");
        let catalog_pages = doc.new_object_id();
        let first_page = doc.new_object_id();

        doc.objects.insert(
            first_page,
            Object::Dictionary(lopdf::dictionary! {
                "Type" => "Page",
                "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
                "Contents" => Object::Reference((first_page.0 + 1, 0)),
            }),
        );

        let content = b"BT\n/F1 12 Tf\n100 700 Td\n(Hello) Tj\n200 650 Td\n(World) Tj\n50 600 Td\n(Test) Tj\n150 550 Td\n(PDF) Tj\nET\n1 0 0 1 0 0 cm\n";
        doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, content.to_vec()));

        doc.objects.insert(
            catalog_pages,
            Object::Dictionary(lopdf::dictionary! {
                "Type" => "Pages",
                "Kids" => vec![Object::Reference(first_page)],
                "Count" => 1,
            }),
        );

        let catalog_id = doc.add_object(lopdf::dictionary! {
            "Type" => "Catalog",
            "Pages" => Object::Reference(catalog_pages),
        });

        doc.trailer.set("Root", Object::Reference(catalog_id));
        doc.save(&path)?;

        let original = processor.load_pdf(&path)?;
        let payload = Payload::from_bytes(vec![0xAB]);
        let stego = processor.embed_in_content_stream(original, &payload)?;

        let stego_path = dir.path().join("stego.pdf");
        processor.save_pdf(&stego, &stego_path)?;
        let reloaded = processor.load_pdf(&stego_path)?;

        let extracted = processor.extract_from_content_stream(&reloaded)?;
        assert_eq!(extracted.as_bytes(), payload.as_bytes());
        Ok(())
    }

    /// Embeds a payload in XMP metadata, saves, reloads, and checks that
    /// extraction recovers the exact bytes.
    #[test]
    fn test_metadata_embed_roundtrip() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("test.pdf");

        let mut doc = Document::with_version("1.7");
        let catalog_pages = doc.new_object_id();
        let first_page = doc.new_object_id();

        doc.objects.insert(
            first_page,
            Object::Dictionary(lopdf::dictionary! {
                "Type" => "Page",
                "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
                "Contents" => Object::Reference((first_page.0 + 1, 0)),
            }),
        );

        doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, b"".to_vec()));

        doc.objects.insert(
            catalog_pages,
            Object::Dictionary(lopdf::dictionary! {
                "Type" => "Pages",
                "Kids" => vec![Object::Reference(first_page)],
                "Count" => 1,
            }),
        );

        let catalog_id = doc.add_object(lopdf::dictionary! {
            "Type" => "Catalog",
            "Pages" => Object::Reference(catalog_pages),
        });

        doc.trailer.set("Root", Object::Reference(catalog_id));
        doc.save(&path)?;

        let original = processor.load_pdf(&path)?;
        let payload = Payload::from_bytes(vec![0u8; 128]);
        let stego = processor.embed_in_metadata(original, &payload)?;

        let stego_path = dir.path().join("stego.pdf");
        processor.save_pdf(&stego, &stego_path)?;
        let reloaded = processor.load_pdf(&stego_path)?;

        let extracted = processor.extract_from_metadata(&reloaded)?;
        assert_eq!(extracted.as_bytes(), payload.as_bytes());
        Ok(())
    }
}