1use std::collections::HashMap;
4use std::io::BufWriter;
5use std::path::Path;
6
7use base64::Engine;
8use base64::engine::general_purpose;
9use bytes::Bytes;
10use image::{DynamicImage, ImageFormat};
11use lopdf::{Document, Object, dictionary};
12use pdfium_render::prelude::*;
13
14use crate::domain::analysis::estimate_capacity;
15use crate::domain::errors::{PdfError, StegoError};
16use crate::domain::ports::{EmbedTechnique, ExtractTechnique, PdfProcessor};
17use crate::domain::types::{Capacity, CoverMedia, CoverMediaKind, Payload, StegoTechnique};
18
/// Metadata key under which the source document's page count is recorded.
const KEY_PAGE_COUNT: &str = "page_count";

/// Default rasterisation resolution, in dots per inch.
const DEFAULT_DPI: u16 = 150;
22
/// PDF processor backed by `lopdf` (parsing / reassembly) and `pdfium`
/// (page rasterisation).
#[derive(Debug)]
pub struct PdfProcessorImpl {
    /// Target render resolution in dots per inch.
    dpi: u16,
}
31
32impl Default for PdfProcessorImpl {
33 fn default() -> Self {
34 Self { dpi: DEFAULT_DPI }
35 }
36}
37
impl PdfProcessorImpl {
    /// Creates a processor that rasterises pages at `dpi` dots per inch.
    #[must_use]
    pub const fn new(dpi: u16) -> Self {
        Self { dpi }
    }
}
45
impl PdfProcessor for PdfProcessorImpl {
    /// Loads a PDF from disk into a [`CoverMedia`], rejecting encrypted
    /// documents and recording the page count under `KEY_PAGE_COUNT`.
    fn load_pdf(&self, path: &Path) -> Result<CoverMedia, PdfError> {
        // Parse first so structural errors surface before the raw read below.
        let doc = Document::load(path).map_err(|e| PdfError::ParseFailed {
            reason: e.to_string(),
        })?;

        if doc.is_encrypted() {
            return Err(PdfError::Encrypted);
        }

        let page_count = doc.get_pages().len();

        // The parsed `doc` is used only for validation/metadata; the cover
        // carries the untouched on-disk bytes.
        let bytes = std::fs::read(path).map_err(|e| PdfError::IoError {
            reason: e.to_string(),
        })?;

        let mut metadata = HashMap::new();
        metadata.insert(KEY_PAGE_COUNT.to_string(), page_count.to_string());

        Ok(CoverMedia {
            kind: CoverMediaKind::PdfDocument,
            data: Bytes::from(bytes),
            metadata,
        })
    }

    /// Writes the cover media's raw bytes to `path` verbatim.
    fn save_pdf(&self, media: &CoverMedia, path: &Path) -> Result<(), PdfError> {
        std::fs::write(path, &media.data).map_err(|e| PdfError::IoError {
            reason: e.to_string(),
        })?;

        Ok(())
    }

    /// Rasterises every page with pdfium at `self.dpi`, returning one
    /// [`CoverMedia`] per page.
    ///
    /// NOTE(review): each returned `data` holds *raw RGBA pixels*
    /// (`img.into_raw()`), not an encoded PNG, even though `kind` is
    /// `PngImage` and the `format` metadata says "Png".
    /// `rebuild_pdf_from_images` relies on the raw-RGBA layout — confirm the
    /// naming is intentional before changing either side.
    fn render_pages_to_images(&self, pdf: &CoverMedia) -> Result<Vec<CoverMedia>, PdfError> {
        // Prefer a pdfium shared library next to the executable ("./"),
        // falling back to one installed system-wide.
        let pdfium = Pdfium::new(
            Pdfium::bind_to_library(Pdfium::pdfium_platform_library_name_at_path("./"))
                .or_else(|_| Pdfium::bind_to_system_library())
                .map_err(|e| PdfError::RenderFailed {
                    page: 0,
                    reason: format!("Failed to load pdfium library: {e}"),
                })?,
        );

        let document = pdfium
            .load_pdf_from_byte_vec(pdf.data.to_vec(), None)
            .map_err(|e| PdfError::ParseFailed {
                reason: e.to_string(),
            })?;

        let page_count = document.pages().len();
        let mut images = Vec::with_capacity(page_count as usize);

        for page_index in 0..page_count {
            let page = document
                .pages()
                .get(page_index)
                .map_err(|e| PdfError::RenderFailed {
                    page: page_index as usize,
                    reason: e.to_string(),
                })?;

            // Page width is in PDF points (1/72 inch); scale to pixels at
            // the configured DPI. Height is derived by pdfium to keep the
            // aspect ratio.
            #[expect(
                clippy::cast_possible_truncation,
                reason = "DPI calculation for render"
            )]
            let target_width = (page.width().value * f32::from(self.dpi) / 72.0) as i32;

            let bitmap = page
                .render_with_config(&PdfRenderConfig::new().set_target_width(target_width))
                .map_err(|e| PdfError::RenderFailed {
                    page: page_index as usize,
                    reason: e.to_string(),
                })?;

            let width = bitmap.width().cast_unsigned();
            let height = bitmap.height().cast_unsigned();
            let rgba_data = bitmap.as_rgba_bytes();

            // Validates that the buffer length matches width * height * 4.
            // PERF(review): `rgba_data` is not used after this call, so the
            // `.clone()` looks redundant — verify and drop it.
            let img =
                image::RgbaImage::from_raw(width, height, rgba_data.clone()).ok_or_else(|| {
                    PdfError::RenderFailed {
                        page: page_index as usize,
                        reason: "invalid bitmap dimensions".to_string(),
                    }
                })?;

            // Dimensions are stored as strings so `rebuild_pdf_from_images`
            // can reconstruct the image from the raw pixel buffer.
            let mut metadata = HashMap::new();
            metadata.insert("width".to_string(), width.to_string());
            metadata.insert("height".to_string(), height.to_string());
            metadata.insert("format".to_string(), "Png".to_string());
            metadata.insert("page_index".to_string(), page_index.to_string());

            images.push(CoverMedia {
                kind: CoverMediaKind::PngImage,
                data: Bytes::from(img.into_raw()),
                metadata,
            });
        }

        Ok(images)
    }

    /// Builds a fresh PDF whose pages each contain a single full-page image,
    /// one per entry of `images` (expected to be raw-RGBA covers produced by
    /// [`Self::render_pages_to_images`]). `_original` is currently unused.
    #[expect(
        clippy::too_many_lines,
        reason = "PDF reconstruction logic is inherently complex"
    )]
    fn rebuild_pdf_from_images(
        &self,
        images: Vec<CoverMedia>,
        _original: &CoverMedia,
    ) -> Result<CoverMedia, PdfError> {
        let mut doc = Document::with_version("1.7");

        for (page_index, img_media) in images.iter().enumerate() {
            // Recover pixel dimensions from the metadata written at render time.
            let width: u32 = img_media
                .metadata
                .get("width")
                .ok_or_else(|| PdfError::RebuildFailed {
                    reason: "missing width metadata".to_string(),
                })?
                .parse()
                .map_err(|e: std::num::ParseIntError| PdfError::RebuildFailed {
                    reason: e.to_string(),
                })?;

            let height: u32 = img_media
                .metadata
                .get("height")
                .ok_or_else(|| PdfError::RebuildFailed {
                    reason: "missing height metadata".to_string(),
                })?
                .parse()
                .map_err(|e: std::num::ParseIntError| PdfError::RebuildFailed {
                    reason: e.to_string(),
                })?;

            // `data` must be a raw RGBA buffer of exactly width * height * 4 bytes.
            let img = image::RgbaImage::from_raw(width, height, img_media.data.to_vec())
                .ok_or_else(|| PdfError::RebuildFailed {
                    reason: "invalid image dimensions or data length".to_string(),
                })?;

            let dynamic_img = DynamicImage::ImageRgba8(img);
            let mut png_bytes = Vec::new();
            dynamic_img
                .write_to(&mut std::io::Cursor::new(&mut png_bytes), ImageFormat::Png)
                .map_err(|e| PdfError::RebuildFailed {
                    reason: e.to_string(),
                })?;

            // Convert pixels back to PDF points so the page matches the
            // original physical size at this processor's DPI.
            #[expect(clippy::cast_precision_loss, reason = "image dimensions to PDF points")]
            let page_width = width as f32 * 72.0 / f32::from(self.dpi);
            #[expect(clippy::cast_precision_loss, reason = "image dimensions to PDF points")]
            let page_height = height as f32 * 72.0 / f32::from(self.dpi);

            // The page dictionary references its content stream and image
            // XObject by *predicted* sequential ids relative to `page_id`.
            // NOTE(review): elsewhere in this file `new_object_id()` consumes
            // an id (two consecutive calls return distinct ids), which would
            // make the following `add_object` calls land on `page_id.0 + 1`,
            // `+ 2`, `+ 3` — i.e. the asserts below would panic and both
            // references would be off by one. Verify against lopdf's
            // id-allocation semantics; the only test exercising this path is
            // `#[ignore]`d.
            let page_id = doc.new_object_id();
            let page = doc.add_object(lopdf::dictionary! {
                "Type" => "Page",
                "MediaBox" => vec![0.into(), 0.into(), page_width.into(), page_height.into()],
                "Contents" => Object::Reference((page_id.0 + 1, 0)),
                "Resources" => lopdf::dictionary! {
                    "XObject" => lopdf::dictionary! {
                        "Image1" => Object::Reference((page_id.0 + 2, 0)),
                    },
                },
            });

            // Content stream: scale the unit image square to the full page
            // and draw the XObject.
            let content = format!("q\n{page_width} 0 0 {page_height} 0 0 cm\n/Image1 Do\nQ");
            let content_id = doc.add_object(lopdf::Stream::new(
                lopdf::dictionary! {},
                content.into_bytes(),
            ));

            // NOTE(review): the stream payload is PNG-*encoded* bytes, but the
            // dictionary declares /Filter FlateDecode with /ColorSpace
            // DeviceRGB — a PDF consumer expecting raw flate-compressed RGB
            // samples may fail to render this. Confirm intended behaviour
            // (PNG would normally require SMask/predictor handling or raw
            // zlib-compressed samples instead).
            let image_id = doc.add_object(lopdf::Stream::new(
                lopdf::dictionary! {
                    "Type" => "XObject",
                    "Subtype" => "Image",
                    "Width" => i64::from(width),
                    "Height" => i64::from(height),
                    "ColorSpace" => "DeviceRGB",
                    "BitsPerComponent" => 8,
                    "Filter" => "FlateDecode",
                },
                png_bytes,
            ));

            // Runtime guards for the sequential-id assumption documented above.
            assert_eq!(page, (page_id.0, 0));
            assert_eq!(content_id, (page_id.0 + 1, 0));
            assert_eq!(image_id, (page_id.0 + 2, 0));

            // First iteration creates the catalog and page tree; later
            // iterations append to the existing /Pages node.
            if doc.catalog().is_err() {
                let pages_obj_id = doc.new_object_id();
                let catalog_id = doc.add_object(lopdf::dictionary! {
                    "Type" => "Catalog",
                    "Pages" => Object::Reference(pages_obj_id),
                });
                doc.trailer.set("Root", Object::Reference(catalog_id));

                doc.objects.insert(
                    pages_obj_id,
                    lopdf::Object::Dictionary(lopdf::dictionary! {
                        "Type" => "Pages",
                        "Kids" => vec![Object::Reference(page)],
                        "Count" => 1,
                    }),
                );
            } else {
                // Append this page to /Pages/Kids and refresh /Count.
                // Silently skips on any lookup failure — best-effort by design.
                if let Ok(pages_ref) = doc.catalog().and_then(|c| c.get(b"Pages"))
                    && let Ok(pages_obj_id) = pages_ref.as_reference()
                    && let Ok(pages_dict) = doc.get_object_mut(pages_obj_id)
                    && let Object::Dictionary(dict) = pages_dict
                {
                    let mut kids = if let Ok(Object::Array(arr)) = dict.get(b"Kids") {
                        arr.clone()
                    } else {
                        vec![]
                    };
                    kids.push(Object::Reference(page));

                    dict.set("Kids", Object::Array(kids));
                    #[expect(clippy::cast_possible_wrap, reason = "page count fits in i64")]
                    dict.set("Count", (page_index + 1) as i64);
                }
            }
        }

        let mut pdf_bytes = Vec::new();
        doc.save_to(&mut BufWriter::new(&mut pdf_bytes))
            .map_err(|e| PdfError::RebuildFailed {
                reason: e.to_string(),
            })?;

        let mut metadata = HashMap::new();
        metadata.insert(KEY_PAGE_COUNT.to_string(), images.len().to_string());

        Ok(CoverMedia {
            kind: CoverMediaKind::PdfDocument,
            data: Bytes::from(pdf_bytes),
            metadata,
        })
    }

    /// Embeds `payload` by setting the least-significant bit of integer
    /// tokens found in the document's stream objects, MSB-first per payload
    /// byte. Fails with `EmbedFailed` if the document holds fewer integer
    /// tokens than payload bits.
    ///
    /// NOTE(review): every stream object is decoded lossily as UTF-8,
    /// whitespace-normalised, and rewritten — this would corrupt binary or
    /// flate-compressed streams, and splits text-string operands containing
    /// spaces (e.g. `(Hello World)`). Appears safe only for uncompressed,
    /// text-only content streams — confirm the intended input class.
    fn embed_in_content_stream(
        &self,
        pdf: CoverMedia,
        payload: &Payload,
    ) -> Result<CoverMedia, PdfError> {
        let mut doc = Document::load_from(&pdf.data[..]).map_err(|e| PdfError::ParseFailed {
            reason: e.to_string(),
        })?;

        // Flatten payload bytes into individual bits, MSB first.
        let payload_bits: Vec<u8> = payload
            .as_bytes()
            .iter()
            .flat_map(|byte| (0..8).rev().map(move |i| (byte >> i) & 1))
            .collect();

        let mut bit_index = 0;

        // Snapshot ids first so we can mutate objects while iterating.
        // Iteration order must match `extract_from_content_stream`.
        let object_ids: Vec<_> = doc.objects.keys().copied().collect();
        for obj_id in object_ids {
            if bit_index >= payload_bits.len() {
                break;
            }

            if let Ok(obj) = doc.get_object_mut(obj_id)
                && let Object::Stream(stream) = obj
            {
                let content = String::from_utf8_lossy(&stream.content);
                let mut modified_content = String::new();
                let mut tokens: Vec<&str> = content.split_whitespace().collect();

                for token in &mut tokens {
                    // Once the payload is fully embedded, copy tokens through.
                    if bit_index >= payload_bits.len() {
                        modified_content.push_str(token);
                        modified_content.push(' ');
                        continue;
                    }

                    // Only tokens that parse as i32 carry a bit; everything
                    // else (operators, reals, strings) passes unchanged.
                    if let Ok(mut num) = token.parse::<i32>() {
                        if let Some(&bit) = payload_bits.get(bit_index) {
                            if bit == 1 {
                                num |= 1;
                            } else {
                                num &= !1;
                            }
                        }
                        modified_content.push_str(&num.to_string());
                        bit_index += 1;
                    } else {
                        modified_content.push_str(token);
                    }
                    modified_content.push(' ');
                }

                stream.set_content(modified_content.trim().as_bytes().to_vec());
            }
        }

        if bit_index < payload_bits.len() {
            return Err(PdfError::EmbedFailed {
                reason: format!(
                    "insufficient capacity: embedded {bit_index}/{} bits",
                    payload_bits.len()
                ),
            });
        }

        let mut pdf_bytes = Vec::new();
        doc.save_to(&mut pdf_bytes)
            .map_err(|e| PdfError::EmbedFailed {
                reason: e.to_string(),
            })?;

        Ok(CoverMedia {
            kind: pdf.kind,
            data: Bytes::from(pdf_bytes),
            metadata: pdf.metadata,
        })
    }

    /// Reads the LSB of every integer token in every stream object and packs
    /// the bits into bytes (MSB first), mirroring `embed_in_content_stream`.
    ///
    /// NOTE(review): this collects bits from *all* integer tokens, not just
    /// the ones the embedder wrote, so the returned payload may carry
    /// trailing garbage bytes — presumably an outer framing layer trims to
    /// the true length; confirm against callers.
    fn extract_from_content_stream(&self, pdf: &CoverMedia) -> Result<Payload, PdfError> {
        let doc = Document::load_from(&pdf.data[..]).map_err(|e| PdfError::ParseFailed {
            reason: e.to_string(),
        })?;

        let mut extracted_bits = Vec::new();

        // Same object iteration order as the embedder (keyed map traversal).
        for obj in doc.objects.values() {
            if let Object::Stream(stream) = obj {
                let content = String::from_utf8_lossy(&stream.content);
                let tokens: Vec<&str> = content.split_whitespace().collect();

                for token in tokens {
                    if let Ok(num) = token.parse::<i32>() {
                        #[expect(clippy::cast_sign_loss, reason = "LSB is always 0 or 1")]
                        extracted_bits.push((num & 1) as u8);
                    }
                }
            }
        }

        if extracted_bits.is_empty() {
            return Err(PdfError::ExtractFailed {
                reason: "no numeric values found in content streams".to_string(),
            });
        }

        // Repack bits MSB-first; an incomplete trailing chunk is dropped.
        let mut payload_bytes = Vec::new();
        for chunk in extracted_bits.chunks(8) {
            if chunk.len() == 8 {
                let mut byte = 0u8;
                for (i, bit) in chunk.iter().enumerate() {
                    byte |= bit << (7 - i);
                }
                payload_bytes.push(byte);
            }
        }

        Ok(Payload::from_bytes(payload_bytes))
    }

    /// Stores the payload base64-encoded inside a custom `sf:HiddenData`
    /// element of a new XMP metadata stream referenced from the catalog.
    /// Replaces any existing /Metadata reference.
    fn embed_in_metadata(
        &self,
        pdf: CoverMedia,
        payload: &Payload,
    ) -> Result<CoverMedia, PdfError> {
        let mut doc = Document::load_from(&pdf.data[..]).map_err(|e| PdfError::ParseFailed {
            reason: e.to_string(),
        })?;

        let encoded = general_purpose::STANDARD.encode(payload.as_bytes());

        // Minimal XMP packet wrapping the payload in a custom namespace.
        // NOTE(review): the XMP spec puts a UTF-8 BOM between the quotes of
        // `begin="..."`; here it is empty — confirm this is intentional and
        // accepted by downstream consumers.
        let xmp_content = format!(
            r#"<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>
<x:xmpmeta xmlns:x="adobe:ns:meta/">
  <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <rdf:Description rdf:about=""
      xmlns:sf="http://shadowforge.org/ns/1.0/">
      <sf:HiddenData>{encoded}</sf:HiddenData>
    </rdf:Description>
  </rdf:RDF>
</x:xmpmeta>
<?xpacket end="w"?>"#
        );

        let metadata_id = doc.add_object(lopdf::Stream::new(
            lopdf::dictionary! {
                "Type" => "Metadata",
                "Subtype" => "XML",
            },
            xmp_content.into_bytes(),
        ));

        if let Ok(catalog) = doc.catalog_mut() {
            catalog.set("Metadata", Object::Reference(metadata_id));
        } else {
            return Err(PdfError::EmbedFailed {
                reason: "failed to access catalog".to_string(),
            });
        }

        let mut pdf_bytes = Vec::new();
        doc.save_to(&mut pdf_bytes)
            .map_err(|e| PdfError::EmbedFailed {
                reason: e.to_string(),
            })?;

        Ok(CoverMedia {
            kind: pdf.kind,
            data: Bytes::from(pdf_bytes),
            metadata: pdf.metadata,
        })
    }

    /// Recovers a payload written by [`Self::embed_in_metadata`]: resolves
    /// the catalog's /Metadata stream, locates the `sf:HiddenData` element,
    /// and base64-decodes its text content.
    fn extract_from_metadata(&self, pdf: &CoverMedia) -> Result<Payload, PdfError> {
        let doc = Document::load_from(&pdf.data[..]).map_err(|e| PdfError::ParseFailed {
            reason: e.to_string(),
        })?;

        let catalog = doc.catalog().map_err(|e| PdfError::ExtractFailed {
            reason: format!("failed to access catalog: {e}"),
        })?;

        let metadata_ref = catalog
            .get(b"Metadata")
            .map_err(|_| PdfError::ExtractFailed {
                reason: "no metadata found in catalog".to_string(),
            })?
            .as_reference()
            .map_err(|_| PdfError::ExtractFailed {
                reason: "metadata is not a reference".to_string(),
            })?;

        let metadata_obj = doc
            .get_object(metadata_ref)
            .map_err(|e| PdfError::ExtractFailed {
                reason: format!("failed to get metadata object: {e}"),
            })?;

        let metadata_stream = metadata_obj
            .as_stream()
            .map_err(|_| PdfError::ExtractFailed {
                reason: "metadata is not a stream".to_string(),
            })?;

        let xmp_content = String::from_utf8_lossy(&metadata_stream.content);

        // Plain substring search — no XML parsing; the first matching pair
        // of tags wins.
        let start_tag = "<sf:HiddenData>";
        let end_tag = "</sf:HiddenData>";

        let start_idx = xmp_content
            .find(start_tag)
            .ok_or_else(|| PdfError::ExtractFailed {
                reason: "no sf:HiddenData tag found".to_string(),
            })?
            .strict_add(start_tag.len());

        let end_idx = xmp_content
            .find(end_tag)
            .ok_or_else(|| PdfError::ExtractFailed {
                reason: "no closing sf:HiddenData tag found".to_string(),
            })?;

        let encoded_data = &xmp_content[start_idx..end_idx];

        let decoded = general_purpose::STANDARD
            .decode(encoded_data.trim())
            .map_err(|e| PdfError::ExtractFailed {
                reason: format!("base64 decode failed: {e}"),
            })?;

        Ok(Payload::from_bytes(decoded))
    }
}
574
575fn ensure_pdf_cover(cover: &CoverMedia, technique: StegoTechnique) -> Result<Capacity, StegoError> {
576 if cover.kind != CoverMediaKind::PdfDocument {
577 return Err(StegoError::UnsupportedCoverType {
578 reason: format!("{technique:?} requires a PDF cover"),
579 });
580 }
581
582 Ok(Capacity {
583 bytes: estimate_capacity(cover, technique),
584 technique,
585 })
586}
587
588fn map_pdf_error(error: PdfError) -> StegoError {
589 match error {
590 PdfError::Encrypted => StegoError::UnsupportedCoverType {
591 reason: "encrypted PDF documents are not supported".to_string(),
592 },
593 PdfError::ExtractFailed { .. } => StegoError::NoPayloadFound,
594 PdfError::RenderFailed { page, reason } => StegoError::MalformedCoverData {
595 reason: format!("pdf render failed on page {page}: {reason}"),
596 },
597 PdfError::ParseFailed { reason }
598 | PdfError::RebuildFailed { reason }
599 | PdfError::EmbedFailed { reason }
600 | PdfError::IoError { reason } => StegoError::MalformedCoverData {
601 reason: format!("pdf processing failed: {reason}"),
602 },
603 }
604}
605
/// Steganography technique hiding payload bits in the least-significant bits
/// of numeric operands inside PDF content streams.
#[derive(Debug, Default)]
pub struct PdfContentStreamStego {
    /// Underlying PDF processor that performs the actual embed/extract work.
    processor: PdfProcessorImpl,
}
611
612impl PdfContentStreamStego {
613 #[must_use]
615 pub fn new() -> Self {
616 Self::default()
617 }
618}
619
620impl EmbedTechnique for PdfContentStreamStego {
621 fn technique(&self) -> StegoTechnique {
622 StegoTechnique::PdfContentStream
623 }
624
625 fn capacity(&self, cover: &CoverMedia) -> Result<Capacity, StegoError> {
626 ensure_pdf_cover(cover, <Self as EmbedTechnique>::technique(self))
627 }
628
629 fn embed(&self, cover: CoverMedia, payload: &Payload) -> Result<CoverMedia, StegoError> {
630 ensure_pdf_cover(&cover, <Self as EmbedTechnique>::technique(self))?;
631 self.processor
632 .embed_in_content_stream(cover, payload)
633 .map_err(map_pdf_error)
634 }
635}
636
637impl ExtractTechnique for PdfContentStreamStego {
638 fn technique(&self) -> StegoTechnique {
639 StegoTechnique::PdfContentStream
640 }
641
642 fn extract(&self, stego: &CoverMedia) -> Result<Payload, StegoError> {
643 ensure_pdf_cover(stego, <Self as ExtractTechnique>::technique(self))?;
644 self.processor
645 .extract_from_content_stream(stego)
646 .map_err(map_pdf_error)
647 }
648}
649
/// Steganography technique hiding a base64-encoded payload inside a custom
/// element of the document's XMP metadata stream.
#[derive(Debug, Default)]
pub struct PdfMetadataStego {
    /// Underlying PDF processor that performs the actual embed/extract work.
    processor: PdfProcessorImpl,
}
655
656impl PdfMetadataStego {
657 #[must_use]
659 pub fn new() -> Self {
660 Self::default()
661 }
662}
663
664impl EmbedTechnique for PdfMetadataStego {
665 fn technique(&self) -> StegoTechnique {
666 StegoTechnique::PdfMetadata
667 }
668
669 fn capacity(&self, cover: &CoverMedia) -> Result<Capacity, StegoError> {
670 ensure_pdf_cover(cover, <Self as EmbedTechnique>::technique(self))
671 }
672
673 fn embed(&self, cover: CoverMedia, payload: &Payload) -> Result<CoverMedia, StegoError> {
674 ensure_pdf_cover(&cover, <Self as EmbedTechnique>::technique(self))?;
675 self.processor
676 .embed_in_metadata(cover, payload)
677 .map_err(map_pdf_error)
678 }
679}
680
681impl ExtractTechnique for PdfMetadataStego {
682 fn technique(&self) -> StegoTechnique {
683 StegoTechnique::PdfMetadata
684 }
685
686 fn extract(&self, stego: &CoverMedia) -> Result<Payload, StegoError> {
687 ensure_pdf_cover(stego, <Self as ExtractTechnique>::technique(self))?;
688 self.processor
689 .extract_from_metadata(stego)
690 .map_err(map_pdf_error)
691 }
692}
693
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    type TestResult = Result<(), Box<dyn std::error::Error>>;

    /// Builds a minimal valid PDF with one page per entry in `page_contents`,
    /// each page wired to its own content stream.
    ///
    /// Object ids are allocated in the same sequence the original inline
    /// test bodies used: the /Pages id is reserved first, then each page
    /// dictionary is inserted followed immediately by its content stream so
    /// the stream receives the next sequential id (`page_id.0 + 1`), and the
    /// catalog is added last.
    fn build_doc(page_contents: &[&[u8]]) -> Document {
        let mut doc = Document::with_version("1.7");
        let catalog_pages = doc.new_object_id();

        let mut kids = Vec::new();
        for content in page_contents {
            let page_id = doc.new_object_id();
            doc.objects.insert(
                page_id,
                Object::Dictionary(lopdf::dictionary! {
                    "Type" => "Page",
                    "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
                    // The content stream is added right below, so it takes
                    // the next sequential object id.
                    "Contents" => Object::Reference((page_id.0 + 1, 0)),
                }),
            );
            doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, content.to_vec()));
            kids.push(Object::Reference(page_id));
        }

        let count = i64::try_from(page_contents.len()).expect("page count fits in i64");
        doc.objects.insert(
            catalog_pages,
            Object::Dictionary(lopdf::dictionary! {
                "Type" => "Pages",
                "Kids" => kids,
                "Count" => count,
            }),
        );

        let catalog_id = doc.add_object(lopdf::dictionary! {
            "Type" => "Catalog",
            "Pages" => Object::Reference(catalog_pages),
        });
        doc.trailer.set("Root", Object::Reference(catalog_id));

        doc
    }

    /// Loading a minimal one-page PDF yields a PDF cover with page_count "1".
    #[test]
    fn test_load_minimal_pdf() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("minimal.pdf");

        build_doc(&[b"".as_slice()]).save(&path)?;

        let media = processor.load_pdf(&path)?;
        assert_eq!(media.kind, CoverMediaKind::PdfDocument);
        assert_eq!(media.metadata.get(KEY_PAGE_COUNT), Some(&"1".to_string()));
        Ok(())
    }

    /// Rendering a two-page document must produce exactly two images.
    #[test]
    #[ignore = "requires pdfium system library"]
    fn test_render_pages_returns_correct_count() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("two_page.pdf");

        build_doc(&[b"".as_slice(), b"".as_slice()]).save(&path)?;

        let media = processor.load_pdf(&path)?;
        let images = processor.render_pages_to_images(&media)?;
        assert_eq!(images.len(), 2);
        Ok(())
    }

    /// Render → rebuild → save → reload must preserve the page count.
    #[test]
    #[ignore = "requires pdfium system library"]
    fn test_rebuild_pdf_roundtrip() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("original.pdf");

        build_doc(&[b"".as_slice(), b"".as_slice()]).save(&path)?;

        let original = processor.load_pdf(&path)?;
        let images = processor.render_pages_to_images(&original)?;
        let rebuilt = processor.rebuild_pdf_from_images(images, &original)?;

        let rebuilt_path = dir.path().join("rebuilt.pdf");
        processor.save_pdf(&rebuilt, &rebuilt_path)?;
        let reloaded = processor.load_pdf(&rebuilt_path)?;

        assert_eq!(
            reloaded.metadata.get(KEY_PAGE_COUNT),
            original.metadata.get(KEY_PAGE_COUNT)
        );
        Ok(())
    }

    /// A document carrying an /Encrypt trailer entry must be rejected.
    #[test]
    #[ignore = "lopdf requires actual encrypted content, not just Encrypt trailer"]
    fn test_encrypted_pdf_error() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("encrypted.pdf");

        let mut doc = build_doc(&[b"".as_slice()]);

        // Attach a standard-security /Encrypt dictionary. The direct map
        // insert leaves max_id untouched, so both uses of `max_id + 1`
        // address the same object.
        doc.trailer
            .set("Encrypt", Object::Reference((doc.max_id + 1, 0)));
        doc.objects.insert(
            (doc.max_id + 1, 0),
            Object::Dictionary(lopdf::dictionary! {
                "Filter" => "Standard",
                "V" => 1,
                "R" => 2,
            }),
        );

        doc.save(&path)?;

        let result = processor.load_pdf(&path);
        assert!(matches!(result, Err(PdfError::Encrypted)));
        Ok(())
    }

    /// Embedding one byte in a numeric-rich content stream and extracting it
    /// after a save/reload must return the original byte.
    #[test]
    fn test_content_stream_lsb_roundtrip() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("test.pdf");

        // Content with plenty of integer operands to carry the payload bits.
        let content: &[u8] = b"BT\n/F1 12 Tf\n100 700 Td\n(Hello) Tj\n200 650 Td\n(World) Tj\n50 600 Td\n(Test) Tj\n150 550 Td\n(PDF) Tj\nET\n1 0 0 1 0 0 cm\n";
        build_doc(&[content]).save(&path)?;

        let original = processor.load_pdf(&path)?;
        let payload = Payload::from_bytes(vec![0xAB]);
        let stego = processor.embed_in_content_stream(original, &payload)?;

        let stego_path = dir.path().join("stego.pdf");
        processor.save_pdf(&stego, &stego_path)?;
        let reloaded = processor.load_pdf(&stego_path)?;

        let extracted = processor.extract_from_content_stream(&reloaded)?;
        assert_eq!(extracted.as_bytes(), payload.as_bytes());
        Ok(())
    }

    /// Embedding via the XMP metadata stream and extracting after a
    /// save/reload must return the original payload.
    #[test]
    fn test_metadata_embed_roundtrip() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("test.pdf");

        build_doc(&[b"".as_slice()]).save(&path)?;

        let original = processor.load_pdf(&path)?;
        let payload = Payload::from_bytes(vec![0u8; 128]);
        let stego = processor.embed_in_metadata(original, &payload)?;

        let stego_path = dir.path().join("stego.pdf");
        processor.save_pdf(&stego, &stego_path)?;
        let reloaded = processor.load_pdf(&stego_path)?;

        let extracted = processor.extract_from_metadata(&reloaded)?;
        assert_eq!(extracted.as_bytes(), payload.as_bytes());
        Ok(())
    }
}