1use std::collections::HashMap;
4use std::env;
5use std::io::BufWriter;
6use std::path::Path;
7
8use base64::Engine;
9use base64::engine::general_purpose;
10use bytes::Bytes;
11use image::{DynamicImage, ImageFormat};
12use lopdf::{Document, Object, dictionary};
13use pdfium_render::prelude::*;
14
15use crate::domain::analysis::estimate_capacity;
16use crate::domain::errors::{PdfError, StegoError};
17use crate::domain::ports::{EmbedTechnique, ExtractTechnique, PdfProcessor};
18use crate::domain::types::{Capacity, CoverMedia, CoverMediaKind, Payload, StegoTechnique};
19
20const KEY_PAGE_COUNT: &str = "page_count";
22const DEFAULT_DPI: u16 = 150;
23
/// PDF processor backed by `lopdf` for document manipulation and a
/// dynamically bound `pdfium` library for page rasterization.
#[derive(Debug)]
pub struct PdfProcessorImpl {
    /// Rasterization resolution (dots per inch) used when rendering pages.
    dpi: u16,
}
32
33impl Default for PdfProcessorImpl {
34 fn default() -> Self {
35 Self { dpi: DEFAULT_DPI }
36 }
37}
38
impl PdfProcessorImpl {
    /// Creates a processor that rasterizes pages at the given `dpi`.
    #[must_use]
    pub const fn new(dpi: u16) -> Self {
        Self { dpi }
    }

    /// Locates and binds a pdfium dynamic library, trying in order:
    /// 1. the directory named by the `PDFIUM_DYNAMIC_LIB_PATH` env var,
    /// 2. the platform's system library search path,
    /// 3. the current working directory (`./`).
    ///
    /// Every failed attempt is recorded so the final [`PdfError::BindFailed`]
    /// explains why each location was rejected.
    fn bind_pdfium() -> Result<Pdfium, PdfError> {
        let mut bind_errors = Vec::new();

        if let Some(pdfium_dir) = env::var_os("PDFIUM_DYNAMIC_LIB_PATH") {
            // Resolve the platform-specific library file name inside the
            // configured directory (e.g. libpdfium.so / pdfium.dll).
            let library_path = Pdfium::pdfium_platform_library_name_at_path(&pdfium_dir);
            match Pdfium::bind_to_library(library_path) {
                Ok(bindings) => return Ok(Pdfium::new(bindings)),
                Err(error) => bind_errors.push(format!(
                    "PDFIUM_DYNAMIC_LIB_PATH={}: {error}",
                    Path::new(&pdfium_dir).display()
                )),
            }
        }

        match Pdfium::bind_to_system_library() {
            Ok(bindings) => return Ok(Pdfium::new(bindings)),
            Err(error) => {
                bind_errors.push(format!("system library: {error}"));
            }
        }

        // Last resort: a pdfium binary dropped next to the executable's CWD.
        let local_library = Pdfium::pdfium_platform_library_name_at_path("./");
        match Pdfium::bind_to_library(local_library) {
            Ok(bindings) => return Ok(Pdfium::new(bindings)),
            Err(error) => bind_errors.push(format!("./: {error}")),
        }

        Err(PdfError::BindFailed {
            reason: format!(
                "Failed to load pdfium library. Binding attempts: {}. \
                Download a prebuilt binary from https://github.com/bblanchon/pdfium-binaries/, \
                set PDFIUM_DYNAMIC_LIB_PATH, or disable the 'pdf' feature with --no-default-features --features corpus,adaptive.",
                bind_errors.join("; ")
            ),
        })
    }
}
87
88impl PdfProcessor for PdfProcessorImpl {
89 fn load_pdf(&self, path: &Path) -> Result<CoverMedia, PdfError> {
90 let doc = Document::load(path).map_err(|e| PdfError::ParseFailed {
92 reason: e.to_string(),
93 })?;
94
95 if doc.is_encrypted() {
97 return Err(PdfError::Encrypted);
98 }
99
100 let page_count = doc.get_pages().len();
102
103 let bytes = std::fs::read(path).map_err(|e| PdfError::IoError {
105 reason: e.to_string(),
106 })?;
107
108 let mut metadata = HashMap::new();
110 metadata.insert(KEY_PAGE_COUNT.to_string(), page_count.to_string());
111
112 Ok(CoverMedia {
113 kind: CoverMediaKind::PdfDocument,
114 data: Bytes::from(bytes),
115 metadata,
116 })
117 }
118
119 fn save_pdf(&self, media: &CoverMedia, path: &Path) -> Result<(), PdfError> {
120 std::fs::write(path, &media.data).map_err(|e| PdfError::IoError {
122 reason: e.to_string(),
123 })?;
124
125 Ok(())
126 }
127
128 fn render_pages_to_images(&self, pdf: &CoverMedia) -> Result<Vec<CoverMedia>, PdfError> {
129 let pdfium = Self::bind_pdfium()?;
131
132 let document = pdfium
134 .load_pdf_from_byte_vec(pdf.data.to_vec(), None)
135 .map_err(|e| PdfError::ParseFailed {
136 reason: e.to_string(),
137 })?;
138
139 let page_count = document.pages().len();
140 let mut images = Vec::with_capacity(page_count as usize);
141
142 for page_index in 0..page_count {
144 let page = document
145 .pages()
146 .get(page_index)
147 .map_err(|e| PdfError::RenderFailed {
148 page: page_index as usize,
149 reason: e.to_string(),
150 })?;
151
152 #[expect(
154 clippy::cast_possible_truncation,
155 reason = "DPI calculation for render"
156 )]
157 let target_width = (page.width().value * f32::from(self.dpi) / 72.0) as i32;
158
159 let bitmap = page
160 .render_with_config(&PdfRenderConfig::new().set_target_width(target_width))
161 .map_err(|e| PdfError::RenderFailed {
162 page: page_index as usize,
163 reason: e.to_string(),
164 })?;
165
166 let width = bitmap.width().cast_unsigned();
168 let height = bitmap.height().cast_unsigned();
169 let rgba_data = bitmap.as_rgba_bytes();
170
171 let img =
172 image::RgbaImage::from_raw(width, height, rgba_data.clone()).ok_or_else(|| {
173 PdfError::RenderFailed {
174 page: page_index as usize,
175 reason: "invalid bitmap dimensions".to_string(),
176 }
177 })?;
178
179 let mut metadata = HashMap::new();
181 metadata.insert("width".to_string(), width.to_string());
182 metadata.insert("height".to_string(), height.to_string());
183 metadata.insert("format".to_string(), "Png".to_string());
184 metadata.insert("page_index".to_string(), page_index.to_string());
185
186 images.push(CoverMedia {
187 kind: CoverMediaKind::PngImage,
188 data: Bytes::from(img.into_raw()),
189 metadata,
190 });
191 }
192
193 Ok(images)
194 }
195
196 #[expect(
197 clippy::too_many_lines,
198 reason = "PDF reconstruction logic is inherently complex"
199 )]
200 fn rebuild_pdf_from_images(
201 &self,
202 images: Vec<CoverMedia>,
203 _original: &CoverMedia,
204 ) -> Result<CoverMedia, PdfError> {
205 let mut doc = Document::with_version("1.7");
207
208 for (page_index, img_media) in images.iter().enumerate() {
210 let width: u32 = img_media
212 .metadata
213 .get("width")
214 .ok_or_else(|| PdfError::RebuildFailed {
215 reason: "missing width metadata".to_string(),
216 })?
217 .parse()
218 .map_err(|e: std::num::ParseIntError| PdfError::RebuildFailed {
219 reason: e.to_string(),
220 })?;
221
222 let height: u32 = img_media
223 .metadata
224 .get("height")
225 .ok_or_else(|| PdfError::RebuildFailed {
226 reason: "missing height metadata".to_string(),
227 })?
228 .parse()
229 .map_err(|e: std::num::ParseIntError| PdfError::RebuildFailed {
230 reason: e.to_string(),
231 })?;
232
233 let img = image::RgbaImage::from_raw(width, height, img_media.data.to_vec())
235 .ok_or_else(|| PdfError::RebuildFailed {
236 reason: "invalid image dimensions or data length".to_string(),
237 })?;
238
239 let dynamic_img = DynamicImage::ImageRgba8(img);
240 let mut png_bytes = Vec::new();
241 dynamic_img
242 .write_to(&mut std::io::Cursor::new(&mut png_bytes), ImageFormat::Png)
243 .map_err(|e| PdfError::RebuildFailed {
244 reason: e.to_string(),
245 })?;
246
247 #[expect(clippy::cast_precision_loss, reason = "image dimensions to PDF points")]
249 let page_width = width as f32 * 72.0 / f32::from(self.dpi);
250 #[expect(clippy::cast_precision_loss, reason = "image dimensions to PDF points")]
251 let page_height = height as f32 * 72.0 / f32::from(self.dpi);
252
253 let page_id = doc.new_object_id();
254 let page = doc.add_object(lopdf::dictionary! {
255 "Type" => "Page",
256 "MediaBox" => vec![0.into(), 0.into(), page_width.into(), page_height.into()],
257 "Contents" => Object::Reference((page_id.0 + 1, 0)),
258 "Resources" => lopdf::dictionary! {
259 "XObject" => lopdf::dictionary! {
260 "Image1" => Object::Reference((page_id.0 + 2, 0)),
261 },
262 },
263 });
264
265 let content = format!("q\n{page_width} 0 0 {page_height} 0 0 cm\n/Image1 Do\nQ");
267 let content_id = doc.add_object(lopdf::Stream::new(
268 lopdf::dictionary! {},
269 content.into_bytes(),
270 ));
271
272 let image_id = doc.add_object(lopdf::Stream::new(
274 lopdf::dictionary! {
275 "Type" => "XObject",
276 "Subtype" => "Image",
277 "Width" => i64::from(width),
278 "Height" => i64::from(height),
279 "ColorSpace" => "DeviceRGB",
280 "BitsPerComponent" => 8,
281 "Filter" => "FlateDecode",
282 },
283 png_bytes,
284 ));
285
286 assert_eq!(page, (page_id.0, 0));
288 assert_eq!(content_id, (page_id.0 + 1, 0));
289 assert_eq!(image_id, (page_id.0 + 2, 0));
290
291 if doc.catalog().is_err() {
293 let pages_obj_id = doc.new_object_id();
295 let catalog_id = doc.add_object(lopdf::dictionary! {
296 "Type" => "Catalog",
297 "Pages" => Object::Reference(pages_obj_id),
298 });
299 doc.trailer.set("Root", Object::Reference(catalog_id));
300
301 doc.objects.insert(
302 pages_obj_id,
303 lopdf::Object::Dictionary(lopdf::dictionary! {
304 "Type" => "Pages",
305 "Kids" => vec![Object::Reference(page)],
306 "Count" => 1,
307 }),
308 );
309 } else {
310 if let Ok(pages_ref) = doc.catalog().and_then(|c| c.get(b"Pages"))
312 && let Ok(pages_obj_id) = pages_ref.as_reference()
313 && let Ok(pages_dict) = doc.get_object_mut(pages_obj_id)
314 && let Object::Dictionary(dict) = pages_dict
315 {
316 let mut kids = if let Ok(Object::Array(arr)) = dict.get(b"Kids") {
318 arr.clone()
319 } else {
320 vec![]
321 };
322 kids.push(Object::Reference(page));
323
324 dict.set("Kids", Object::Array(kids));
325 #[expect(clippy::cast_possible_wrap, reason = "page count fits in i64")]
326 dict.set("Count", (page_index + 1) as i64);
327 }
328 }
329 }
330
331 let mut pdf_bytes = Vec::new();
333 doc.save_to(&mut BufWriter::new(&mut pdf_bytes))
334 .map_err(|e| PdfError::RebuildFailed {
335 reason: e.to_string(),
336 })?;
337
338 let mut metadata = HashMap::new();
340 metadata.insert(KEY_PAGE_COUNT.to_string(), images.len().to_string());
341
342 Ok(CoverMedia {
343 kind: CoverMediaKind::PdfDocument,
344 data: Bytes::from(pdf_bytes),
345 metadata,
346 })
347 }
348
349 fn embed_in_content_stream(
350 &self,
351 pdf: CoverMedia,
352 payload: &Payload,
353 ) -> Result<CoverMedia, PdfError> {
354 let mut doc = Document::load_from(&pdf.data[..]).map_err(|e| PdfError::ParseFailed {
356 reason: e.to_string(),
357 })?;
358
359 let payload_bits: Vec<u8> = payload
361 .as_bytes()
362 .iter()
363 .flat_map(|byte| (0..8).rev().map(move |i| (byte >> i) & 1))
364 .collect();
365
366 let mut bit_index = 0;
367
368 let object_ids: Vec<_> = doc.objects.keys().copied().collect();
370 for obj_id in object_ids {
371 if bit_index >= payload_bits.len() {
372 break;
373 }
374
375 if let Ok(obj) = doc.get_object_mut(obj_id)
376 && let Object::Stream(stream) = obj
377 {
378 let content = String::from_utf8_lossy(&stream.content);
380 let mut modified_content = String::new();
381 let mut tokens: Vec<&str> = content.split_whitespace().collect();
382
383 for token in &mut tokens {
384 if bit_index >= payload_bits.len() {
385 modified_content.push_str(token);
386 modified_content.push(' ');
387 continue;
388 }
389
390 if let Ok(mut num) = token.parse::<i32>() {
392 if let Some(&bit) = payload_bits.get(bit_index) {
394 if bit == 1 {
395 num |= 1; } else {
397 num &= !1; }
399 }
400 modified_content.push_str(&num.to_string());
401 bit_index += 1;
402 } else {
403 modified_content.push_str(token);
404 }
405 modified_content.push(' ');
406 }
407
408 stream.set_content(modified_content.trim().as_bytes().to_vec());
410 }
411 }
412
413 if bit_index < payload_bits.len() {
414 return Err(PdfError::EmbedFailed {
415 reason: format!(
416 "insufficient capacity: embedded {bit_index}/{} bits",
417 payload_bits.len()
418 ),
419 });
420 }
421
422 let mut pdf_bytes = Vec::new();
424 doc.save_to(&mut pdf_bytes)
425 .map_err(|e| PdfError::EmbedFailed {
426 reason: e.to_string(),
427 })?;
428
429 Ok(CoverMedia {
430 kind: pdf.kind,
431 data: Bytes::from(pdf_bytes),
432 metadata: pdf.metadata,
433 })
434 }
435
436 fn extract_from_content_stream(&self, pdf: &CoverMedia) -> Result<Payload, PdfError> {
437 let doc = Document::load_from(&pdf.data[..]).map_err(|e| PdfError::ParseFailed {
439 reason: e.to_string(),
440 })?;
441
442 let mut extracted_bits = Vec::new();
443
444 for obj in doc.objects.values() {
446 if let Object::Stream(stream) = obj {
447 let content = String::from_utf8_lossy(&stream.content);
449 let tokens: Vec<&str> = content.split_whitespace().collect();
450
451 for token in tokens {
452 if let Ok(num) = token.parse::<i32>() {
454 #[expect(clippy::cast_sign_loss, reason = "LSB is always 0 or 1")]
456 extracted_bits.push((num & 1) as u8);
457 }
458 }
459 }
460 }
461
462 if extracted_bits.is_empty() {
464 return Err(PdfError::ExtractFailed {
465 reason: "no numeric values found in content streams".to_string(),
466 });
467 }
468
469 let mut payload_bytes = Vec::new();
470 for chunk in extracted_bits.chunks(8) {
471 if chunk.len() == 8 {
472 let mut byte = 0u8;
473 for (i, bit) in chunk.iter().enumerate() {
474 byte |= bit << (7 - i);
475 }
476 payload_bytes.push(byte);
477 }
478 }
479
480 Ok(Payload::from_bytes(payload_bytes))
481 }
482
483 fn embed_in_metadata(
484 &self,
485 pdf: CoverMedia,
486 payload: &Payload,
487 ) -> Result<CoverMedia, PdfError> {
488 let mut doc = Document::load_from(&pdf.data[..]).map_err(|e| PdfError::ParseFailed {
490 reason: e.to_string(),
491 })?;
492
493 let encoded = general_purpose::STANDARD.encode(payload.as_bytes());
495
496 let xmp_content = format!(
498 r#"<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>
499<x:xmpmeta xmlns:x="adobe:ns:meta/">
500 <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
501 <rdf:Description rdf:about=""
502 xmlns:sf="http://shadowforge.org/ns/1.0/">
503 <sf:HiddenData>{encoded}</sf:HiddenData>
504 </rdf:Description>
505 </rdf:RDF>
506</x:xmpmeta>
507<?xpacket end="w"?>"#
508 );
509
510 let metadata_id = doc.add_object(lopdf::Stream::new(
512 lopdf::dictionary! {
513 "Type" => "Metadata",
514 "Subtype" => "XML",
515 },
516 xmp_content.into_bytes(),
517 ));
518
519 if let Ok(catalog) = doc.catalog_mut() {
521 catalog.set("Metadata", Object::Reference(metadata_id));
522 } else {
523 return Err(PdfError::EmbedFailed {
524 reason: "failed to access catalog".to_string(),
525 });
526 }
527
528 let mut pdf_bytes = Vec::new();
530 doc.save_to(&mut pdf_bytes)
531 .map_err(|e| PdfError::EmbedFailed {
532 reason: e.to_string(),
533 })?;
534
535 Ok(CoverMedia {
536 kind: pdf.kind,
537 data: Bytes::from(pdf_bytes),
538 metadata: pdf.metadata,
539 })
540 }
541
542 fn extract_from_metadata(&self, pdf: &CoverMedia) -> Result<Payload, PdfError> {
543 let doc = Document::load_from(&pdf.data[..]).map_err(|e| PdfError::ParseFailed {
545 reason: e.to_string(),
546 })?;
547
548 let catalog = doc.catalog().map_err(|e| PdfError::ExtractFailed {
550 reason: format!("failed to access catalog: {e}"),
551 })?;
552
553 let metadata_ref = catalog
555 .get(b"Metadata")
556 .map_err(|_| PdfError::ExtractFailed {
557 reason: "no metadata found in catalog".to_string(),
558 })?
559 .as_reference()
560 .map_err(|_| PdfError::ExtractFailed {
561 reason: "metadata is not a reference".to_string(),
562 })?;
563
564 let metadata_obj = doc
566 .get_object(metadata_ref)
567 .map_err(|e| PdfError::ExtractFailed {
568 reason: format!("failed to get metadata object: {e}"),
569 })?;
570
571 let metadata_stream = metadata_obj
572 .as_stream()
573 .map_err(|_| PdfError::ExtractFailed {
574 reason: "metadata is not a stream".to_string(),
575 })?;
576
577 let xmp_content = String::from_utf8_lossy(&metadata_stream.content);
579
580 let start_tag = "<sf:HiddenData>";
582 let end_tag = "</sf:HiddenData>";
583
584 let start_idx = xmp_content
585 .find(start_tag)
586 .ok_or_else(|| PdfError::ExtractFailed {
587 reason: "no sf:HiddenData tag found".to_string(),
588 })?
589 .strict_add(start_tag.len());
590
591 let end_idx = xmp_content
592 .find(end_tag)
593 .ok_or_else(|| PdfError::ExtractFailed {
594 reason: "no closing sf:HiddenData tag found".to_string(),
595 })?;
596
597 let encoded_data = &xmp_content[start_idx..end_idx];
598
599 let decoded = general_purpose::STANDARD
601 .decode(encoded_data.trim())
602 .map_err(|e| PdfError::ExtractFailed {
603 reason: format!("base64 decode failed: {e}"),
604 })?;
605
606 Ok(Payload::from_bytes(decoded))
607 }
608}
609
610fn ensure_pdf_cover(cover: &CoverMedia, technique: StegoTechnique) -> Result<Capacity, StegoError> {
611 if cover.kind != CoverMediaKind::PdfDocument {
612 return Err(StegoError::UnsupportedCoverType {
613 reason: format!("{technique:?} requires a PDF cover"),
614 });
615 }
616
617 Ok(Capacity {
618 bytes: estimate_capacity(cover, technique),
619 technique,
620 })
621}
622
623fn map_pdf_error(error: PdfError) -> StegoError {
624 match error {
625 PdfError::Encrypted => StegoError::UnsupportedCoverType {
626 reason: "encrypted PDF documents are not supported".to_string(),
627 },
628 PdfError::ExtractFailed { .. } => StegoError::NoPayloadFound,
629 PdfError::RenderFailed { page, reason } => StegoError::MalformedCoverData {
630 reason: format!("pdf render failed on page {page}: {reason}"),
631 },
632 PdfError::ParseFailed { reason }
633 | PdfError::RebuildFailed { reason }
634 | PdfError::EmbedFailed { reason }
635 | PdfError::IoError { reason } => StegoError::MalformedCoverData {
636 reason: format!("pdf processing failed: {reason}"),
637 },
638 PdfError::BindFailed { reason } => StegoError::UnsupportedCoverType {
639 reason: format!("pdfium library is not available: {reason}"),
640 },
641 }
642}
643
644#[derive(Debug, Default)]
646pub struct PdfContentStreamStego {
647 processor: PdfProcessorImpl,
648}
649
650impl PdfContentStreamStego {
651 #[must_use]
653 pub fn new() -> Self {
654 Self::default()
655 }
656}
657
658impl EmbedTechnique for PdfContentStreamStego {
659 fn technique(&self) -> StegoTechnique {
660 StegoTechnique::PdfContentStream
661 }
662
663 fn capacity(&self, cover: &CoverMedia) -> Result<Capacity, StegoError> {
664 ensure_pdf_cover(cover, <Self as EmbedTechnique>::technique(self))
665 }
666
667 fn embed(&self, cover: CoverMedia, payload: &Payload) -> Result<CoverMedia, StegoError> {
668 ensure_pdf_cover(&cover, <Self as EmbedTechnique>::technique(self))?;
669 self.processor
670 .embed_in_content_stream(cover, payload)
671 .map_err(map_pdf_error)
672 }
673}
674
675impl ExtractTechnique for PdfContentStreamStego {
676 fn technique(&self) -> StegoTechnique {
677 StegoTechnique::PdfContentStream
678 }
679
680 fn extract(&self, stego: &CoverMedia) -> Result<Payload, StegoError> {
681 ensure_pdf_cover(stego, <Self as ExtractTechnique>::technique(self))?;
682 self.processor
683 .extract_from_content_stream(stego)
684 .map_err(map_pdf_error)
685 }
686}
687
688#[derive(Debug, Default)]
690pub struct PdfMetadataStego {
691 processor: PdfProcessorImpl,
692}
693
694impl PdfMetadataStego {
695 #[must_use]
697 pub fn new() -> Self {
698 Self::default()
699 }
700}
701
702impl EmbedTechnique for PdfMetadataStego {
703 fn technique(&self) -> StegoTechnique {
704 StegoTechnique::PdfMetadata
705 }
706
707 fn capacity(&self, cover: &CoverMedia) -> Result<Capacity, StegoError> {
708 ensure_pdf_cover(cover, <Self as EmbedTechnique>::technique(self))
709 }
710
711 fn embed(&self, cover: CoverMedia, payload: &Payload) -> Result<CoverMedia, StegoError> {
712 ensure_pdf_cover(&cover, <Self as EmbedTechnique>::technique(self))?;
713 self.processor
714 .embed_in_metadata(cover, payload)
715 .map_err(map_pdf_error)
716 }
717}
718
719impl ExtractTechnique for PdfMetadataStego {
720 fn technique(&self) -> StegoTechnique {
721 StegoTechnique::PdfMetadata
722 }
723
724 fn extract(&self, stego: &CoverMedia) -> Result<Payload, StegoError> {
725 ensure_pdf_cover(stego, <Self as ExtractTechnique>::technique(self))?;
726 self.processor
727 .extract_from_metadata(stego)
728 .map_err(map_pdf_error)
729 }
730}
731
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    type TestResult = Result<(), Box<dyn std::error::Error>>;

    /// Builds an unsaved lopdf document with one page per entry in
    /// `page_contents`, each page owning its own content stream.
    ///
    /// Previously this construction boilerplate was duplicated verbatim in
    /// all six tests; behavior (object-id layout included) is unchanged.
    fn build_test_doc(page_contents: &[&[u8]]) -> Document {
        let mut doc = Document::with_version("1.7");
        let pages_id = doc.new_object_id();

        let mut kids = Vec::with_capacity(page_contents.len());
        for content in page_contents {
            // `new_object_id` reserves the page id; the immediately following
            // `add_object` takes the next id, which is exactly what the
            // page's `Contents` reference points at.
            let page_id = doc.new_object_id();
            doc.objects.insert(
                page_id,
                Object::Dictionary(lopdf::dictionary! {
                    "Type" => "Page",
                    "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
                    "Contents" => Object::Reference((page_id.0 + 1, 0)),
                }),
            );
            doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, content.to_vec()));
            kids.push(Object::Reference(page_id));
        }

        let page_count = i64::try_from(page_contents.len()).expect("page count fits in i64");
        doc.objects.insert(
            pages_id,
            Object::Dictionary(lopdf::dictionary! {
                "Type" => "Pages",
                "Kids" => kids,
                "Count" => page_count,
            }),
        );

        let catalog_id = doc.add_object(lopdf::dictionary! {
            "Type" => "Catalog",
            "Pages" => Object::Reference(pages_id),
        });
        doc.trailer.set("Root", Object::Reference(catalog_id));
        doc
    }

    #[test]
    fn test_load_minimal_pdf() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("minimal.pdf");

        let mut doc = build_test_doc(&[b"".as_slice()]);
        doc.save(&path)?;

        let media = processor.load_pdf(&path)?;
        assert_eq!(media.kind, CoverMediaKind::PdfDocument);
        assert_eq!(media.metadata.get(KEY_PAGE_COUNT), Some(&"1".to_string()));
        Ok(())
    }

    #[test]
    #[ignore = "requires pdfium system library"]
    fn test_render_pages_returns_correct_count() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("two_page.pdf");

        let mut doc = build_test_doc(&[b"".as_slice(), b"".as_slice()]);
        doc.save(&path)?;

        let media = processor.load_pdf(&path)?;
        let images = processor.render_pages_to_images(&media)?;
        assert_eq!(images.len(), 2);
        Ok(())
    }

    #[test]
    #[ignore = "requires pdfium system library"]
    fn test_rebuild_pdf_roundtrip() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("original.pdf");

        let mut doc = build_test_doc(&[b"".as_slice(), b"".as_slice()]);
        doc.save(&path)?;

        let original = processor.load_pdf(&path)?;
        let images = processor.render_pages_to_images(&original)?;
        let rebuilt = processor.rebuild_pdf_from_images(images, &original)?;

        let rebuilt_path = dir.path().join("rebuilt.pdf");
        processor.save_pdf(&rebuilt, &rebuilt_path)?;
        let reloaded = processor.load_pdf(&rebuilt_path)?;

        // Page count must survive the render -> rebuild -> reload cycle.
        assert_eq!(
            reloaded.metadata.get(KEY_PAGE_COUNT),
            original.metadata.get(KEY_PAGE_COUNT)
        );
        Ok(())
    }

    #[test]
    #[ignore = "lopdf requires actual encrypted content, not just Encrypt trailer"]
    fn test_encrypted_pdf_error() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("encrypted.pdf");

        let mut doc = build_test_doc(&[b"".as_slice()]);

        // Fake an Encrypt dictionary so `is_encrypted` reports true.
        doc.trailer
            .set("Encrypt", Object::Reference((doc.max_id + 1, 0)));
        doc.objects.insert(
            (doc.max_id + 1, 0),
            Object::Dictionary(lopdf::dictionary! {
                "Filter" => "Standard",
                "V" => 1,
                "R" => 2,
            }),
        );

        doc.save(&path)?;

        let result = processor.load_pdf(&path);
        assert!(matches!(result, Err(PdfError::Encrypted)));
        Ok(())
    }

    #[test]
    fn test_content_stream_lsb_roundtrip() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("test.pdf");

        // Content with enough integer tokens to carry a one-byte payload.
        let content: &[u8] = b"BT\n/F1 12 Tf\n100 700 Td\n(Hello) Tj\n200 650 Td\n(World) Tj\n50 600 Td\n(Test) Tj\n150 550 Td\n(PDF) Tj\nET\n1 0 0 1 0 0 cm\n";
        let mut doc = build_test_doc(&[content]);
        doc.save(&path)?;

        let original = processor.load_pdf(&path)?;
        let payload = Payload::from_bytes(vec![0xAB]);
        let stego = processor.embed_in_content_stream(original, &payload)?;

        let stego_path = dir.path().join("stego.pdf");
        processor.save_pdf(&stego, &stego_path)?;
        let reloaded = processor.load_pdf(&stego_path)?;

        let extracted = processor.extract_from_content_stream(&reloaded)?;
        assert_eq!(extracted.as_bytes(), payload.as_bytes());
        Ok(())
    }

    #[test]
    fn test_metadata_embed_roundtrip() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("test.pdf");

        let mut doc = build_test_doc(&[b"".as_slice()]);
        doc.save(&path)?;

        let original = processor.load_pdf(&path)?;
        let payload = Payload::from_bytes(vec![0u8; 128]);
        let stego = processor.embed_in_metadata(original, &payload)?;

        let stego_path = dir.path().join("stego.pdf");
        processor.save_pdf(&stego, &stego_path)?;
        let reloaded = processor.load_pdf(&stego_path)?;

        let extracted = processor.extract_from_metadata(&reloaded)?;
        assert_eq!(extracted.as_bytes(), payload.as_bytes());
        Ok(())
    }
}