1use std::collections::HashMap;
4use std::io::BufWriter;
5use std::path::Path;
6
7use base64::Engine;
8use base64::engine::general_purpose;
9use bytes::Bytes;
10use image::{DynamicImage, ImageFormat};
11use lopdf::{Document, Object, dictionary};
12use pdfium_render::prelude::*;
13
14use crate::domain::errors::PdfError;
15use crate::domain::ports::PdfProcessor;
16use crate::domain::types::{CoverMedia, CoverMediaKind, Payload};
17
/// Metadata key under which the page count is stored on a `CoverMedia`.
const KEY_PAGE_COUNT: &str = "page_count";
/// Render resolution (dots per inch) used by `PdfProcessorImpl::default()`.
const DEFAULT_DPI: u16 = 150;
21
/// PDF steganography processor backed by `lopdf` (parsing/rewriting) and
/// `pdfium` (page rasterisation).
#[derive(Debug)]
pub struct PdfProcessorImpl {
    /// Rasterisation resolution in dots per inch (PDF user space is 72 dpi).
    dpi: u16,
}
30
31impl Default for PdfProcessorImpl {
32 fn default() -> Self {
33 Self { dpi: DEFAULT_DPI }
34 }
35}
36
impl PdfProcessorImpl {
    /// Creates a processor that rasterises pages at `dpi` dots per inch.
    #[must_use]
    pub const fn new(dpi: u16) -> Self {
        Self { dpi }
    }
}
44
45impl PdfProcessor for PdfProcessorImpl {
46 fn load_pdf(&self, path: &Path) -> Result<CoverMedia, PdfError> {
47 let doc = Document::load(path).map_err(|e| PdfError::ParseFailed {
49 reason: e.to_string(),
50 })?;
51
52 if doc.is_encrypted() {
54 return Err(PdfError::Encrypted);
55 }
56
57 let page_count = doc.get_pages().len();
59
60 let bytes = std::fs::read(path).map_err(|e| PdfError::IoError {
62 reason: e.to_string(),
63 })?;
64
65 let mut metadata = HashMap::new();
67 metadata.insert(KEY_PAGE_COUNT.to_string(), page_count.to_string());
68
69 Ok(CoverMedia {
70 kind: CoverMediaKind::PdfDocument,
71 data: Bytes::from(bytes),
72 metadata,
73 })
74 }
75
76 fn save_pdf(&self, media: &CoverMedia, path: &Path) -> Result<(), PdfError> {
77 std::fs::write(path, &media.data).map_err(|e| PdfError::IoError {
79 reason: e.to_string(),
80 })?;
81
82 Ok(())
83 }
84
85 fn render_pages_to_images(&self, pdf: &CoverMedia) -> Result<Vec<CoverMedia>, PdfError> {
86 let pdfium = Pdfium::new(
88 Pdfium::bind_to_library(Pdfium::pdfium_platform_library_name_at_path("./"))
89 .or_else(|_| Pdfium::bind_to_system_library())
90 .map_err(|e| PdfError::RenderFailed {
91 page: 0,
92 reason: format!("Failed to load pdfium library: {e}"),
93 })?,
94 );
95
96 let document = pdfium
98 .load_pdf_from_byte_vec(pdf.data.to_vec(), None)
99 .map_err(|e| PdfError::ParseFailed {
100 reason: e.to_string(),
101 })?;
102
103 let page_count = document.pages().len();
104 let mut images = Vec::with_capacity(page_count as usize);
105
106 for page_index in 0..page_count {
108 let page = document
109 .pages()
110 .get(page_index)
111 .map_err(|e| PdfError::RenderFailed {
112 page: page_index as usize,
113 reason: e.to_string(),
114 })?;
115
116 #[expect(
118 clippy::cast_possible_truncation,
119 reason = "DPI calculation for render"
120 )]
121 let target_width = (page.width().value * f32::from(self.dpi) / 72.0) as i32;
122
123 let bitmap = page
124 .render_with_config(&PdfRenderConfig::new().set_target_width(target_width))
125 .map_err(|e| PdfError::RenderFailed {
126 page: page_index as usize,
127 reason: e.to_string(),
128 })?;
129
130 let width = bitmap.width().cast_unsigned();
132 let height = bitmap.height().cast_unsigned();
133 let rgba_data = bitmap.as_rgba_bytes();
134
135 let img =
136 image::RgbaImage::from_raw(width, height, rgba_data.clone()).ok_or_else(|| {
137 PdfError::RenderFailed {
138 page: page_index as usize,
139 reason: "invalid bitmap dimensions".to_string(),
140 }
141 })?;
142
143 let mut metadata = HashMap::new();
145 metadata.insert("width".to_string(), width.to_string());
146 metadata.insert("height".to_string(), height.to_string());
147 metadata.insert("format".to_string(), "Png".to_string());
148 metadata.insert("page_index".to_string(), page_index.to_string());
149
150 images.push(CoverMedia {
151 kind: CoverMediaKind::PngImage,
152 data: Bytes::from(img.into_raw()),
153 metadata,
154 });
155 }
156
157 Ok(images)
158 }
159
160 #[expect(
161 clippy::too_many_lines,
162 reason = "PDF reconstruction logic is inherently complex"
163 )]
164 fn rebuild_pdf_from_images(
165 &self,
166 images: Vec<CoverMedia>,
167 _original: &CoverMedia,
168 ) -> Result<CoverMedia, PdfError> {
169 let mut doc = Document::with_version("1.7");
171
172 for (page_index, img_media) in images.iter().enumerate() {
174 let width: u32 = img_media
176 .metadata
177 .get("width")
178 .ok_or_else(|| PdfError::RebuildFailed {
179 reason: "missing width metadata".to_string(),
180 })?
181 .parse()
182 .map_err(|e: std::num::ParseIntError| PdfError::RebuildFailed {
183 reason: e.to_string(),
184 })?;
185
186 let height: u32 = img_media
187 .metadata
188 .get("height")
189 .ok_or_else(|| PdfError::RebuildFailed {
190 reason: "missing height metadata".to_string(),
191 })?
192 .parse()
193 .map_err(|e: std::num::ParseIntError| PdfError::RebuildFailed {
194 reason: e.to_string(),
195 })?;
196
197 let img = image::RgbaImage::from_raw(width, height, img_media.data.to_vec())
199 .ok_or_else(|| PdfError::RebuildFailed {
200 reason: "invalid image dimensions or data length".to_string(),
201 })?;
202
203 let dynamic_img = DynamicImage::ImageRgba8(img);
204 let mut png_bytes = Vec::new();
205 dynamic_img
206 .write_to(&mut std::io::Cursor::new(&mut png_bytes), ImageFormat::Png)
207 .map_err(|e| PdfError::RebuildFailed {
208 reason: e.to_string(),
209 })?;
210
211 #[expect(clippy::cast_precision_loss, reason = "image dimensions to PDF points")]
213 let page_width = width as f32 * 72.0 / f32::from(self.dpi);
214 #[expect(clippy::cast_precision_loss, reason = "image dimensions to PDF points")]
215 let page_height = height as f32 * 72.0 / f32::from(self.dpi);
216
217 let page_id = doc.new_object_id();
218 let page = doc.add_object(lopdf::dictionary! {
219 "Type" => "Page",
220 "MediaBox" => vec![0.into(), 0.into(), page_width.into(), page_height.into()],
221 "Contents" => Object::Reference((page_id.0 + 1, 0)),
222 "Resources" => lopdf::dictionary! {
223 "XObject" => lopdf::dictionary! {
224 "Image1" => Object::Reference((page_id.0 + 2, 0)),
225 },
226 },
227 });
228
229 let content = format!("q\n{page_width} 0 0 {page_height} 0 0 cm\n/Image1 Do\nQ");
231 let content_id = doc.add_object(lopdf::Stream::new(
232 lopdf::dictionary! {},
233 content.into_bytes(),
234 ));
235
236 let image_id = doc.add_object(lopdf::Stream::new(
238 lopdf::dictionary! {
239 "Type" => "XObject",
240 "Subtype" => "Image",
241 "Width" => i64::from(width),
242 "Height" => i64::from(height),
243 "ColorSpace" => "DeviceRGB",
244 "BitsPerComponent" => 8,
245 "Filter" => "FlateDecode",
246 },
247 png_bytes,
248 ));
249
250 assert_eq!(page, (page_id.0, 0));
252 assert_eq!(content_id, (page_id.0 + 1, 0));
253 assert_eq!(image_id, (page_id.0 + 2, 0));
254
255 if doc.catalog().is_err() {
257 let pages_obj_id = doc.new_object_id();
259 let catalog_id = doc.add_object(lopdf::dictionary! {
260 "Type" => "Catalog",
261 "Pages" => Object::Reference(pages_obj_id),
262 });
263 doc.trailer.set("Root", Object::Reference(catalog_id));
264
265 doc.objects.insert(
266 pages_obj_id,
267 lopdf::Object::Dictionary(lopdf::dictionary! {
268 "Type" => "Pages",
269 "Kids" => vec![Object::Reference(page)],
270 "Count" => 1,
271 }),
272 );
273 } else {
274 if let Ok(pages_ref) = doc.catalog().and_then(|c| c.get(b"Pages"))
276 && let Ok(pages_obj_id) = pages_ref.as_reference()
277 && let Ok(pages_dict) = doc.get_object_mut(pages_obj_id)
278 && let Object::Dictionary(dict) = pages_dict
279 {
280 let mut kids = if let Ok(Object::Array(arr)) = dict.get(b"Kids") {
282 arr.clone()
283 } else {
284 vec![]
285 };
286 kids.push(Object::Reference(page));
287
288 dict.set("Kids", Object::Array(kids));
289 #[expect(clippy::cast_possible_wrap, reason = "page count fits in i64")]
290 dict.set("Count", (page_index + 1) as i64);
291 }
292 }
293 }
294
295 let mut pdf_bytes = Vec::new();
297 doc.save_to(&mut BufWriter::new(&mut pdf_bytes))
298 .map_err(|e| PdfError::RebuildFailed {
299 reason: e.to_string(),
300 })?;
301
302 let mut metadata = HashMap::new();
304 metadata.insert(KEY_PAGE_COUNT.to_string(), images.len().to_string());
305
306 Ok(CoverMedia {
307 kind: CoverMediaKind::PdfDocument,
308 data: Bytes::from(pdf_bytes),
309 metadata,
310 })
311 }
312
313 fn embed_in_content_stream(
314 &self,
315 pdf: CoverMedia,
316 payload: &Payload,
317 ) -> Result<CoverMedia, PdfError> {
318 let mut doc = Document::load_from(&pdf.data[..]).map_err(|e| PdfError::ParseFailed {
320 reason: e.to_string(),
321 })?;
322
323 let payload_bits: Vec<u8> = payload
325 .as_bytes()
326 .iter()
327 .flat_map(|byte| (0..8).rev().map(move |i| (byte >> i) & 1))
328 .collect();
329
330 let mut bit_index = 0;
331
332 let object_ids: Vec<_> = doc.objects.keys().copied().collect();
334 for obj_id in object_ids {
335 if bit_index >= payload_bits.len() {
336 break;
337 }
338
339 if let Ok(obj) = doc.get_object_mut(obj_id)
340 && let Object::Stream(stream) = obj
341 {
342 let content = String::from_utf8_lossy(&stream.content);
344 let mut modified_content = String::new();
345 let mut tokens: Vec<&str> = content.split_whitespace().collect();
346
347 for token in &mut tokens {
348 if bit_index >= payload_bits.len() {
349 modified_content.push_str(token);
350 modified_content.push(' ');
351 continue;
352 }
353
354 if let Ok(mut num) = token.parse::<i32>() {
356 if let Some(&bit) = payload_bits.get(bit_index) {
358 if bit == 1 {
359 num |= 1; } else {
361 num &= !1; }
363 }
364 modified_content.push_str(&num.to_string());
365 bit_index += 1;
366 } else {
367 modified_content.push_str(token);
368 }
369 modified_content.push(' ');
370 }
371
372 stream.set_content(modified_content.trim().as_bytes().to_vec());
374 }
375 }
376
377 if bit_index < payload_bits.len() {
378 return Err(PdfError::EmbedFailed {
379 reason: format!(
380 "insufficient capacity: embedded {bit_index}/{} bits",
381 payload_bits.len()
382 ),
383 });
384 }
385
386 let mut pdf_bytes = Vec::new();
388 doc.save_to(&mut pdf_bytes)
389 .map_err(|e| PdfError::EmbedFailed {
390 reason: e.to_string(),
391 })?;
392
393 Ok(CoverMedia {
394 kind: pdf.kind,
395 data: Bytes::from(pdf_bytes),
396 metadata: pdf.metadata,
397 })
398 }
399
400 fn extract_from_content_stream(&self, pdf: &CoverMedia) -> Result<Payload, PdfError> {
401 let doc = Document::load_from(&pdf.data[..]).map_err(|e| PdfError::ParseFailed {
403 reason: e.to_string(),
404 })?;
405
406 let mut extracted_bits = Vec::new();
407
408 for obj in doc.objects.values() {
410 if let Object::Stream(stream) = obj {
411 let content = String::from_utf8_lossy(&stream.content);
413 let tokens: Vec<&str> = content.split_whitespace().collect();
414
415 for token in tokens {
416 if let Ok(num) = token.parse::<i32>() {
418 #[expect(clippy::cast_sign_loss, reason = "LSB is always 0 or 1")]
420 extracted_bits.push((num & 1) as u8);
421 }
422 }
423 }
424 }
425
426 if extracted_bits.is_empty() {
428 return Err(PdfError::ExtractFailed {
429 reason: "no numeric values found in content streams".to_string(),
430 });
431 }
432
433 let mut payload_bytes = Vec::new();
434 for chunk in extracted_bits.chunks(8) {
435 if chunk.len() == 8 {
436 let mut byte = 0u8;
437 for (i, bit) in chunk.iter().enumerate() {
438 byte |= bit << (7 - i);
439 }
440 payload_bytes.push(byte);
441 }
442 }
443
444 Ok(Payload::from_bytes(payload_bytes))
445 }
446
    fn embed_in_metadata(
        &self,
        pdf: CoverMedia,
        payload: &Payload,
    ) -> Result<CoverMedia, PdfError> {
        // Re-parse the cover bytes so the XMP packet can be attached.
        let mut doc = Document::load_from(&pdf.data[..]).map_err(|e| PdfError::ParseFailed {
            reason: e.to_string(),
        })?;

        // The payload travels base64-encoded inside a custom `sf:HiddenData`
        // XMP property (counterpart: `extract_from_metadata`).
        let encoded = general_purpose::STANDARD.encode(payload.as_bytes());

        let xmp_content = format!(
            r#"<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>
<x:xmpmeta xmlns:x="adobe:ns:meta/">
  <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <rdf:Description rdf:about=""
      xmlns:sf="http://shadowforge.org/ns/1.0/">
      <sf:HiddenData>{encoded}</sf:HiddenData>
    </rdf:Description>
  </rdf:RDF>
</x:xmpmeta>
<?xpacket end="w"?>"#
        );

        // Register the packet as an XML metadata stream object.
        let metadata_id = doc.add_object(lopdf::Stream::new(
            lopdf::dictionary! {
                "Type" => "Metadata",
                "Subtype" => "XML",
            },
            xmp_content.into_bytes(),
        ));

        // Wire the stream into the catalog's /Metadata entry; without a
        // catalog there is nowhere to hang it, so the embed fails.
        if let Ok(catalog) = doc.catalog_mut() {
            catalog.set("Metadata", Object::Reference(metadata_id));
        } else {
            return Err(PdfError::EmbedFailed {
                reason: "failed to access catalog".to_string(),
            });
        }

        // Serialize the modified document back to bytes.
        let mut pdf_bytes = Vec::new();
        doc.save_to(&mut pdf_bytes)
            .map_err(|e| PdfError::EmbedFailed {
                reason: e.to_string(),
            })?;

        Ok(CoverMedia {
            kind: pdf.kind,
            data: Bytes::from(pdf_bytes),
            metadata: pdf.metadata,
        })
    }
505
506 fn extract_from_metadata(&self, pdf: &CoverMedia) -> Result<Payload, PdfError> {
507 let doc = Document::load_from(&pdf.data[..]).map_err(|e| PdfError::ParseFailed {
509 reason: e.to_string(),
510 })?;
511
512 let catalog = doc.catalog().map_err(|e| PdfError::ExtractFailed {
514 reason: format!("failed to access catalog: {e}"),
515 })?;
516
517 let metadata_ref = catalog
519 .get(b"Metadata")
520 .map_err(|_| PdfError::ExtractFailed {
521 reason: "no metadata found in catalog".to_string(),
522 })?
523 .as_reference()
524 .map_err(|_| PdfError::ExtractFailed {
525 reason: "metadata is not a reference".to_string(),
526 })?;
527
528 let metadata_obj = doc
530 .get_object(metadata_ref)
531 .map_err(|e| PdfError::ExtractFailed {
532 reason: format!("failed to get metadata object: {e}"),
533 })?;
534
535 let metadata_stream = metadata_obj
536 .as_stream()
537 .map_err(|_| PdfError::ExtractFailed {
538 reason: "metadata is not a stream".to_string(),
539 })?;
540
541 let xmp_content = String::from_utf8_lossy(&metadata_stream.content);
543
544 let start_tag = "<sf:HiddenData>";
546 let end_tag = "</sf:HiddenData>";
547
548 let start_idx = xmp_content
549 .find(start_tag)
550 .ok_or_else(|| PdfError::ExtractFailed {
551 reason: "no sf:HiddenData tag found".to_string(),
552 })?
553 .strict_add(start_tag.len());
554
555 let end_idx = xmp_content
556 .find(end_tag)
557 .ok_or_else(|| PdfError::ExtractFailed {
558 reason: "no closing sf:HiddenData tag found".to_string(),
559 })?;
560
561 let encoded_data = &xmp_content[start_idx..end_idx];
562
563 let decoded = general_purpose::STANDARD
565 .decode(encoded_data.trim())
566 .map_err(|e| PdfError::ExtractFailed {
567 reason: format!("base64 decode failed: {e}"),
568 })?;
569
570 Ok(Payload::from_bytes(decoded))
571 }
572}
573
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    type TestResult = Result<(), Box<dyn std::error::Error>>;

    /// Builds a minimal, well-formed PDF with one page per entry of
    /// `page_contents`; each entry becomes that page's (uncompressed) content
    /// stream.
    ///
    /// Object layout matches what the tests previously built inline: the
    /// Pages id is reserved first, then each page dictionary is immediately
    /// followed by its content stream, then the Pages dictionary and Catalog.
    fn build_test_doc(page_contents: &[&[u8]]) -> Document {
        let mut doc = Document::with_version("1.7");
        let pages_id = doc.new_object_id();
        let mut kids = Vec::with_capacity(page_contents.len());

        for content in page_contents {
            let page_id = doc.new_object_id();
            doc.objects.insert(
                page_id,
                Object::Dictionary(lopdf::dictionary! {
                    "Type" => "Page",
                    "MediaBox" => vec![0.into(), 0.into(), 612.into(), 792.into()],
                    // The content stream is added next, so it receives the
                    // next sequential object number.
                    "Contents" => Object::Reference((page_id.0 + 1, 0)),
                }),
            );
            doc.add_object(lopdf::Stream::new(lopdf::dictionary! {}, content.to_vec()));
            kids.push(Object::Reference(page_id));
        }

        let page_count = i64::try_from(page_contents.len()).expect("page count fits in i64");
        doc.objects.insert(
            pages_id,
            Object::Dictionary(lopdf::dictionary! {
                "Type" => "Pages",
                "Kids" => kids,
                "Count" => page_count,
            }),
        );

        let catalog_id = doc.add_object(lopdf::dictionary! {
            "Type" => "Catalog",
            "Pages" => Object::Reference(pages_id),
        });
        doc.trailer.set("Root", Object::Reference(catalog_id));
        doc
    }

    /// Convenience wrapper: build a test PDF and save it to `path`.
    fn write_test_pdf(path: &std::path::Path, page_contents: &[&[u8]]) -> TestResult {
        build_test_doc(page_contents).save(path)?;
        Ok(())
    }

    #[test]
    fn test_load_minimal_pdf() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("minimal.pdf");
        write_test_pdf(&path, &[b"".as_slice()])?;

        let media = processor.load_pdf(&path)?;
        assert_eq!(media.kind, CoverMediaKind::PdfDocument);
        assert_eq!(media.metadata.get(KEY_PAGE_COUNT), Some(&"1".to_string()));
        Ok(())
    }

    #[test]
    #[ignore = "requires pdfium system library"]
    fn test_render_pages_returns_correct_count() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("two_page.pdf");
        write_test_pdf(&path, &[b"".as_slice(), b"".as_slice()])?;

        let media = processor.load_pdf(&path)?;
        let images = processor.render_pages_to_images(&media)?;
        assert_eq!(images.len(), 2);
        Ok(())
    }

    #[test]
    #[ignore = "requires pdfium system library"]
    fn test_rebuild_pdf_roundtrip() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("original.pdf");
        write_test_pdf(&path, &[b"".as_slice(), b"".as_slice()])?;

        let original = processor.load_pdf(&path)?;
        let images = processor.render_pages_to_images(&original)?;
        let rebuilt = processor.rebuild_pdf_from_images(images, &original)?;

        // Reload the rebuilt document to verify the page count survived.
        let rebuilt_path = dir.path().join("rebuilt.pdf");
        processor.save_pdf(&rebuilt, &rebuilt_path)?;
        let reloaded = processor.load_pdf(&rebuilt_path)?;

        assert_eq!(
            reloaded.metadata.get(KEY_PAGE_COUNT),
            original.metadata.get(KEY_PAGE_COUNT)
        );
        Ok(())
    }

    #[test]
    #[ignore = "lopdf requires actual encrypted content, not just Encrypt trailer"]
    fn test_encrypted_pdf_error() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("encrypted.pdf");

        // Start from a valid document, then mark it encrypted via an
        // /Encrypt entry in the trailer.
        let mut doc = build_test_doc(&[b"".as_slice()]);
        doc.trailer
            .set("Encrypt", Object::Reference((doc.max_id + 1, 0)));
        doc.objects.insert(
            (doc.max_id + 1, 0),
            Object::Dictionary(lopdf::dictionary! {
                "Filter" => "Standard",
                "V" => 1,
                "R" => 2,
            }),
        );
        doc.save(&path)?;

        let result = processor.load_pdf(&path);
        assert!(matches!(result, Err(PdfError::Encrypted)));
        Ok(())
    }

    #[test]
    fn test_content_stream_lsb_roundtrip() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("test.pdf");

        // Plenty of integer tokens so a one-byte payload (8 bits) fits.
        let content: &[u8] = b"BT\n/F1 12 Tf\n100 700 Td\n(Hello) Tj\n200 650 Td\n(World) Tj\n50 600 Td\n(Test) Tj\n150 550 Td\n(PDF) Tj\nET\n1 0 0 1 0 0 cm\n";
        write_test_pdf(&path, &[content])?;

        let original = processor.load_pdf(&path)?;
        let payload = Payload::from_bytes(vec![0xAB]);
        let stego = processor.embed_in_content_stream(original, &payload)?;

        let stego_path = dir.path().join("stego.pdf");
        processor.save_pdf(&stego, &stego_path)?;
        let reloaded = processor.load_pdf(&stego_path)?;

        let extracted = processor.extract_from_content_stream(&reloaded)?;
        assert_eq!(extracted.as_bytes(), payload.as_bytes());
        Ok(())
    }

    #[test]
    fn test_metadata_embed_roundtrip() -> TestResult {
        let processor = PdfProcessorImpl::default();
        let dir = tempdir()?;
        let path = dir.path().join("test.pdf");
        write_test_pdf(&path, &[b"".as_slice()])?;

        let original = processor.load_pdf(&path)?;
        let payload = Payload::from_bytes(vec![0u8; 128]);
        let stego = processor.embed_in_metadata(original, &payload)?;

        let stego_path = dir.path().join("stego.pdf");
        processor.save_pdf(&stego, &stego_path)?;
        let reloaded = processor.load_pdf(&stego_path)?;

        let extracted = processor.extract_from_metadata(&reloaded)?;
        assert_eq!(extracted.as_bytes(), payload.as_bytes());
        Ok(())
    }
}