1use crate::error::{EngineError, Result};
5use crate::geometry::{self, PageGeometry};
6use crate::limits::{LimitError, ProcessingLimits};
7use std::sync::{Arc, Mutex};
8
/// Shared, lock-protected slot holding the first `(observed, limit)` pair
/// reported by an `InterpreterWarning::StreamTooLarge` warning, or `None`
/// when no such warning has been recorded.
type LimitSlot = Arc<Mutex<Option<(u64, u64)>>>;
14use crate::render::{self, ColorMode, RenderConfig, RenderOptions, RenderedPage};
15use crate::text::{TextBlock, TextExtractionDevice};
16use crate::thumbnail::ThumbnailOptions;
17
18use pdf_forms::parse::parse_acroform;
19use pdf_forms::tree::{FieldType, FieldValue};
20use pdf_render::pdf_interpret::PageExt;
21use pdf_render::pdf_interpret::{interpret_page, Context, InterpreterSettings, InterpreterWarning};
22use pdf_render::pdf_syntax::object::dict::keys::{FIRST, NEXT, OUTLINES, TITLE};
23use pdf_render::pdf_syntax::object::Dict;
24use pdf_render::pdf_syntax::page::Page;
25use pdf_render::pdf_syntax::{Pdf, PdfLoadLimits};
26#[cfg(feature = "parallel")]
27use rayon::prelude::*;
28
29use kurbo::Rect;
30
/// Descriptive metadata of a document.
///
/// Every field is `None` when the corresponding entry is absent.
#[derive(Clone, Debug, Default)]
pub struct DocumentInfo {
    /// Document title.
    pub title: Option<String>,
    /// Document author.
    pub author: Option<String>,
    /// Document subject.
    pub subject: Option<String>,
    /// Keywords associated with the document.
    pub keywords: Option<String>,
    /// Value of the metadata `creator` entry.
    pub creator: Option<String>,
    /// Value of the metadata `producer` entry.
    pub producer: Option<String>,
}
47
/// One entry of the document outline (bookmark tree).
#[derive(Clone, Debug)]
pub struct BookmarkItem {
    /// Text shown for this entry.
    pub title: String,
    /// Target page index, when one is known.
    pub page: Option<usize>,
    /// Nested entries below this one.
    pub children: Vec<BookmarkItem>,
}
58
59pub struct PdfDocument {
61 pdf: Pdf,
62 settings: InterpreterSettings,
63}
64
65impl PdfDocument {
66 pub fn open(data: impl Into<pdf_render::pdf_syntax::PdfData>) -> Result<Self> {
68 let pdf = Pdf::new(data).map_err(|e| match e {
69 pdf_render::pdf_syntax::LoadPdfError::Decryption(d) => {
70 EngineError::Encrypted(format!("{d:?}"))
71 }
72 _ => EngineError::InvalidPdf(format!("{e:?}")),
73 })?;
74 Ok(Self {
75 pdf,
76 settings: InterpreterSettings::default(),
77 })
78 }
79
80 pub fn open_with_processing_limits(
82 data: impl Into<pdf_render::pdf_syntax::PdfData>,
83 limits: ProcessingLimits,
84 ) -> Result<Self> {
85 let syntax_limits = PdfLoadLimits::new()
86 .max_object_depth(limits.max_object_depth)
87 .max_image_pixels(limits.max_image_pixels)
88 .max_stream_bytes(limits.max_stream_bytes);
89 let pdf = Pdf::new_with_limits(data, syntax_limits).map_err(|e| match e {
90 pdf_render::pdf_syntax::LoadPdfError::Decryption(d) => {
91 EngineError::Encrypted(format!("{d:?}"))
92 }
93 _ => EngineError::InvalidPdf(format!("{e:?}")),
94 })?;
95 let settings = InterpreterSettings {
96 max_operator_count: Some(limits.max_operator_count),
97 ..InterpreterSettings::default()
98 };
99 Ok(Self { pdf, settings })
100 }
101
102 pub fn open_with_password(
104 data: impl Into<pdf_render::pdf_syntax::PdfData>,
105 password: &str,
106 ) -> Result<Self> {
107 let pdf = Pdf::new_with_password(data, password).map_err(|e| match e {
108 pdf_render::pdf_syntax::LoadPdfError::Decryption(d) => {
109 EngineError::Encrypted(format!("{d:?}"))
110 }
111 _ => EngineError::InvalidPdf(format!("{e:?}")),
112 })?;
113 Ok(Self {
114 pdf,
115 settings: InterpreterSettings::default(),
116 })
117 }
118
119 pub fn open_with_password_and_processing_limits(
121 data: impl Into<pdf_render::pdf_syntax::PdfData>,
122 password: &str,
123 limits: ProcessingLimits,
124 ) -> Result<Self> {
125 let syntax_limits = PdfLoadLimits::new()
126 .max_object_depth(limits.max_object_depth)
127 .max_image_pixels(limits.max_image_pixels)
128 .max_stream_bytes(limits.max_stream_bytes);
129 let pdf = Pdf::new_with_password_and_limits(data, password, syntax_limits).map_err(
130 |e| match e {
131 pdf_render::pdf_syntax::LoadPdfError::Decryption(d) => {
132 EngineError::Encrypted(format!("{d:?}"))
133 }
134 _ => EngineError::InvalidPdf(format!("{e:?}")),
135 },
136 )?;
137 let settings = InterpreterSettings {
138 max_operator_count: Some(limits.max_operator_count),
139 ..InterpreterSettings::default()
140 };
141 Ok(Self { pdf, settings })
142 }
143
144 pub fn pdf(&self) -> &Pdf {
146 &self.pdf
147 }
148
149 pub fn set_settings(&mut self, settings: InterpreterSettings) {
151 self.settings = settings;
152 }
153
154 pub fn page_count(&self) -> usize {
156 self.pdf.pages().len()
157 }
158
159 pub fn page_geometry(&self, index: usize) -> Result<PageGeometry> {
161 let page = self.get_page(index)?;
162 Ok(geometry::extract_geometry(page))
163 }
164
165 pub fn render_page(&self, index: usize, options: &RenderOptions) -> Result<RenderedPage> {
173 #[cfg(feature = "xfa")]
174 if let Some(flat_doc) = self.open_flattened_xfa_for_render() {
175 return flat_doc.render_page(index, options);
176 }
177 let page = self.get_page(index)?;
178 let (w, h) = page.render_dimensions();
182 if w <= 0.0 || h <= 0.0 {
183 return Err(EngineError::InvalidPageGeometry {
184 width: w,
185 height: h,
186 reason: "page has zero or negative dimensions".into(),
187 });
188 }
189 const MIN_PAGE_PT: f32 = 1.0;
192 if w < MIN_PAGE_PT || h < MIN_PAGE_PT {
193 return Err(EngineError::InvalidPageGeometry {
194 width: w,
195 height: h,
196 reason: "page too small to render (< 1pt)".into(),
197 });
198 }
199 let (settings, slot) = Self::with_limit_collector(&self.settings);
200 let rendered = render::render_page(page, options, &settings);
201 Self::check_limit_slot(&slot)?;
202 Ok(rendered)
203 }
204
205 pub fn render_page_with_config(
210 &self,
211 index: usize,
212 config: &RenderConfig,
213 ) -> Result<RenderedPage> {
214 #[cfg(feature = "xfa")]
215 if let Some(flat_doc) = self.open_flattened_xfa_for_render() {
216 return flat_doc.render_page_with_config(index, config);
217 }
218 let page = self.get_page(index)?;
219 let (w, h) = page.render_dimensions();
220 if w <= 0.0 || h <= 0.0 {
221 return Err(EngineError::InvalidPageGeometry {
222 width: w,
223 height: h,
224 reason: "page has zero or negative dimensions".into(),
225 });
226 }
227 const MIN_PAGE_PT: f32 = 1.0;
228 if w < MIN_PAGE_PT || h < MIN_PAGE_PT {
229 return Err(EngineError::InvalidPageGeometry {
230 width: w,
231 height: h,
232 reason: "page too small to render (< 1pt)".into(),
233 });
234 }
235 let (settings, slot) = Self::with_limit_collector(&self.settings);
236 let rendered = render::render_page_with_config(page, config, &settings);
237 Self::check_limit_slot(&slot)?;
238 Ok(rendered)
239 }
240
241 pub fn render_page_cmyk(&self, index: usize, dpi: u32) -> Result<RenderedPage> {
243 self.render_page_with_config(
244 index,
245 &RenderConfig {
246 color_mode: ColorMode::PreserveCmyk,
247 dpi,
248 },
249 )
250 }
251
252 pub fn render_all(&self, options: &RenderOptions) -> Vec<RenderedPage> {
254 let pages = self.pdf.pages();
255 #[cfg(feature = "parallel")]
256 return (0..pages.len())
257 .into_par_iter()
258 .map(|i| render::render_page(&pages[i], options, &self.settings))
259 .collect();
260 #[cfg(not(feature = "parallel"))]
261 (0..pages.len())
262 .map(|i| render::render_page(&pages[i], options, &self.settings))
263 .collect()
264 }
265
266 pub fn render_all_with_config(&self, config: &RenderConfig) -> Vec<RenderedPage> {
268 let pages = self.pdf.pages();
269 #[cfg(feature = "parallel")]
270 return (0..pages.len())
271 .into_par_iter()
272 .map(|i| render::render_page_with_config(&pages[i], config, &self.settings))
273 .collect();
274 #[cfg(not(feature = "parallel"))]
275 (0..pages.len())
276 .map(|i| render::render_page_with_config(&pages[i], config, &self.settings))
277 .collect()
278 }
279
280 pub fn thumbnail(&self, index: usize, options: &ThumbnailOptions) -> Result<RenderedPage> {
282 let page = self.get_page(index)?;
283 Ok(render::render_thumbnail(
284 page,
285 options.max_dimension,
286 &self.settings,
287 ))
288 }
289
290 pub fn thumbnails_all(&self, options: &ThumbnailOptions) -> Vec<RenderedPage> {
292 let pages = self.pdf.pages();
293 #[cfg(feature = "parallel")]
294 return (0..pages.len())
295 .into_par_iter()
296 .map(|i| render::render_thumbnail(&pages[i], options.max_dimension, &self.settings))
297 .collect();
298 #[cfg(not(feature = "parallel"))]
299 (0..pages.len())
300 .map(|i| render::render_thumbnail(&pages[i], options.max_dimension, &self.settings))
301 .collect()
302 }
303
304 pub fn extract_text(&self, index: usize) -> Result<String> {
306 let page = self.get_page(index)?;
307 let (settings, slot) = Self::with_limit_collector(&self.text_extraction_settings());
308 let mut device = TextExtractionDevice::new();
309 let mut ctx = Self::create_context_with_settings(page, settings);
310 interpret_page(page, &mut ctx, &mut device);
311 Self::check_limit_slot(&slot)?;
312 Ok(device.into_text())
313 }
314
315 #[doc(hidden)]
317 pub fn extract_text_pages_reusing_settings<I>(&self, indices: I) -> Result<Vec<String>>
318 where
319 I: IntoIterator<Item = usize>,
320 {
321 let pages = self.pdf.pages();
322 let mut settings = self.text_extraction_settings();
323 let indices = indices.into_iter();
324 let (lower_bound, upper_bound) = indices.size_hint();
325 let mut texts = Vec::with_capacity(upper_bound.unwrap_or(lower_bound));
326
327 for index in indices {
328 let page = pages.get(index).ok_or(EngineError::PageOutOfRange {
329 index,
330 count: pages.len(),
331 })?;
332 let (text, next_settings) = Self::extract_text_with_settings(page, settings);
333 settings = next_settings;
334 texts.push(text);
335 }
336
337 Ok(texts)
338 }
339
340 pub fn extract_text_blocks(&self, index: usize) -> Result<Vec<TextBlock>> {
342 let page = self.get_page(index)?;
343 let (settings, slot) = Self::with_limit_collector(&self.text_extraction_settings());
344 let mut device = TextExtractionDevice::new();
345 let mut ctx = Self::create_context_with_settings(page, settings);
346 interpret_page(page, &mut ctx, &mut device);
347 Self::check_limit_slot(&slot)?;
348 Ok(device.into_blocks())
349 }
350
351 pub fn extract_all_text_blocks(&self) -> Vec<Vec<TextBlock>> {
353 let pages = self.pdf.pages();
354 let mut settings = self.text_extraction_settings();
355 let mut blocks = Vec::with_capacity(pages.len());
356
357 for page in pages.iter() {
358 let (page_blocks, next_settings) =
359 Self::extract_text_blocks_with_settings(page, settings);
360 settings = next_settings;
361 blocks.push(page_blocks);
362 }
363
364 blocks
365 }
366
367 pub fn extract_acroform_text(&self) -> String {
373 let Some(tree) = parse_acroform(&self.pdf) else {
374 return String::new();
375 };
376 let mut parts: Vec<String> = Vec::new();
377 for id in tree.all_ids() {
378 let node = tree.get(id);
379 if node.children.is_empty() {
380 let value_str = match &node.value {
382 Some(FieldValue::Text(s)) if !s.is_empty() => Some(s.clone()),
383 Some(FieldValue::StringArray(arr)) => {
384 let joined = arr
385 .iter()
386 .filter(|s| !s.is_empty())
387 .cloned()
388 .collect::<Vec<_>>()
389 .join(", ");
390 if joined.is_empty() {
391 None
392 } else {
393 Some(joined)
394 }
395 }
396 _ => None,
397 };
398 let button_caption =
399 value_str.is_none() && tree.effective_field_type(id) == Some(FieldType::Button);
400 let extracted = value_str.or_else(|| {
401 button_caption.then(|| {
402 node.mk
403 .as_ref()
404 .and_then(|mk| mk.caption.as_ref())
405 .filter(|caption| !caption.is_empty())
406 .cloned()
407 })?
408 });
409 if let Some(s) = extracted {
410 parts.push(s);
411 }
412 }
413 }
414 parts.join("\n")
415 }
416
417 pub fn extract_all_text(&self) -> String {
420 let pages = self.pdf.pages();
421 let mut settings = self.text_extraction_settings();
422 let mut page_texts = Vec::with_capacity(pages.len());
423 for page in pages.iter() {
424 let (page_text, next_settings) = Self::extract_text_with_settings(page, settings);
425 settings = next_settings;
426 page_texts.push(page_text);
427 }
428
429 let mut text = join_page_texts(page_texts.iter().map(String::as_str));
430 let acroform = self.extract_acroform_text();
431 if !acroform.is_empty() {
432 if !text.is_empty() && !text.ends_with('\n') {
433 text.push('\n');
434 }
435 text.push_str(&acroform);
436 }
437 text
438 }
439
440 pub fn search_text(&self, query: &str) -> Vec<usize> {
442 let pages = self.pdf.pages();
443 let query_lower = query.to_lowercase();
444 #[cfg(feature = "parallel")]
445 let page_contains = |i: usize| -> Option<usize> {
446 let page = &pages[i];
447 let (text, _) = Self::extract_text_with_settings(page, self.text_extraction_settings());
448 if text.to_lowercase().contains(&query_lower) {
449 Some(i)
450 } else {
451 None
452 }
453 };
454 #[cfg(feature = "parallel")]
455 return (0..pages.len())
456 .into_par_iter()
457 .filter_map(page_contains)
458 .collect();
459 #[cfg(not(feature = "parallel"))]
460 {
461 let mut settings = self.text_extraction_settings();
462 let mut hits = Vec::new();
463 for (i, page) in pages.iter().enumerate() {
464 let (text, next_settings) = Self::extract_text_with_settings(page, settings);
465 settings = next_settings;
466 if text.to_lowercase().contains(&query_lower) {
467 hits.push(i);
468 }
469 }
470 hits
471 }
472 }
473
474 pub fn info(&self) -> DocumentInfo {
476 let meta = self.pdf.metadata();
477 DocumentInfo {
478 title: meta.title.as_ref().map(|b| bytes_to_string(b)),
479 author: meta.author.as_ref().map(|b| bytes_to_string(b)),
480 subject: meta.subject.as_ref().map(|b| bytes_to_string(b)),
481 keywords: meta.keywords.as_ref().map(|b| bytes_to_string(b)),
482 creator: meta.creator.as_ref().map(|b| bytes_to_string(b)),
483 producer: meta.producer.as_ref().map(|b| bytes_to_string(b)),
484 }
485 }
486
487 pub fn bookmarks(&self) -> Vec<BookmarkItem> {
489 let xref = self.pdf.xref();
490 let root_id = xref.root_id();
491 let catalog: Dict<'_> = match xref.get(root_id) {
492 Some(d) => d,
493 None => return Vec::new(),
494 };
495
496 let outlines: Dict<'_> = match catalog.get(OUTLINES) {
497 Some(d) => d,
498 None => return Vec::new(),
499 };
500
501 let first: Dict<'_> = match outlines.get(FIRST) {
502 Some(d) => d,
503 None => return Vec::new(),
504 };
505
506 parse_outline_items(&first)
507 }
508
509 pub fn ocr_page(
528 &self,
529 index: usize,
530 backend: &dyn crate::ocr::OcrBackend,
531 dpi: f64,
532 ) -> crate::error::Result<crate::ocr::OcrResult> {
533 let opts = crate::render::RenderOptions {
534 dpi,
535 ..Default::default()
536 };
537 let rendered = self.render_page(index, &opts)?;
538
539 let mut rgb = Vec::with_capacity((rendered.width * rendered.height * 3) as usize);
541 for chunk in rendered.pixels.chunks(4) {
542 rgb.push(chunk[0]);
543 rgb.push(chunk[1]);
544 rgb.push(chunk[2]);
545 }
546
547 backend
548 .recognize(&rgb, rendered.width, rendered.height)
549 .map_err(|e| crate::error::EngineError::RenderError(e.to_string()))
550 }
551
552 fn with_limit_collector(settings: &InterpreterSettings) -> (InterpreterSettings, LimitSlot) {
559 let slot: LimitSlot = Arc::new(Mutex::new(None));
560 let slot_clone = Arc::clone(&slot);
561 let prev_sink = settings.warning_sink.clone();
562 let mut new_settings = settings.clone();
563 new_settings.warning_sink = Arc::new(move |w: InterpreterWarning| {
564 if let InterpreterWarning::StreamTooLarge { observed, limit } = w {
565 let mut guard = slot_clone.lock().unwrap_or_else(|e| e.into_inner());
566 if guard.is_none() {
567 *guard = Some((observed, limit));
568 }
569 }
570 prev_sink(w);
571 });
572 (new_settings, slot)
573 }
574
575 fn check_limit_slot(slot: &LimitSlot) -> Result<()> {
580 if let Some((observed, limit)) = *slot.lock().unwrap_or_else(|e| e.into_inner()) {
581 return Err(EngineError::LimitExceeded(LimitError::StreamTooLarge {
582 actual_bytes: observed,
583 limit_bytes: limit,
584 }));
585 }
586 Ok(())
587 }
588
589 fn get_page(&self, index: usize) -> Result<&Page<'_>> {
590 let pages = self.pdf.pages();
591 if index >= pages.len() {
592 return Err(EngineError::PageOutOfRange {
593 index,
594 count: pages.len(),
595 });
596 }
597 Ok(&pages[index])
598 }
599
600 fn text_extraction_settings(&self) -> InterpreterSettings {
601 let mut settings = self.settings.clone();
602 settings.skip_signature_widgets = false;
605 settings
606 }
607
608 fn create_context_with_settings<'a>(
609 page: &Page<'a>,
610 settings: InterpreterSettings,
611 ) -> Context<'a> {
612 let (w, h) = page.render_dimensions();
613 Context::new(
614 page.initial_transform(false),
615 Rect::new(0.0, 0.0, w as f64, h as f64),
616 page.xref(),
617 settings,
618 )
619 }
620
621 fn extract_text_with_settings<'a>(
622 page: &Page<'a>,
623 settings: InterpreterSettings,
624 ) -> (String, InterpreterSettings) {
625 let mut device = TextExtractionDevice::new();
626 let mut ctx = Self::create_context_with_settings(page, settings);
627 interpret_page(page, &mut ctx, &mut device);
628 let settings = ctx.into_settings();
629 (device.into_text(), settings)
630 }
631
632 fn extract_text_blocks_with_settings<'a>(
633 page: &Page<'a>,
634 settings: InterpreterSettings,
635 ) -> (Vec<TextBlock>, InterpreterSettings) {
636 let mut device = TextExtractionDevice::new();
637 let mut ctx = Self::create_context_with_settings(page, settings);
638 interpret_page(page, &mut ctx, &mut device);
639 let settings = ctx.into_settings();
640 (device.into_blocks(), settings)
641 }
642
643 #[cfg(feature = "xfa")]
644 fn open_flattened_xfa_for_render(&self) -> Option<Self> {
645 if !crate::xfa::has_xfa(self) {
646 return None;
647 }
648
649 let flat_bytes = crate::xfa::flatten(self).ok()?;
650 let mut flat_doc = Self::open(flat_bytes).ok()?;
651 flat_doc.settings = self.settings.clone();
652 Some(flat_doc)
653 }
654}
655
/// Concatenates per-page texts, separating consecutive pages with a form feed
/// (`U+000C`).
///
/// Before each separator, newlines are appended until the accumulated text
/// ends with a blank line (`"\n\n"`); nothing is appended while the
/// accumulated text is still empty.
fn join_page_texts<I>(page_texts: I) -> String
where
    I: IntoIterator,
    I::Item: AsRef<str>,
{
    let mut joined = String::new();

    for (position, page_text) in page_texts.into_iter().enumerate() {
        if position > 0 {
            if !joined.is_empty() {
                while !joined.ends_with("\n\n") {
                    joined.push('\n');
                }
            }
            joined.push('\u{000C}');
        }
        joined.push_str(page_text.as_ref());
    }

    joined
}
677
/// Unit tests for the page-joining rules used by `extract_all_text`.
#[cfg(test)]
mod extract_all_text_tests {
    use super::join_page_texts;

    /// Two non-empty pages are separated by a blank line and a form feed.
    #[test]
    fn separates_nonempty_pages_like_pdftotext() {
        let joined = join_page_texts(["Page 1", "Page 2"]);
        assert_eq!(joined, "Page 1\n\n\u{000C}Page 2");
    }

    /// A leading empty page yields only the form feed, with no newlines.
    #[test]
    fn preserves_leading_blank_pages_without_extra_newlines() {
        let joined = join_page_texts(["", "Page 2"]);
        assert_eq!(joined, "\u{000C}Page 2");
    }

    /// A page that already ends with a blank line gets no extra newlines.
    #[test]
    fn reuses_existing_blank_line_before_form_feed() {
        let joined = join_page_texts(["Page 1\n\n", "Page 2"]);
        assert_eq!(joined, "Page 1\n\n\u{000C}Page 2");
    }
}
703
704fn parse_outline_items(item_dict: &Dict<'_>) -> Vec<BookmarkItem> {
706 let mut items = Vec::new();
707 let mut current: Option<Dict<'_>> = Some(item_dict.clone());
708
709 while let Some(dict) = current {
710 let title = dict
711 .get::<pdf_render::pdf_syntax::object::String>(TITLE)
712 .map(|s| bytes_to_string(s.as_bytes()))
713 .unwrap_or_default();
714
715 let children = match dict.get::<Dict<'_>>(FIRST) {
716 Some(child_dict) => parse_outline_items(&child_dict),
717 None => Vec::new(),
718 };
719
720 items.push(BookmarkItem {
721 title,
722 page: None, children,
724 });
725
726 current = dict.get::<Dict<'_>>(NEXT);
727 }
728
729 items
730}
731
/// Decodes a PDF text string into a Rust `String`.
///
/// - Bytes starting with the UTF-16BE byte-order mark (`FE FF`) are decoded
///   as big-endian UTF-16 (lossily; a trailing odd byte is ignored).
/// - Bytes starting with the UTF-8 byte-order mark (`EF BB BF`) are decoded
///   as UTF-8 with the mark removed, so it does not leak into the result as
///   a leading `U+FEFF`.
/// - Other valid UTF-8 is returned as-is.
/// - Anything else is decoded byte-for-byte, each byte becoming the Unicode
///   code point of the same value.
fn bytes_to_string(bytes: &[u8]) -> String {
    const UTF16_BE_BOM: &[u8] = &[0xFE, 0xFF];
    const UTF8_BOM: &[u8] = &[0xEF, 0xBB, 0xBF];

    if let Some(payload) = bytes.strip_prefix(UTF16_BE_BOM) {
        let units: Vec<u16> = payload
            .chunks_exact(2)
            .map(|pair| u16::from_be_bytes([pair[0], pair[1]]))
            .collect();
        return String::from_utf16_lossy(&units);
    }

    let utf8_candidate = bytes.strip_prefix(UTF8_BOM).unwrap_or(bytes);
    match std::str::from_utf8(utf8_candidate) {
        Ok(s) => s.to_string(),
        // Not UTF-8: fall back to a byte-per-character decoding of the
        // original input.
        Err(_) => bytes.iter().map(|&b| b as char).collect(),
    }
}
755
756#[cfg(test)]
757mod tests {
758 use super::*;
759 use crate::render::{ColorMode, PixelFormat, RenderConfig, RenderOptions};
760 use lopdf::{Document as LoDocument, Object};
761 use std::path::PathBuf;
762
763 fn corpus_path(name: &str) -> PathBuf {
764 PathBuf::from(env!("CARGO_MANIFEST_DIR"))
765 .join("../../corpus")
766 .join(name)
767 }
768
769 fn normalize_text(text: &str) -> String {
770 text.split_whitespace().collect::<Vec<_>>().join(" ")
771 }
772
773 fn strip_type0_tounicode(data: &[u8]) -> (Vec<u8>, usize) {
774 fn get_name(dict: &lopdf::Dictionary, key: &[u8]) -> Option<Vec<u8>> {
775 match dict.get(key).ok()? {
776 Object::Name(name) => Some(name.clone()),
777 _ => None,
778 }
779 }
780
781 fn descendant_is_cidfont_type2(doc: &LoDocument, type0: &lopdf::Dictionary) -> bool {
782 let Some(Object::Array(descendants)) = type0.get(b"DescendantFonts").ok() else {
783 return false;
784 };
785 let Some(Object::Reference(desc_id)) = descendants.first() else {
786 return false;
787 };
788 let Ok(Object::Dictionary(descendant)) = doc.get_object(*desc_id) else {
789 return false;
790 };
791 matches!(
792 descendant.get(b"Subtype").ok(),
793 Some(Object::Name(name)) if name.as_slice() == b"CIDFontType2"
794 )
795 }
796
797 let mut doc = LoDocument::load_mem(data).expect("load stripped-to-unicode fixture");
798 let ids: Vec<_> = doc.objects.keys().copied().collect();
799 let mut removed = 0usize;
800
801 for id in ids {
802 let Some(Object::Dictionary(dict)) = doc.objects.get(&id) else {
803 continue;
804 };
805 if !matches!(
806 dict.get(b"Subtype").ok(),
807 Some(Object::Name(name)) if name.as_slice() == b"Type0"
808 ) {
809 continue;
810 }
811 if !matches!(
812 get_name(dict, b"Encoding").as_deref(),
813 Some(b"Identity-H") | Some(b"Identity-V")
814 ) {
815 continue;
816 }
817 if !descendant_is_cidfont_type2(&doc, dict) {
818 continue;
819 }
820
821 if let Some(Object::Dictionary(type0)) = doc.objects.get_mut(&id) {
822 if type0.has(b"ToUnicode") {
823 type0.remove(b"ToUnicode");
824 removed += 1;
825 }
826 }
827 }
828
829 let mut out = Vec::new();
830 doc.save_to(&mut out)
831 .expect("save stripped-to-unicode fixture");
832 (out, removed)
833 }
834
835 fn solid_fill_pdf_bytes(color_operator: &str) -> Vec<u8> {
836 use lopdf::{dictionary, Document, Object, Stream};
837
838 let mut doc = Document::with_version("1.4");
839
840 let pages_id = doc.new_object_id();
841 let page_id = doc.new_object_id();
842 let content = format!("{color_operator}\n0 0 72 72 re f\n");
843 let content_id = doc.add_object(Stream::new(dictionary! {}, content.into_bytes()));
844
845 doc.objects.insert(
846 page_id,
847 Object::Dictionary(dictionary! {
848 "Type" => Object::Name(b"Page".to_vec()),
849 "Parent" => Object::Reference(pages_id),
850 "MediaBox" => Object::Array(vec![
851 Object::Integer(0),
852 Object::Integer(0),
853 Object::Integer(72),
854 Object::Integer(72),
855 ]),
856 "Contents" => Object::Reference(content_id),
857 }),
858 );
859
860 doc.objects.insert(
861 pages_id,
862 Object::Dictionary(dictionary! {
863 "Type" => Object::Name(b"Pages".to_vec()),
864 "Kids" => Object::Array(vec![Object::Reference(page_id)]),
865 "Count" => Object::Integer(1),
866 }),
867 );
868
869 let catalog_id = doc.new_object_id();
870 doc.objects.insert(
871 catalog_id,
872 Object::Dictionary(dictionary! {
873 "Type" => Object::Name(b"Catalog".to_vec()),
874 "Pages" => Object::Reference(pages_id),
875 }),
876 );
877
878 doc.trailer.set("Root", Object::Reference(catalog_id));
879
880 let mut bytes = Vec::new();
881 doc.save_to(&mut bytes).expect("save solid fill fixture");
882 bytes
883 }
884
885 fn mixed_rgb_cmyk_pdf_bytes() -> Vec<u8> {
886 use lopdf::{dictionary, Document, Object, Stream};
887
888 let mut doc = Document::with_version("1.4");
889 let pages_id = doc.new_object_id();
890 let page_id = doc.new_object_id();
891 let content = b"1 0 0 rg\n0 0 36 72 re f\n1 0 0 0 k\n36 0 36 72 re f\n".to_vec();
892 let content_id = doc.add_object(Stream::new(dictionary! {}, content));
893
894 doc.objects.insert(
895 page_id,
896 Object::Dictionary(dictionary! {
897 "Type" => "Page",
898 "Parent" => Object::Reference(pages_id),
899 "MediaBox" => Object::Array(vec![0.into(), 0.into(), 72.into(), 72.into()]),
900 "Contents" => Object::Reference(content_id),
901 }),
902 );
903 doc.objects.insert(
904 pages_id,
905 Object::Dictionary(dictionary! {
906 "Type" => "Pages",
907 "Kids" => Object::Array(vec![Object::Reference(page_id)]),
908 "Count" => Object::Integer(1),
909 }),
910 );
911 let catalog_id = doc.new_object_id();
912 doc.objects.insert(
913 catalog_id,
914 Object::Dictionary(dictionary! {
915 "Type" => "Catalog",
916 "Pages" => Object::Reference(pages_id),
917 }),
918 );
919 doc.trailer.set("Root", Object::Reference(catalog_id));
920
921 let mut bytes = Vec::new();
922 doc.save_to(&mut bytes)
923 .expect("save mixed rgb/cmyk fixture");
924 bytes
925 }
926
927 fn transparent_cmyk_pdf_bytes() -> Vec<u8> {
928 use lopdf::{dictionary, Document, Object, Stream};
929
930 let mut doc = Document::with_version("1.4");
931 let pages_id = doc.new_object_id();
932 let page_id = doc.new_object_id();
933 let gs_id = doc.add_object(Object::Dictionary(dictionary! {
934 "Type" => "ExtGState",
935 "ca" => Object::Real(0.5),
936 }));
937 let content = b"/GS1 gs\n1 0 0 0 k\n0 0 72 72 re f\n".to_vec();
938 let content_id = doc.add_object(Stream::new(dictionary! {}, content));
939
940 doc.objects.insert(
941 page_id,
942 Object::Dictionary(dictionary! {
943 "Type" => "Page",
944 "Parent" => Object::Reference(pages_id),
945 "MediaBox" => Object::Array(vec![0.into(), 0.into(), 72.into(), 72.into()]),
946 "Resources" => dictionary! {
947 "ExtGState" => dictionary! {
948 "GS1" => Object::Reference(gs_id),
949 },
950 },
951 "Contents" => Object::Reference(content_id),
952 }),
953 );
954 doc.objects.insert(
955 pages_id,
956 Object::Dictionary(dictionary! {
957 "Type" => "Pages",
958 "Kids" => Object::Array(vec![Object::Reference(page_id)]),
959 "Count" => Object::Integer(1),
960 }),
961 );
962 let catalog_id = doc.new_object_id();
963 doc.objects.insert(
964 catalog_id,
965 Object::Dictionary(dictionary! {
966 "Type" => "Catalog",
967 "Pages" => Object::Reference(pages_id),
968 }),
969 );
970 doc.trailer.set("Root", Object::Reference(catalog_id));
971
972 let mut bytes = Vec::new();
973 doc.save_to(&mut bytes)
974 .expect("save transparent cmyk fixture");
975 bytes
976 }
977
978 fn cmyk_image_pdf_bytes() -> Vec<u8> {
979 use lopdf::{dictionary, Document, Object, Stream};
980
981 let mut doc = Document::with_version("1.4");
982 let pages_id = doc.new_object_id();
983 let page_id = doc.new_object_id();
984 let image_id = doc.add_object(Stream::new(
985 dictionary! {
986 "Type" => "XObject",
987 "Subtype" => "Image",
988 "Width" => Object::Integer(2),
989 "Height" => Object::Integer(1),
990 "BitsPerComponent" => Object::Integer(8),
991 "ColorSpace" => "DeviceCMYK",
992 },
993 vec![255, 0, 0, 0, 0, 255, 0, 0],
994 ));
995 let content = b"q\n2 0 0 1 0 0 cm\n/Im1 Do\nQ\n".to_vec();
996 let content_id = doc.add_object(Stream::new(dictionary! {}, content));
997
998 doc.objects.insert(
999 page_id,
1000 Object::Dictionary(dictionary! {
1001 "Type" => "Page",
1002 "Parent" => Object::Reference(pages_id),
1003 "MediaBox" => Object::Array(vec![0.into(), 0.into(), 2.into(), 1.into()]),
1004 "Resources" => dictionary! {
1005 "XObject" => dictionary! {
1006 "Im1" => Object::Reference(image_id),
1007 },
1008 },
1009 "Contents" => Object::Reference(content_id),
1010 }),
1011 );
1012 doc.objects.insert(
1013 pages_id,
1014 Object::Dictionary(dictionary! {
1015 "Type" => "Pages",
1016 "Kids" => Object::Array(vec![Object::Reference(page_id)]),
1017 "Count" => Object::Integer(1),
1018 }),
1019 );
1020 let catalog_id = doc.new_object_id();
1021 doc.objects.insert(
1022 catalog_id,
1023 Object::Dictionary(dictionary! {
1024 "Type" => "Catalog",
1025 "Pages" => Object::Reference(pages_id),
1026 }),
1027 );
1028 doc.trailer.set("Root", Object::Reference(catalog_id));
1029
1030 let mut bytes = Vec::new();
1031 doc.save_to(&mut bytes).expect("save cmyk image fixture");
1032 bytes
1033 }
1034
1035 fn pixel_at(rendered: &RenderedPage, x: u32, y: u32) -> [u8; 4] {
1036 let idx = ((y * rendered.width + x) * 4) as usize;
1037 [
1038 rendered.pixels[idx],
1039 rendered.pixels[idx + 1],
1040 rendered.pixels[idx + 2],
1041 rendered.pixels[idx + 3],
1042 ]
1043 }
1044
1045 fn non_embedded_truetype_pdf_bytes(
1050 base_font: &[u8],
1051 encoding: &[u8],
1052 text_bytes: &[u8],
1053 ) -> Vec<u8> {
1054 use lopdf::{dictionary, Document, Object, Stream};
1055
1056 let mut doc = Document::with_version("1.4");
1057
1058 let font_id = doc.add_object(Object::Dictionary(dictionary! {
1059 "Type" => "Font",
1060 "Subtype" => "TrueType",
1061 "Name" => Object::Name(b"F0".to_vec()),
1062 "BaseFont" => Object::Name(base_font.to_vec()),
1063 "Encoding" => Object::Name(encoding.to_vec()),
1064 }));
1065
1066 let resources_id = doc.add_object(Object::Dictionary(dictionary! {
1067 "Font" => dictionary! { "F0" => Object::Reference(font_id) },
1068 }));
1069
1070 let mut content = Vec::new();
1071 content.extend_from_slice(b"BT\n/F0 12 Tf\n100 700 Td\n(");
1072 for &b in text_bytes {
1073 match b {
1074 b'(' | b')' | b'\\' => {
1075 content.push(b'\\');
1076 content.push(b);
1077 }
1078 _ => content.push(b),
1079 }
1080 }
1081 content.extend_from_slice(b") Tj\nET\n");
1082 let content_id = doc.add_object(Stream::new(dictionary! {}, content));
1083
1084 let pages_id = doc.new_object_id();
1085 let page_id = doc.add_object(Object::Dictionary(dictionary! {
1086 "Type" => "Page",
1087 "Parent" => Object::Reference(pages_id),
1088 "MediaBox" => Object::Array(vec![0.into(), 0.into(), 612.into(), 792.into()]),
1089 "Resources" => Object::Reference(resources_id),
1090 "Contents" => Object::Reference(content_id),
1091 }));
1092 doc.objects.insert(
1093 pages_id,
1094 Object::Dictionary(dictionary! {
1095 "Type" => "Pages",
1096 "Kids" => Object::Array(vec![Object::Reference(page_id)]),
1097 "Count" => Object::Integer(1),
1098 }),
1099 );
1100 let catalog_id = doc.add_object(Object::Dictionary(dictionary! {
1101 "Type" => "Catalog",
1102 "Pages" => Object::Reference(pages_id),
1103 }));
1104 doc.trailer.set("Root", Object::Reference(catalog_id));
1105
1106 let mut bytes = Vec::new();
1107 doc.save_to(&mut bytes).expect("save non-embedded fixture");
1108 bytes
1109 }
1110
1111 fn push_button_caption_pdf_bytes(caption: &[u8]) -> Vec<u8> {
1114 use lopdf::{dictionary, Document, Object, Stream, StringFormat};
1115
1116 let mut doc = Document::with_version("1.4");
1117
1118 let catalog_id = doc.new_object_id();
1119 let pages_id = doc.new_object_id();
1120 let page_id = doc.new_object_id();
1121 let acroform_id = doc.new_object_id();
1122 let content_id = doc.new_object_id();
1123 let widget_id = doc.new_object_id();
1124
1125 doc.objects.insert(
1126 content_id,
1127 Object::Stream(Stream::new(dictionary! {}, Vec::new())),
1128 );
1129 doc.objects.insert(
1130 widget_id,
1131 Object::Dictionary(dictionary! {
1132 "Type" => "Annot",
1133 "Subtype" => "Widget",
1134 "FT" => "Btn",
1135 "Ff" => Object::Integer(1 << 16),
1136 "T" => Object::String(b"button".to_vec(), StringFormat::Literal),
1137 "MK" => dictionary! {
1138 "CA" => Object::String(caption.to_vec(), StringFormat::Literal),
1139 },
1140 "Rect" => Object::Array(vec![100.into(), 700.into(), 260.into(), 730.into()]),
1141 "P" => Object::Reference(page_id),
1142 }),
1143 );
1144 doc.objects.insert(
1145 page_id,
1146 Object::Dictionary(dictionary! {
1147 "Type" => "Page",
1148 "Parent" => Object::Reference(pages_id),
1149 "MediaBox" => Object::Array(vec![0.into(), 0.into(), 612.into(), 792.into()]),
1150 "Annots" => Object::Array(vec![Object::Reference(widget_id)]),
1151 "Contents" => Object::Reference(content_id),
1152 }),
1153 );
1154 doc.objects.insert(
1155 pages_id,
1156 Object::Dictionary(dictionary! {
1157 "Type" => "Pages",
1158 "Kids" => Object::Array(vec![Object::Reference(page_id)]),
1159 "Count" => Object::Integer(1),
1160 }),
1161 );
1162 doc.objects.insert(
1163 acroform_id,
1164 Object::Dictionary(dictionary! {
1165 "Fields" => Object::Array(vec![Object::Reference(widget_id)]),
1166 }),
1167 );
1168 doc.objects.insert(
1169 catalog_id,
1170 Object::Dictionary(dictionary! {
1171 "Type" => "Catalog",
1172 "Pages" => Object::Reference(pages_id),
1173 "AcroForm" => Object::Reference(acroform_id),
1174 }),
1175 );
1176 doc.trailer.set("Root", Object::Reference(catalog_id));
1177
1178 let mut bytes = Vec::new();
1179 doc.save_to(&mut bytes)
1180 .expect("save push-button caption fixture");
1181 bytes
1182 }
1183
/// Page 0 of a fixture built by `non_embedded_truetype_pdf_bytes` with the
/// arguments `TimesNewRoman` / `WinAnsiEncoding` must yield the fixture
/// phrase once the extracted text has been normalized.
#[test]
fn extract_text_non_embedded_truetype_alias_resolves_via_winansi() {
    let fixture = non_embedded_truetype_pdf_bytes(
        b"TimesNewRoman",
        b"WinAnsiEncoding",
        b"UNITED STATES DISTRICT COURT",
    );
    let doc = PdfDocument::open(fixture).expect("open non-embedded TrueType fixture");
    let extracted = doc
        .extract_text(0)
        .expect("extract non-embedded TrueType text");

    // Compare on normalized text so whitespace differences do not matter.
    let norm = normalize_text(&extracted);
    let found = norm.contains("UNITED STATES DISTRICT COURT");
    assert!(found, "expected WinAnsi-decoded text, got: {norm:?}");
}
1206
/// Same shape as the alias test, but the first fixture argument is the
/// arbitrary name `OpaqueCustomXYZ`; extraction of page 0 must still
/// produce the fixture phrase after normalization.
#[test]
fn extract_text_non_embedded_truetype_unknown_name_still_decodes() {
    let fixture =
        non_embedded_truetype_pdf_bytes(b"OpaqueCustomXYZ", b"WinAnsiEncoding", b"Hello, world!");
    let doc = PdfDocument::open(fixture).expect("open custom non-embedded fixture");
    let extracted = doc
        .extract_text(0)
        .expect("extract custom non-embedded text");

    let norm = normalize_text(&extracted);
    let found = norm.contains("Hello, world!");
    assert!(found, "expected WinAnsi-decoded text, got: {norm:?}");
}
1230
/// The caption passed to `push_button_caption_pdf_bytes` must surface through
/// `extract_acroform_text` and `extract_all_text`, while `extract_text(0)`
/// stays empty after normalization.
#[test]
fn extract_acroform_text_includes_push_button_mk_caption() {
    let expected_caption = "Don't cry over spilt milk";
    let fixture = push_button_caption_pdf_bytes(b"Don't cry over spilt milk");
    let doc = PdfDocument::open(fixture).expect("open push-button caption fixture");

    // Page-level extraction must not report the caption.
    let page_text = doc.extract_text(0).expect("extract page text");
    let page_is_blank = normalize_text(&page_text).is_empty();
    assert!(
        page_is_blank,
        "expected empty page content stream, got: {page_text:?}"
    );

    // AcroForm-only extraction yields exactly the caption.
    let acroform_text = doc.extract_acroform_text();
    assert_eq!(normalize_text(&acroform_text), expected_caption);

    // Whole-document extraction yields the same caption and nothing else.
    let all_text = doc.extract_all_text();
    assert_eq!(normalize_text(&all_text), expected_caption);
}
1248
/// Plain ASCII input comes back unchanged from `bytes_to_string`.
#[test]
fn bytes_to_string_utf8() {
    let decoded = bytes_to_string(b"hello");
    assert_eq!(decoded, "hello");
}
1253
/// The bytes 0xC4 0xD6 0xDC are not valid UTF-8; `bytes_to_string` is
/// expected to map them to the characters with the same code points.
#[test]
fn bytes_to_string_latin1() {
    let raw: [u8; 3] = [0xC4, 0xD6, 0xDC];
    let s = bytes_to_string(&raw);
    assert_eq!(s, "ÄÖÜ");
}
1260
/// Input starting with the 0xFE 0xFF byte-order mark followed by the
/// big-endian code units for `H` and `i` must decode to "Hi".
#[test]
fn bytes_to_string_utf16() {
    let raw: [u8; 6] = [0xFE, 0xFF, 0x00, 0x48, 0x00, 0x69];
    let decoded = bytes_to_string(&raw);
    assert_eq!(decoded, "Hi");
}
1266
/// `DocumentInfo::default()` must leave every metadata field unset.
///
/// All six fields (title, author, subject, keywords, creator, producer) are
/// checked so that a non-`None` default on any of them is caught, not just on
/// the first two.
#[test]
fn document_info_default() {
    let info = DocumentInfo::default();
    assert!(info.title.is_none());
    assert!(info.author.is_none());
    assert!(info.subject.is_none());
    assert!(info.keywords.is_none());
    assert!(info.creator.is_none());
    assert!(info.producer.is_none());
}
1273
/// A `BookmarkItem` built with a single nested child exposes that child
/// through its `children` vector.
#[test]
fn bookmark_item_children() {
    let child = BookmarkItem {
        title: String::from("Child"),
        page: Some(0),
        children: vec![],
    };
    let item = BookmarkItem {
        title: String::from("Root"),
        page: None,
        children: vec![child],
    };

    assert_eq!(item.children.len(), 1);
    assert_eq!(item.children[0].title, "Child");
}
1288
/// Extracts page 0 of the `sf181.pdf` corpus file twice: once untouched and
/// once after `strip_type0_tounicode` has removed at least one entry. The
/// stripped extraction must still contain two known phrases and may be at
/// most 32 bytes shorter (after normalization) than the baseline.
#[test]
fn extract_text_type0_without_tounicode_uses_font_program_fallback() {
    let original = std::fs::read(corpus_path("sf181.pdf")).expect("read sf181 fixture");

    // Baseline: the unmodified document must already extract correctly.
    let baseline_doc = PdfDocument::open(original.clone()).expect("open original sf181");
    let expected = baseline_doc
        .extract_text(0)
        .expect("extract original sf181 text");
    let baseline_ok = expected.contains("Guide to Personnel Data Standards");
    assert!(baseline_ok, "unexpected baseline extraction: {expected}");

    // The helper must actually have stripped something, otherwise the
    // comparison below proves nothing.
    let (stripped, removed) = strip_type0_tounicode(&original);
    assert!(
        removed > 0,
        "expected to strip at least one Type0 ToUnicode"
    );

    let stripped_doc = PdfDocument::open(stripped).expect("open stripped sf181");
    let actual = stripped_doc
        .extract_text(0)
        .expect("extract stripped sf181 text");

    let actual_norm = normalize_text(&actual);
    let expected_norm = normalize_text(&expected);

    let has_heading = actual_norm.contains("Guide to Personnel Data Standards");
    assert!(
        has_heading,
        "missing main heading after stripping ToUnicode: {actual_norm}"
    );
    let has_body = actual_norm.contains("Privacy Act Statement");
    assert!(
        has_body,
        "missing body text after stripping ToUnicode: {actual_norm}"
    );

    // Allow a small shortfall relative to the baseline length.
    let within_tolerance = actual_norm.len() + 32 >= expected_norm.len();
    assert!(
        within_tolerance,
        "too much text lost after stripping ToUnicode: expected {} chars, got {}",
        expected_norm.len(),
        actual_norm.len()
    );
}
1330
/// `extract_all_text` on the `PDFBOX-4322-3.pdf` corpus file must contain two
/// known phrases once the output has been normalized.
#[test]
fn extract_text_identity_h_bogus_tounicode_recovers_via_identity_fallback() {
    let fixture_path = corpus_path("PDFBOX-4322-3.pdf");
    let bytes = std::fs::read(fixture_path).expect("read PDFBOX-4322-3 fixture");
    let doc = PdfDocument::open(bytes).expect("open PDFBOX-4322-3");

    let norm = normalize_text(&doc.extract_all_text());

    let has_council = norm.contains("Transatlantic Council");
    assert!(
        has_council,
        "expected Identity-H codes to resolve as Unicode: {norm}"
    );
    let has_scouts = norm.contains("Boy Scouts of America");
    assert!(has_scouts, "expected body text to be recovered: {norm}");
}
1355
/// Rendering with `max_pixels: None` spelled out must give the same
/// dimensions and pixel buffer as rendering with the field left at its
/// default.
#[test]
fn render_max_pixels_none_is_unchanged_default_behavior() {
    let doc = PdfDocument::open(solid_fill_pdf_bytes("1 0 0 rg")).expect("open fixture");

    let default_opts = RenderOptions {
        dpi: 144.0,
        ..Default::default()
    };
    let baseline = doc.render_page(0, &default_opts).expect("baseline render");

    let none_opts = RenderOptions {
        dpi: 144.0,
        max_pixels: None,
        ..Default::default()
    };
    let explicit_none = doc
        .render_page(0, &none_opts)
        .expect("explicit-none render");

    assert_eq!(baseline.width, explicit_none.width);
    assert_eq!(baseline.height, explicit_none.height);
    assert_eq!(baseline.pixels, explicit_none.pixels);
}
1383
/// Renders the same page at 288 dpi with and without a `max_pixels` budget
/// set to a quarter of the unrestricted pixel count, and checks that the
/// budgeted render is smaller in at least one dimension.
///
/// Note that the upper bound asserted here is the unrestricted pixel count,
/// not the budget itself.
#[test]
fn render_max_pixels_budget_clamps_resolution() {
    let doc = PdfDocument::open(solid_fill_pdf_bytes("1 0 0 rg")).expect("open fixture");

    let unrestricted = RenderOptions {
        dpi: 288.0,
        ..Default::default()
    };
    let full = doc.render_page(0, &unrestricted).expect("full render");
    let full_px = full.width * full.height;
    let budget = full_px / 4;

    let restricted = RenderOptions {
        dpi: 288.0,
        max_pixels: Some(budget),
        ..Default::default()
    };
    let capped = doc.render_page(0, &restricted).expect("capped render");

    let capped_px = capped.width * capped.height;
    assert!(
        capped_px <= full_px,
        "capped output must not exceed full output"
    );
    let shrank = capped.width < full.width || capped.height < full.height;
    assert!(
        shrank,
        "budget below full pixel count must shrink at least one dimension"
    );
}
1418
/// A `max_pixels` budget of 100,000,000 at 72 dpi must not alter the render:
/// dimensions and pixels have to match the render without a budget.
#[test]
fn render_max_pixels_large_budget_no_clamp() {
    let doc = PdfDocument::open(solid_fill_pdf_bytes("1 0 0 rg")).expect("open fixture");

    let plain_opts = RenderOptions {
        dpi: 72.0,
        ..Default::default()
    };
    let baseline = doc.render_page(0, &plain_opts).expect("baseline");

    let roomy_opts = RenderOptions {
        dpi: 72.0,
        max_pixels: Some(100_000_000),
        ..Default::default()
    };
    let huge = doc
        .render_page(0, &roomy_opts)
        .expect("huge-budget render");

    assert_eq!(baseline.width, huge.width);
    assert_eq!(baseline.height, huge.height);
    assert_eq!(baseline.pixels, huge.pixels);
}
1446
/// `render_page_with_config` in `ColorMode::Srgb` at 72 dpi must match
/// `render_page` at 72.0 dpi: same size, both `PixelFormat::Rgba8`, and
/// identical pixel buffers.
#[test]
fn render_page_with_config_srgb_matches_legacy_render_page() {
    let doc = PdfDocument::open(solid_fill_pdf_bytes("1 0 0 rg")).expect("open rgb fixture");

    let legacy_opts = RenderOptions {
        dpi: 72.0,
        ..Default::default()
    };
    let legacy = doc
        .render_page(0, &legacy_opts)
        .expect("legacy render succeeds");

    let srgb_config = RenderConfig {
        color_mode: ColorMode::Srgb,
        dpi: 72,
    };
    let configured = doc
        .render_page_with_config(0, &srgb_config)
        .expect("configured render succeeds");

    assert_eq!(legacy.width, configured.width);
    assert_eq!(legacy.height, configured.height);
    assert_eq!(legacy.pixel_format, PixelFormat::Rgba8);
    assert_eq!(configured.pixel_format, PixelFormat::Rgba8);
    assert_eq!(legacy.pixels, configured.pixels);
}
1475
/// With `ColorMode::PreserveCmyk`, the `"1 0 0 0 k"` fixture must render to a
/// `PixelFormat::Cmyk8` buffer of four bytes per pixel whose centre pixel
/// equals `preserve_device_cmyk(1.0, 0.0, 0.0, 0.0)`.
#[test]
fn render_page_with_config_preserve_cmyk_returns_cmyk_buffer() {
    let doc = PdfDocument::open(solid_fill_pdf_bytes("1 0 0 0 k")).expect("open cmyk fixture");
    let cmyk_config = RenderConfig {
        color_mode: ColorMode::PreserveCmyk,
        dpi: 72,
    };
    let rendered = doc
        .render_page_with_config(0, &cmyk_config)
        .expect("cmyk render succeeds");

    assert_eq!(rendered.pixel_format, PixelFormat::Cmyk8);

    // Four bytes are expected for every pixel of the output.
    let expected_len = rendered.width as usize * rendered.height as usize * 4;
    assert_eq!(rendered.pixels.len(), expected_len);

    // Sample the centre of the page.
    let (centre_x, centre_y) = (rendered.width / 2, rendered.height / 2);
    assert_eq!(
        pixel_at(&rendered, centre_x, centre_y),
        crate::color::preserve_device_cmyk(1.0, 0.0, 0.0, 0.0)
    );
}
1499
/// `ColorMode::SimulateCmyk` on the `"1 0 0 0 k"` fixture must succeed and
/// return a non-empty `PixelFormat::Rgba8` buffer.
#[test]
fn render_page_with_config_simulate_cmyk_does_not_panic_on_cmyk_pdf() {
    let doc = PdfDocument::open(solid_fill_pdf_bytes("1 0 0 0 k")).expect("open cmyk fixture");
    let simulate_config = RenderConfig {
        color_mode: ColorMode::SimulateCmyk,
        dpi: 72,
    };
    let rendered = doc
        .render_page_with_config(0, &simulate_config)
        .expect("simulate cmyk render succeeds");

    assert_eq!(rendered.pixel_format, PixelFormat::Rgba8);
    let has_pixels = !rendered.pixels.is_empty();
    assert!(has_pixels);
}
1516
/// On the fixture from `mixed_rgb_cmyk_pdf_bytes`, a `PreserveCmyk` render
/// must report `preserve_device_cmyk(1.0, 0.0, 0.0, 0.0)` at pixel (54, 36)
/// and a different value at pixel (18, 36).
#[test]
fn render_page_with_config_preserve_cmyk_mixed_page_preserves_only_cmyk_region() {
    let doc = PdfDocument::open(mixed_rgb_cmyk_pdf_bytes()).expect("open mixed fixture");
    let cmyk_config = RenderConfig {
        color_mode: ColorMode::PreserveCmyk,
        dpi: 72,
    };
    let rendered = doc
        .render_page_with_config(0, &cmyk_config)
        .expect("mixed render succeeds");

    // Sample point expected to carry the preserved CMYK value.
    let inside_cmyk = pixel_at(&rendered, 54, 36);
    assert_eq!(
        inside_cmyk,
        crate::color::preserve_device_cmyk(1.0, 0.0, 0.0, 0.0)
    );

    // Sample point expected to differ from it.
    let outside_cmyk = pixel_at(&rendered, 18, 36);
    assert_ne!(
        outside_cmyk,
        crate::color::preserve_device_cmyk(1.0, 0.0, 0.0, 0.0)
    );
}
1539
/// A `PreserveCmyk` render of the fixture from `transparent_cmyk_pdf_bytes`
/// must succeed and return a `PixelFormat::Cmyk8` buffer of four bytes per
/// pixel.
#[test]
fn render_page_with_config_preserve_cmyk_transparent_page_does_not_crash() {
    let fixture = transparent_cmyk_pdf_bytes();
    let doc = PdfDocument::open(fixture).expect("open transparent cmyk fixture");
    let cmyk_config = RenderConfig {
        color_mode: ColorMode::PreserveCmyk,
        dpi: 72,
    };
    let rendered = doc
        .render_page_with_config(0, &cmyk_config)
        .expect("transparent cmyk render succeeds");

    assert_eq!(rendered.pixel_format, PixelFormat::Cmyk8);
    let expected_len = rendered.width as usize * rendered.height as usize * 4;
    assert_eq!(rendered.pixels.len(), expected_len);
}
1560
/// A `PreserveCmyk` render of the fixture from `cmyk_image_pdf_bytes` must be
/// 2x1 pixels, with `[255, 0, 0, 0]` at (0, 0) and `[0, 255, 0, 0]` at (1, 0).
#[test]
fn render_page_with_config_preserve_cmyk_keeps_device_cmyk_image_bytes() {
    let doc = PdfDocument::open(cmyk_image_pdf_bytes()).expect("open cmyk image fixture");
    let cmyk_config = RenderConfig {
        color_mode: ColorMode::PreserveCmyk,
        dpi: 72,
    };
    let rendered = doc
        .render_page_with_config(0, &cmyk_config)
        .expect("cmyk image render succeeds");

    // Check the output size before sampling individual pixels.
    assert_eq!(rendered.width, 2);
    assert_eq!(rendered.height, 1);

    let first_px = pixel_at(&rendered, 0, 0);
    assert_eq!(first_px, [255, 0, 0, 0]);
    let second_px = pixel_at(&rendered, 1, 0);
    assert_eq!(second_px, [0, 255, 0, 0]);
}
1579}