gaze_document/extract/
pdf.rs1use std::io::Cursor;
19use std::path::Path;
20
21use image::ImageFormat;
22use pdfium_render::prelude::{PdfRenderConfig, Pdfium, PdfiumError};
23
24use crate::DocumentError;
25
/// Options controlling how a PDF page is rasterized by [`rasterize_first_page`].
///
/// The defaults (1240 x 1754 pixels) correspond to roughly an A4 page at
/// 150 DPI and select the first page of the document.
#[non_exhaustive]
#[derive(Debug, Clone, Copy)]
pub struct PdfRasterConfig {
    /// Target width of the rendered image, in pixels.
    pub width_px: u32,
    /// Upper bound on the rendered image height, in pixels.
    ///
    /// A value of `0` disables the height limit.
    pub height_px: u32,
    /// Zero-based index of the page to render.
    pub page_index: i32,
}

impl PdfRasterConfig {
    /// Creates a configuration populated with the default values.
    ///
    /// Equivalent to [`PdfRasterConfig::default`].
    pub fn new() -> Self {
        Self::default()
    }
}

impl Default for PdfRasterConfig {
    /// Returns the default configuration: page `0` at 1240 x 1754 pixels.
    fn default() -> Self {
        PdfRasterConfig {
            page_index: 0,
            width_px: 1240,
            height_px: 1754,
        }
    }
}
54
/// A single PDF page rendered to an in-memory PNG image.
#[non_exhaustive]
#[derive(Debug, Clone)]
pub struct RasterizedPage {
    /// PNG-encoded bytes of the rendered page.
    pub png_bytes: Vec<u8>,
    /// Zero-based index of the page that was rendered.
    pub page_index: i32,
    /// Total number of pages in the source document.
    pub page_count: i32,
    /// Width of the rendered image, in pixels.
    pub width_px: u32,
    /// Height of the rendered image, in pixels.
    pub height_px: u32,
}

impl RasterizedPage {
    /// Assembles a [`RasterizedPage`] from its parts.
    ///
    /// The struct is `#[non_exhaustive]`, so code outside this crate cannot
    /// use a struct literal and must go through this constructor.
    pub fn new(
        png_bytes: Vec<u8>,
        page_index: i32,
        page_count: i32,
        width_px: u32,
        height_px: u32,
    ) -> Self {
        RasterizedPage {
            width_px,
            height_px,
            page_index,
            page_count,
            png_bytes,
        }
    }
}
89
90pub fn rasterize_first_page(
99 path: &Path,
100 config: PdfRasterConfig,
101) -> Result<RasterizedPage, DocumentError> {
102 let bindings = Pdfium::bind_to_system_library().map_err(|err| {
103 DocumentError::PdfiumNotFound(format!("{}. {}", err, pdfium_install_hint()))
104 })?;
105 let pdfium = Pdfium::new(bindings);
106 let document = pdfium
107 .load_pdf_from_file(path, None)
108 .map_err(map_pdfium_error)?;
109 let pages = document.pages();
110 let page_count = pages.len();
111 if page_count == 0 {
112 return Err(DocumentError::PdfRasterFailed(
113 "input PDF contains zero pages".to_string(),
114 ));
115 }
116
117 if config.page_index < 0 || config.page_index >= page_count {
118 return Err(DocumentError::PdfRasterFailed(format!(
119 "requested page index {} but document has {} page(s)",
120 config.page_index, page_count
121 )));
122 }
123
124 let page = pages.get(config.page_index).map_err(map_pdfium_error)?;
125 let mut render_config = PdfRenderConfig::new().set_target_width(config.width_px as i32);
126 if config.height_px > 0 {
127 render_config = render_config.set_maximum_height(config.height_px as i32);
128 }
129 let bitmap = page
130 .render_with_config(&render_config)
131 .map_err(map_pdfium_error)?;
132 let dynamic_image = bitmap.as_image().map_err(map_pdfium_error)?;
133 let (width, height) = (dynamic_image.width(), dynamic_image.height());
134
135 let mut buf = Cursor::new(Vec::with_capacity(64 * 1024));
136 dynamic_image
137 .write_to(&mut buf, ImageFormat::Png)
138 .map_err(|err| DocumentError::PdfRasterFailed(format!("png encode failed: {err}")))?;
139
140 Ok(RasterizedPage {
141 png_bytes: buf.into_inner(),
142 page_index: config.page_index,
143 page_count,
144 width_px: width,
145 height_px: height,
146 })
147}
148
149fn map_pdfium_error(err: PdfiumError) -> DocumentError {
150 DocumentError::PdfRasterFailed(err.to_string())
151}
152
/// Builds a hint telling the user where to download the pdfium dynamic library
/// and, for macOS, Linux and Windows, where to place it so it can be found.
///
/// The platform is chosen at compile time from `target_os`; any other target
/// gets only the download location.
fn pdfium_install_hint() -> String {
    // Every variant of the hint starts with the same download instruction.
    let download =
        "Download the pdfium dynamic library from https://github.com/bblanchon/pdfium-binaries";

    // Platform-specific library name and the locations to place it in.
    let placement = if cfg!(target_os = "macos") {
        Some("`libpdfium.dylib` on DYLD_LIBRARY_PATH, in /usr/local/lib, or next to your binary.")
    } else if cfg!(target_os = "linux") {
        Some("`libpdfium.so` on LD_LIBRARY_PATH, in /usr/local/lib, or next to your binary.")
    } else if cfg!(target_os = "windows") {
        Some("`pdfium.dll` on PATH or next to your executable.")
    } else {
        None
    };

    match placement {
        Some(target) => format!("{download} and place {target}"),
        None => format!("{download}."),
    }
}
171
#[cfg(test)]
mod tests {
    use super::*;

    /// `PdfRasterConfig::new` must select page 0 at 1240 x 1754 pixels.
    #[test]
    fn raster_config_defaults_to_first_page_150_dpi() {
        let defaults = PdfRasterConfig::new();
        let PdfRasterConfig {
            page_index,
            width_px,
            height_px,
        } = defaults;
        assert_eq!(page_index, 0);
        assert_eq!(width_px, 1240);
        assert_eq!(height_px, 1754);
    }

    /// Whatever the build target, the install hint must contain some text.
    #[test]
    fn install_hint_is_non_empty() {
        let hint = pdfium_install_hint();
        assert!(!hint.is_empty());
    }
}