Skip to main content

gaze_document/extract/
pdf.rs

1//! Single-page PDF rasterization via [`pdfium-render`](https://crates.io/crates/pdfium-render).
2//!
3//! ## Runtime dependency
4//!
5//! `pdfium-render` dynamically loads the pdfium shared library at runtime.
6//! Adopters must have `libpdfium` reachable to the process (system library,
7//! `LD_LIBRARY_PATH` / `DYLD_LIBRARY_PATH`, or alongside the executable).
8//!
9//! Per-OS install guidance is surfaced in [`DocumentError::PdfiumNotFound`]
10//! whenever binding fails.
11//!
12//! ## Scope (v0.0.x)
13//!
//! * Exactly one page is rasterized per call. Multi-page PDFs are accepted; the
//!   page selected by [`PdfRasterConfig::page_index`] (default `0`, the first
//!   page) is rendered. Multi-page support is incremental on top.
//! * Default target size: 1240×1754 px (≈150 DPI for A4), configurable via
//!   [`PdfRasterConfig`].
17
18use std::io::Cursor;
19use std::path::Path;
20
21use image::ImageFormat;
22use pdfium_render::prelude::{PdfRenderConfig, Pdfium, PdfiumError};
23
24use crate::DocumentError;
25
/// Configuration for one PDF rasterization pass.
///
/// `width_px` drives the output size; `height_px` is only an upper bound on
/// the resulting height.
#[non_exhaustive]
#[derive(Debug, Clone, Copy)]
pub struct PdfRasterConfig {
    /// Target image width in pixels (height auto-scales).
    pub width_px: u32,
    /// Maximum image height in pixels. `0` means no height limit is applied,
    /// so the height follows from `width_px` alone.
    pub height_px: u32,
    /// Zero-based page index to rasterize.
    pub page_index: i32,
}

impl PdfRasterConfig {
    /// Default config: 1240×1754 (≈150 DPI A4) on page 0.
    ///
    /// Declared `const` so the defaults can also initialise `const` and
    /// `static` items.
    #[must_use]
    pub const fn new() -> Self {
        Self {
            width_px: 1240,
            height_px: 1754,
            page_index: 0,
        }
    }
}

impl Default for PdfRasterConfig {
    /// Same values as [`PdfRasterConfig::new`].
    fn default() -> Self {
        Self::new()
    }
}
54
/// Outcome of rendering one PDF page: the encoded image plus its metadata.
#[non_exhaustive]
#[derive(Debug, Clone)]
pub struct RasterizedPage {
    /// The rendered page, encoded as PNG.
    pub png_bytes: Vec<u8>,
    /// Index of the page that was rendered.
    pub page_index: i32,
    /// Number of pages in the source document.
    pub page_count: i32,
    /// Pixel width of the rendered image.
    pub width_px: u32,
    /// Pixel height of the rendered image.
    pub height_px: u32,
}

impl RasterizedPage {
    /// Assemble a [`RasterizedPage`] from its parts.
    ///
    /// The values are stored as given; nothing is decoded or validated.
    pub fn new(
        png_bytes: Vec<u8>,
        page_index: i32,
        page_count: i32,
        width_px: u32,
        height_px: u32,
    ) -> Self {
        RasterizedPage {
            page_index,
            page_count,
            width_px,
            height_px,
            png_bytes,
        }
    }
}
89
90/// Rasterize a single page of a PDF on disk to PNG bytes.
91///
92/// # Errors
93///
94/// * [`DocumentError::PdfiumNotFound`] — pdfium dynamic library could not be
95///   located. Payload carries per-OS install guidance.
96/// * [`DocumentError::PdfRasterFailed`] — pdfium reported an error while
97///   opening or rendering the document.
98pub fn rasterize_first_page(
99    path: &Path,
100    config: PdfRasterConfig,
101) -> Result<RasterizedPage, DocumentError> {
102    let bindings = Pdfium::bind_to_system_library().map_err(|err| {
103        DocumentError::PdfiumNotFound(format!("{}. {}", err, pdfium_install_hint()))
104    })?;
105    let pdfium = Pdfium::new(bindings);
106    let document = pdfium
107        .load_pdf_from_file(path, None)
108        .map_err(map_pdfium_error)?;
109    let pages = document.pages();
110    let page_count = pages.len();
111    if page_count == 0 {
112        return Err(DocumentError::PdfRasterFailed(
113            "input PDF contains zero pages".to_string(),
114        ));
115    }
116
117    if config.page_index < 0 || config.page_index >= page_count {
118        return Err(DocumentError::PdfRasterFailed(format!(
119            "requested page index {} but document has {} page(s)",
120            config.page_index, page_count
121        )));
122    }
123
124    let page = pages.get(config.page_index).map_err(map_pdfium_error)?;
125    let mut render_config = PdfRenderConfig::new().set_target_width(config.width_px as i32);
126    if config.height_px > 0 {
127        render_config = render_config.set_maximum_height(config.height_px as i32);
128    }
129    let bitmap = page
130        .render_with_config(&render_config)
131        .map_err(map_pdfium_error)?;
132    let dynamic_image = bitmap.as_image().map_err(map_pdfium_error)?;
133    let (width, height) = (dynamic_image.width(), dynamic_image.height());
134
135    let mut buf = Cursor::new(Vec::with_capacity(64 * 1024));
136    dynamic_image
137        .write_to(&mut buf, ImageFormat::Png)
138        .map_err(|err| DocumentError::PdfRasterFailed(format!("png encode failed: {err}")))?;
139
140    Ok(RasterizedPage {
141        png_bytes: buf.into_inner(),
142        page_index: config.page_index,
143        page_count,
144        width_px: width,
145        height_px: height,
146    })
147}
148
149fn map_pdfium_error(err: PdfiumError) -> DocumentError {
150    DocumentError::PdfRasterFailed(err.to_string())
151}
152
/// Install guidance for the pdfium shared library, worded for the operating
/// system this binary was compiled for.
///
/// macOS, Linux and Windows each get a message naming the library file and
/// where to put it; every other target gets the download link only.
fn pdfium_install_hint() -> String {
    // `std::env::consts::OS` holds the compile-time `target_os` value.
    let hint: &str = match std::env::consts::OS {
        "macos" => {
            "Download the pdfium dynamic library from https://github.com/bblanchon/pdfium-binaries \
             and place `libpdfium.dylib` on DYLD_LIBRARY_PATH, in /usr/local/lib, or next to your binary."
        }
        "linux" => {
            "Download the pdfium dynamic library from https://github.com/bblanchon/pdfium-binaries \
             and place `libpdfium.so` on LD_LIBRARY_PATH, in /usr/local/lib, or next to your binary."
        }
        "windows" => {
            "Download the pdfium dynamic library from https://github.com/bblanchon/pdfium-binaries \
             and place `pdfium.dll` on PATH or next to your executable."
        }
        _ => "Download the pdfium dynamic library from https://github.com/bblanchon/pdfium-binaries.",
    };
    hint.to_owned()
}
171
#[cfg(test)]
mod tests {
    use super::{pdfium_install_hint, PdfRasterConfig};

    /// `new()` must keep producing 1240×1754 on page 0.
    #[test]
    fn raster_config_defaults_to_first_page_150_dpi() {
        let PdfRasterConfig {
            width_px,
            height_px,
            page_index,
        } = PdfRasterConfig::new();
        assert_eq!((page_index, width_px, height_px), (0, 1240, 1754));
    }

    /// Whatever the compile target, some guidance text must come back.
    #[test]
    fn install_hint_is_non_empty() {
        let hint = pdfium_install_hint();
        assert!(!hint.is_empty());
    }
}