fleischwolf_pdf/
pdfium_backend.rs1use image::RgbImage;
5use pdfium_render::prelude::*;
6
/// A run of text found on a page together with its bounding box.
///
/// Cells are produced by `extract_page`, one per non-blank pdfium text
/// segment. The box is expressed with the origin at the top-left corner of
/// the page: `t` and `b` are computed as `page height - y`, so they grow
/// downwards. Values use the same units as [`PdfPage::width`] and
/// [`PdfPage::height`] (presumably PDF points; confirm against pdfium-render).
#[derive(Debug, Clone, PartialEq)]
pub struct TextCell {
    /// The segment's text exactly as returned by pdfium (not trimmed).
    pub text: String,
    /// Left edge of the bounding box.
    pub l: f32,
    /// Top edge of the bounding box, measured down from the top of the page.
    pub t: f32,
    /// Right edge of the bounding box.
    pub r: f32,
    /// Bottom edge of the bounding box, measured down from the top of the page.
    pub b: f32,
}
18
/// Factor applied to a page's width and height to obtain the pixel size of
/// its rendered bitmap; also recorded in [`PdfPage::scale`].
pub const RENDER_SCALE: f32 = 2.0;
23
24#[derive(Clone)]
28pub struct PdfPage {
29 pub width: f32,
30 pub height: f32,
31 pub scale: f32,
32 pub cells: Vec<TextCell>,
33 pub image: RgbImage,
34}
35
36pub struct PdfDocument {
38 pub pages: Vec<PdfPage>,
39}
40
41fn bind() -> Result<Pdfium, PdfiumError> {
45 if let Ok(path) = std::env::var("PDFIUM_DYNAMIC_LIB_PATH") {
46 let name = Pdfium::pdfium_platform_library_name_at_path(&path);
47 if let Ok(b) = Pdfium::bind_to_library(&name) {
48 return Ok(Pdfium::new(b));
49 }
50 if let Ok(b) = Pdfium::bind_to_library(&path) {
51 return Ok(Pdfium::new(b));
52 }
53 }
54 Pdfium::bind_to_system_library().map(Pdfium::new)
55}
56
57impl PdfDocument {
58 pub fn open(bytes: &[u8], password: Option<&str>) -> Result<Self, PdfiumError> {
63 let pdfium = bind()?;
64 let doc = pdfium.load_pdf_from_byte_slice(bytes, password)?;
65 let mut pages = Vec::new();
66 for page in doc.pages().iter() {
67 pages.push(extract_page(&page)?);
68 }
69 Ok(PdfDocument { pages })
70 }
71}
72
73pub fn for_each_page<E, F>(bytes: &[u8], password: Option<&str>, mut f: F) -> Result<(), E>
80where
81 E: From<PdfiumError>,
82 F: FnMut(usize, usize, PdfPage) -> Result<(), E>,
83{
84 let pdfium = bind()?;
85 let doc = pdfium.load_pdf_from_byte_slice(bytes, password)?;
86 let pages = doc.pages();
87 let total = pages.len() as usize;
88 for (i, page) in pages.iter().enumerate() {
89 let extracted = extract_page(&page)?;
90 f(i, total, extracted)?;
91 }
92 Ok(())
93}
94
95fn extract_page(page: &pdfium_render::prelude::PdfPage<'_>) -> Result<PdfPage, PdfiumError> {
96 let width = page.width().value;
97 let height = page.height().value;
98
99 let text = page.text()?;
100 let mut cells = Vec::new();
101 for segment in text.segments().iter() {
102 let rect = segment.bounds();
103 let s = segment.text();
104 if s.trim().is_empty() {
105 continue;
106 }
107 cells.push(TextCell {
109 text: s,
110 l: rect.left().value,
111 t: height - rect.top().value,
112 r: rect.right().value,
113 b: height - rect.bottom().value,
114 });
115 }
116
117 let tw = (width * RENDER_SCALE).round().max(1.0) as i32;
118 let th = (height * RENDER_SCALE).round().max(1.0) as i32;
119 let cfg = PdfRenderConfig::new()
120 .set_target_width(tw)
121 .set_target_height(th);
122 let bitmap = page.render_with_config(&cfg)?;
123 let image = bitmap.as_image().into_rgb8();
124
125 Ok(PdfPage {
126 width,
127 height,
128 scale: RENDER_SCALE,
129 cells,
130 image,
131 })
132}