use image::RgbImage;
use pdfium_render::prelude::*;
/// A run of page text together with its bounding box.
///
/// `extract_page` builds one cell per non-blank text segment. The vertical
/// coordinates are stored as `page_height - y`, i.e. measured downward from
/// the top edge of the page rather than upward from the bottom.
#[derive(Clone, Debug)]
pub struct TextCell {
    /// The segment's text exactly as reported by the PDF library.
    pub text: String,
    /// Left edge of the bounding box.
    pub l: f32,
    /// Top edge of the bounding box, measured from the top of the page.
    pub t: f32,
    /// Right edge of the bounding box.
    pub r: f32,
    /// Bottom edge of the bounding box, measured from the top of the page.
    pub b: f32,
}
/// Factor by which a page's width and height are multiplied to obtain the
/// pixel dimensions of its rendered bitmap (see `extract_page`). The same
/// value is recorded in each `PdfPage::scale`.
pub const RENDER_SCALE: f32 = 2.0;
/// Everything extracted from a single page: its size, its text cells and a
/// rendered bitmap.
///
/// This type shadows the glob-imported `pdfium_render::prelude::PdfPage`
/// inside this file; the library type must be named by its full path.
#[derive(Clone)]
pub struct PdfPage {
    /// Page width as reported by the PDF library (unscaled).
    pub width: f32,
    /// Page height as reported by the PDF library (unscaled).
    pub height: f32,
    /// Scale factor that was applied when rendering `image`.
    pub scale: f32,
    /// Non-blank text segments found on the page, with top-left-origin boxes.
    pub cells: Vec<TextCell>,
    /// The page rendered to RGB at roughly `width * scale` by `height * scale`
    /// pixels.
    pub image: RgbImage,
}
/// A fully extracted PDF: one [`PdfPage`] per page, in document order.
///
/// All data is owned; nothing here borrows from the PDF library once
/// `PdfDocument::open` has returned.
pub struct PdfDocument {
    /// The extracted pages, in the order the document yields them.
    pub pages: Vec<PdfPage>,
}
/// Obtains a `Pdfium` instance, preferring an explicitly configured library.
///
/// When the `PDFIUM_DYNAMIC_LIB_PATH` environment variable is set, its value
/// is tried twice: first as a directory (expanded to the platform-specific
/// library file name), then verbatim as the library path itself. Failures of
/// either attempt are deliberately ignored so that the system-wide library
/// can still be used as a last resort.
///
/// # Errors
///
/// Returns the error from binding to the system library if every attempt
/// fails.
fn bind() -> Result<Pdfium, PdfiumError> {
    if let Ok(configured) = std::env::var("PDFIUM_DYNAMIC_LIB_PATH") {
        let platform_name = Pdfium::pdfium_platform_library_name_at_path(&configured);
        let bindings = Pdfium::bind_to_library(&platform_name)
            .or_else(|_| Pdfium::bind_to_library(&configured));
        if let Ok(bindings) = bindings {
            return Ok(Pdfium::new(bindings));
        }
    }

    // Nothing configured, or the configured location did not work.
    Pdfium::bind_to_system_library().map(Pdfium::new)
}
impl PdfDocument {
    /// Loads a PDF from memory and eagerly extracts every page.
    ///
    /// `bytes` is the raw PDF file content and `password` is forwarded
    /// unchanged to the PDF library's loader. Each page is converted with
    /// `extract_page`, so the returned value holds text cells and a rendered
    /// image for every page.
    ///
    /// # Errors
    ///
    /// Returns a `PdfiumError` if the library cannot be bound, the document
    /// cannot be loaded, or extraction of any page fails. Extraction stops at
    /// the first failing page.
    pub fn open(bytes: &[u8], password: Option<&str>) -> Result<Self, PdfiumError> {
        let pdfium = bind()?;
        let document = pdfium.load_pdf_from_byte_slice(bytes, password)?;

        let pages = document
            .pages()
            .iter()
            .map(|page| extract_page(&page))
            .collect::<Result<Vec<_>, _>>()?;

        Ok(Self { pages })
    }
}
fn extract_page(page: &pdfium_render::prelude::PdfPage<'_>) -> Result<PdfPage, PdfiumError> {
let width = page.width().value;
let height = page.height().value;
let text = page.text()?;
let mut cells = Vec::new();
for segment in text.segments().iter() {
let rect = segment.bounds();
let s = segment.text();
if s.trim().is_empty() {
continue;
}
cells.push(TextCell {
text: s,
l: rect.left().value,
t: height - rect.top().value,
r: rect.right().value,
b: height - rect.bottom().value,
});
}
let tw = (width * RENDER_SCALE).round().max(1.0) as i32;
let th = (height * RENDER_SCALE).round().max(1.0) as i32;
let cfg = PdfRenderConfig::new()
.set_target_width(tw)
.set_target_height(th);
let bitmap = page.render_with_config(&cfg)?;
let image = bitmap.as_image().into_rgb8();
Ok(PdfPage {
width,
height,
scale: RENDER_SCALE,
cells,
image,
})
}