use crate::bbox::BoundingBox;
use crate::error::{Error, Result};
use hayro::{render, InterpreterSettings, Pdf as HayroPdf, RenderSettings};
use std::sync::Arc;
pub fn detect_bbox_by_rendering(
pdf_bytes: &[u8],
page_num: usize,
dpi: Option<f32>,
) -> Result<BoundingBox> {
let dpi = dpi.unwrap_or(72.0); let scale = dpi / 72.0;
let data = Arc::new(pdf_bytes.to_vec());
let pdf = HayroPdf::new(data)
.map_err(|e| Error::PdfParse(format!("hayro failed to load PDF: {:?}", e)))?;
let page = pdf
.pages()
.get(page_num)
.ok_or_else(|| Error::InvalidPage(format!("page {} not found", page_num)))?;
let interpreter_settings = InterpreterSettings::default();
let render_settings = RenderSettings {
x_scale: scale,
y_scale: scale,
..Default::default()
};
let pixmap = render(page, &interpreter_settings, &render_settings);
let bbox = scan_pixmap_for_content(&pixmap, scale)?;
Ok(bbox)
}
fn scan_pixmap_for_content(pixmap: &hayro::Pixmap, scale: f32) -> Result<BoundingBox> {
let width = pixmap.width() as usize;
let height = pixmap.height() as usize;
let pixels = pixmap.data_as_u8_slice();
let mut min_x = width;
let mut max_x = 0;
let mut min_y = height;
let mut max_y = 0;
const WHITE_THRESHOLD: u8 = 250;
for y in 0..height {
for x in 0..width {
let idx = (y * width + x) * 4;
if idx + 2 < pixels.len() {
let r = pixels[idx];
let g = pixels[idx + 1];
let b = pixels[idx + 2];
if r < WHITE_THRESHOLD || g < WHITE_THRESHOLD || b < WHITE_THRESHOLD {
min_x = min_x.min(x);
max_x = max_x.max(x);
min_y = min_y.min(y);
max_y = max_y.max(y);
}
}
}
}
if min_x > max_x || min_y > max_y {
return Err(Error::EmptyPage(0)); }
let left = (min_x as f32) / scale;
let right = (max_x as f32 + 1.0) / scale;
let pdf_height = (height as f32) / scale;
let bottom = pdf_height - ((max_y as f32 + 1.0) / scale);
let top = pdf_height - ((min_y as f32) / scale);
BoundingBox::new(left as f64, bottom as f64, right as f64, top as f64)
}
#[cfg(test)]
mod tests {
    /// Placeholder test: it has an empty body and makes no assertions, so it
    /// passes unconditionally and exercises none of the code in this file.
    #[test]
    fn test_render_bbox_basic() {}
}