use std::fs;
use std::path::{Path, PathBuf};
/// Number of pages processed per batch: the pdfium backend logs progress after
/// each batch of this many pages, and each `pdftoppm` invocation is asked to
/// render at most this many pages.
const BATCH_SIZE: usize = 10;
pub fn render_pdf_pages(
pdf_path: &str,
max_pages: usize,
output_dir: &str,
) -> Result<Vec<PathBuf>, String> {
let pdf = PathBuf::from(pdf_path);
let out = PathBuf::from(output_dir);
if !pdf.exists() {
return Err(format!("PDF file not found: {}", pdf_path));
}
fs::create_dir_all(&out)
.map_err(|e| format!("Failed to create output directory '{}': {}", output_dir, e))?;
let result = render_pdf_pages_inner(&pdf, max_pages, &out);
if let Err(ref err) = result {
tracing::warn!("PDF render failed, cleaning up partial output: {}", err);
cleanup_dir(&out);
}
result
}
/// Tries each rendering backend in order — pdfium first, then the external
/// `pdftoppm` tool — and returns the pages produced by the first one that
/// succeeds.
///
/// # Errors
///
/// Returns an error when both backends fail. The message names the `pdfium`
/// cargo feature (the one the pdfium backend is actually gated on) and carries
/// the failure reason reported by each backend, so a renderer that is
/// installed but failed on this particular PDF is not mistaken for a missing
/// renderer.
fn render_pdf_pages_inner(
    pdf_path: &Path,
    max_pages: usize,
    output_dir: &Path,
) -> Result<Vec<PathBuf>, String> {
    let pdfium_err = match render_with_pdfium(pdf_path, max_pages, output_dir) {
        Ok(paths) => return Ok(paths),
        Err(e) => {
            tracing::debug!("pdfium-render unavailable or failed: {}", e);
            e
        }
    };

    let pdftoppm_err = match render_with_pdftoppm(pdf_path, max_pages, output_dir) {
        Ok(paths) => return Ok(paths),
        Err(e) => {
            tracing::debug!("pdftoppm unavailable or failed: {}", e);
            e
        }
    };

    // Keep the original guidance as the message prefix, then append the
    // per-backend causes that were previously only visible at debug level.
    Err(format!(
        "No PDF renderer available. Install poppler-utils (pdftoppm) or enable the \
         'pdfium' feature. (pdfium: {pdfium_err}; pdftoppm: {pdftoppm_err})"
    ))
}
/// Asks the external `pdfinfo` tool how many pages `pdf_path` contains.
///
/// Returns `None` when the path is not valid UTF-8, when `pdfinfo` cannot be
/// run or exits unsuccessfully, when its output has no line starting with
/// `Pages:`, or when the value on the first such line does not parse as a
/// `usize`.
fn pdf_page_count(pdf_path: &Path) -> Option<usize> {
    let pdf_arg = pdf_path.to_str()?;
    let info = std::process::Command::new("pdfinfo")
        .arg(pdf_arg)
        .output()
        .ok()
        .filter(|info| info.status.success())?;
    let report = String::from_utf8_lossy(&info.stdout);

    // Only the first `Pages:` line counts, even if its value fails to parse.
    report
        .lines()
        .find_map(|line| line.strip_prefix("Pages:"))
        .and_then(|count| count.trim().parse::<usize>().ok())
}
/// Renders up to `max_pages` pages of `pdf_path` with the system pdfium
/// library, saving each page as `page_NNNN.png` (1-based, zero-padded to four
/// digits) inside `output_dir`.
///
/// Pages are rendered at a target width of 2000 pixels. A warning is logged
/// when the document has more pages than `max_pages`, and a debug line is
/// logged after every batch of `BATCH_SIZE` pages.
///
/// # Errors
///
/// Returns a message when the pdfium library cannot be bound, the document
/// cannot be opened, or any page fails to load, render, convert or save.
/// Pages written before the failure are left on disk.
#[cfg(feature = "pdfium")]
fn render_with_pdfium(
    pdf_path: &Path,
    max_pages: usize,
    output_dir: &Path,
) -> Result<Vec<PathBuf>, String> {
    use pdfium_render::prelude::*;

    let bindings = Pdfium::bind_to_system_library()
        .map_err(|e| format!("Cannot bind pdfium library: {}", e))?;
    let pdfium = Pdfium::new(bindings);
    let document = pdfium
        .load_pdf_from_file(pdf_path, None)
        .map_err(|e| format!("Failed to open PDF '{}': {}", pdf_path.display(), e))?;

    let total_pages = document.pages().len() as usize;
    let pages_to_render = total_pages.min(max_pages);
    if total_pages > max_pages {
        tracing::warn!(
            "PDF has {} pages but max_pages={}; skipping pages {}–{}",
            total_pages,
            max_pages,
            max_pages + 1,
            total_pages,
        );
    }

    let mut rendered = Vec::with_capacity(pages_to_render);
    for batch_start in (0..pages_to_render).step_by(BATCH_SIZE) {
        let batch_end = (batch_start + BATCH_SIZE).min(pages_to_render);

        for index in batch_start..batch_end {
            // Human-facing page numbers (messages and file names) are 1-based.
            let page_number = index + 1;

            let page = document
                .pages()
                .get(index as PdfPageIndex)
                .map_err(|e| format!("Failed to get page {}: {}", page_number, e))?;
            let bitmap = page
                .render_with_config(&PdfRenderConfig::new().set_target_width(2000))
                .map_err(|e| format!("Failed to render page {}: {}", page_number, e))?;

            let file_path = output_dir.join(format!("page_{:04}.png", page_number));
            let image = bitmap.as_image().map_err(|e| {
                format!("Failed to convert page {} to image: {}", page_number, e)
            })?;
            image
                .save(&file_path)
                .map_err(|e| format!("Failed to save page {}: {}", page_number, e))?;

            rendered.push(file_path);
        }

        tracing::debug!("Rendered batch: pages {}/{}", batch_end, pages_to_render,);
    }

    Ok(rendered)
}
/// Stand-in used when the crate is built without the `pdfium` feature.
///
/// Ignores all of its arguments and always fails, so that callers fall
/// through to the next rendering backend.
#[cfg(not(feature = "pdfium"))]
fn render_with_pdfium(
    _pdf_path: &Path,
    _max_pages: usize,
    _output_dir: &Path,
) -> Result<Vec<PathBuf>, String> {
    let reason = String::from("pdfium feature not enabled");
    Err(reason)
}
/// Renders up to `max_pages` pages of `pdf_path` to PNG files in `output_dir`
/// by running the external `pdftoppm` tool at 200 DPI, one invocation per
/// batch of `BATCH_SIZE` pages.
///
/// The page limit is clamped to the document's real page count when
/// `pdf_page_count` can determine it. When it cannot, batches are requested
/// until one fails or comes back short, which is taken to mean the end of the
/// document was reached.
///
/// When the page count is known, a failed batch is logged and skipped, and the
/// pages of the other batches are still returned.
///
/// # Errors
///
/// Returns a message when `pdftoppm` is not on `PATH`, when either path is not
/// valid UTF-8, when the effective page limit is zero, when `pdftoppm` cannot
/// be executed, or when no output file was found at all (the last `pdftoppm`
/// error, if any, is included).
fn render_with_pdftoppm(
    pdf_path: &Path,
    max_pages: usize,
    output_dir: &Path,
) -> Result<Vec<PathBuf>, String> {
    // Cap on the up-front allocation: without a known page count the limit is
    // the caller's raw `max_pages`, which may be huge (even `usize::MAX`).
    const MAX_PREALLOCATED_PATHS: usize = 1024;

    which::which("pdftoppm").map_err(|_| "pdftoppm not found in PATH".to_string())?;
    let pdf_path_str = pdf_path
        .to_str()
        .ok_or_else(|| "PDF path is not valid UTF-8".to_string())?;
    let out_dir_str = output_dir
        .to_str()
        .ok_or_else(|| "Output directory path is not valid UTF-8".to_string())?;

    let known_total = pdf_page_count(pdf_path);
    let effective_max = known_total.map_or(max_pages, |total| total.min(max_pages));
    if effective_max == 0 {
        return Err("pdftoppm: PDF reports zero pages".into());
    }

    let prefix = "page";
    let output_root = format!("{}/{}", out_dir_str, prefix);
    let mut rendered: Vec<PathBuf> =
        Vec::with_capacity(effective_max.min(MAX_PREALLOCATED_PATHS));
    let mut last_err: Option<String> = None;
    let mut start: usize = 1;

    while start <= effective_max {
        let batch_end = start.saturating_add(BATCH_SIZE - 1).min(effective_max);
        let output = std::process::Command::new("pdftoppm")
            .args([
                "-png",
                "-r",
                "200",
                "-f",
                start.to_string().as_str(),
                "-l",
                batch_end.to_string().as_str(),
                pdf_path_str,
                output_root.as_str(),
            ])
            .output()
            .map_err(|e| format!("Failed to execute pdftoppm: {}", e))?;

        if !output.status.success() {
            let stderr = String::from_utf8_lossy(&output.stderr);
            tracing::warn!(
                "pdftoppm batch {}-{} failed: {} — keeping earlier pages",
                start,
                batch_end,
                stderr.trim()
            );
            last_err = Some(stderr.trim().to_string());
            // With an unknown page count a failure most likely means we ran
            // past the last page; continuing would spawn one process per
            // remaining batch up to `max_pages`.
            if known_total.is_none() {
                break;
            }
        } else {
            let found_before = rendered.len();
            rendered.extend(
                (start..=batch_end)
                    .filter_map(|page_num| locate_pdftoppm_page(output_dir, prefix, page_num)),
            );
            // pdftoppm clamps `-l` to the last page, so a short batch marks the
            // end of a document whose length we could not determine.
            let expected = batch_end - start + 1;
            if known_total.is_none() && rendered.len() - found_before < expected {
                break;
            }
        }

        let Some(next_start) = batch_end.checked_add(1) else {
            break;
        };
        start = next_start;
    }

    if rendered.is_empty() {
        return Err(format!(
            "pdftoppm produced no output files{}",
            last_err
                .map(|e| format!(" (last error: {e})"))
                .unwrap_or_default()
        ));
    }
    Ok(rendered)
}

/// Finds the PNG that `pdftoppm` wrote for `page_num` under `output_dir`,
/// returning its canonical path, or `None` if no candidate exists.
fn locate_pdftoppm_page(output_dir: &Path, prefix: &str, page_num: usize) -> Option<PathBuf> {
    // Poppler's pdftoppm zero-pads the page number to the digit count of the
    // document's total page count, so a document with fewer than ten pages
    // yields `page-1.png`; other builds pad wider. Probe every width from
    // unpadded up to six digits, narrowest first.
    const MAX_PAD_WIDTH: usize = 6;
    (1..=MAX_PAD_WIDTH).find_map(|width| {
        output_dir
            .join(format!("{prefix}-{page_num:0width$}.png"))
            .canonicalize()
            .ok()
    })
}
/// Best-effort removal of every regular file directly inside `dir`.
///
/// Subdirectories and their contents are left alone. All failures — an
/// unreadable directory, unreadable entries or metadata, a file that cannot be
/// deleted — are silently ignored.
fn cleanup_dir(dir: &Path) {
    if let Ok(listing) = fs::read_dir(dir) {
        listing
            .flatten()
            .filter(|item| item.metadata().map(|meta| meta.is_file()).unwrap_or(false))
            .for_each(|item| {
                // Deletion errors are deliberately ignored: cleanup is advisory.
                let _ = fs::remove_file(item.path());
            });
    }
}