use std::io::Read;
use std::path::{Path, PathBuf};
use std::sync::OnceLock;
use pdfium_render::prelude::Pdfium;
use thiserror::Error;
/// PDFium build identifier. It is embedded in the download URL (as the
/// `chromium%2F<version>` release segment) and in the cache directory name
/// (`pdfium-<version>`).
pub const PDFIUM_VERSION: &str = "7690";
/// Prefix of every download URL; the release segment and the platform archive
/// file name are appended to it when a download is needed.
const BASE_URL: &str = "https://github.com/bblanchon/pdfium-binaries/releases/download";
/// Errors produced while locating, downloading, extracting, or binding the
/// PDFium shared library.
#[derive(Error, Debug)]
pub enum PdfiumAutoError {
/// The OS/architecture pair reported by `std::env::consts` has no entry in
/// the platform table.
#[error("Unsupported platform: {os}/{arch}")]
UnsupportedPlatform { os: String, arch: String },
/// A filesystem operation on the cache directory (or on a file inside it)
/// failed; the underlying I/O error is kept as the source.
#[error("Cache directory error: {0}")]
CacheDir(#[source] std::io::Error),
/// Building the HTTP client, sending the request, a non-success status, or
/// reading the response body failed. The payload is a human-readable message.
#[error("Download failed: {0}")]
Download(String),
/// Reading the archive, unpacking the library, or writing the bundled
/// library bytes failed, or the library was not present in the archive.
#[error("Archive extraction failed: {0}")]
Extract(String),
/// The library file was found but could not be bound; `reason` is the
/// stringified binding error.
#[error("Failed to bind PDFium from '{path}': {reason}")]
Bind { path: PathBuf, reason: String },
}
/// Per-platform download details selected by `detect_platform`.
struct PlatformInfo {
// File name of the release archive appended to the download URL.
archive_name: &'static str,
// Path of the shared library inside the archive (matched against tar entry paths).
lib_path_in_archive: &'static str,
// File name under which the library is stored in the cache directory.
lib_name: &'static str,
}
/// Looks up the release archive and library file names for the platform this
/// binary was compiled for.
///
/// # Errors
///
/// Returns [`PdfiumAutoError::UnsupportedPlatform`] when the
/// `std::env::consts::OS` / `ARCH` pair is not in the table below.
fn detect_platform() -> Result<PlatformInfo, PdfiumAutoError> {
    // (path inside the archive, file name in the cache) per operating system.
    const MAC_LIB: (&str, &str) = ("lib/libpdfium.dylib", "libpdfium.dylib");
    const LINUX_LIB: (&str, &str) = ("lib/libpdfium.so", "libpdfium.so");
    const WINDOWS_LIB: (&str, &str) = ("bin/pdfium.dll", "pdfium.dll");

    let os = std::env::consts::OS;
    let arch = std::env::consts::ARCH;

    let (archive_name, (lib_path_in_archive, lib_name)) = match (os, arch) {
        ("macos", "aarch64") => ("pdfium-mac-arm64.tgz", MAC_LIB),
        ("macos", "x86_64") => ("pdfium-mac-x64.tgz", MAC_LIB),
        ("linux", "x86_64") => ("pdfium-linux-x64.tgz", LINUX_LIB),
        ("linux", "aarch64") => ("pdfium-linux-arm64.tgz", LINUX_LIB),
        ("windows", "x86_64") => ("pdfium-win-x64.tgz", WINDOWS_LIB),
        ("windows", "aarch64") => ("pdfium-win-arm64.tgz", WINDOWS_LIB),
        ("windows", "x86") => ("pdfium-win-x86.tgz", WINDOWS_LIB),
        _ => {
            return Err(PdfiumAutoError::UnsupportedPlatform {
                os: os.to_string(),
                arch: arch.to_string(),
            })
        }
    };

    Ok(PlatformInfo {
        archive_name,
        lib_path_in_archive,
        lib_name,
    })
}
/// Returns the versioned directory in which the PDFium library is cached.
///
/// Resolution order:
/// 1. `PDFIUM_AUTO_CACHE_DIR`, when set to a non-empty value, joined with
///    `pdfium-<version>`.
/// 2. The platform cache directory, else `~/.cache`, else the system temp
///    directory, joined with `pdf2md/pdfium-<version>`.
///
/// The directory is not created by this function.
pub fn pdfium_cache_dir() -> PathBuf {
    let versioned = format!("pdfium-{PDFIUM_VERSION}");

    // `var_os` keeps override paths that are not valid Unicode, which
    // `env::var` would reject and thereby silently ignore. An empty value is
    // treated as "not set" so the result never degrades to a path relative to
    // the current working directory.
    match std::env::var_os("PDFIUM_AUTO_CACHE_DIR") {
        Some(override_dir) if !override_dir.is_empty() => {
            PathBuf::from(override_dir).join(versioned)
        }
        _ => dirs::cache_dir()
            .or_else(|| dirs::home_dir().map(|h| h.join(".cache")))
            .unwrap_or_else(std::env::temp_dir)
            .join("pdf2md")
            .join(versioned),
    }
}
/// Library path resolved earlier in this process. It is written at most once
/// by `ensure_pdfium_library` (and by `ensure_pdfium_bundled` when the
/// `bundled` feature is enabled) so that later calls return it without
/// repeating the lookup.
static RESOLVED_PATH: OnceLock<PathBuf> = OnceLock::new();
pub fn is_pdfium_cached() -> bool {
if let Ok(p) = std::env::var("PDFIUM_LIB_PATH") {
return PathBuf::from(p).exists();
}
if let Ok(info) = detect_platform() {
return pdfium_cache_dir().join(info.lib_name).exists();
}
false
}
/// Returns the path of an already-present PDFium library without touching
/// the network.
///
/// Checks `PDFIUM_LIB_PATH` first (if it names an existing path), then the
/// platform library file inside [`pdfium_cache_dir`]. Returns `None` when
/// neither exists or the platform is unsupported.
pub fn cached_pdfium_path() -> Option<PathBuf> {
    // `var_os` accepts paths that are not valid Unicode; `env::var` would
    // treat such a value as if the variable were unset.
    let from_env = std::env::var_os("PDFIUM_LIB_PATH")
        .map(PathBuf::from)
        .filter(|candidate| candidate.exists());

    from_env.or_else(|| {
        let info = detect_platform().ok()?;
        let cached = pdfium_cache_dir().join(info.lib_name);
        cached.exists().then_some(cached)
    })
}
/// Makes sure a PDFium library exists on disk and returns its path.
///
/// The first successful resolution is remembered in `RESOLVED_PATH`; later
/// calls return the remembered path immediately. `on_progress`, when given,
/// is forwarded to the download step.
///
/// # Errors
///
/// Propagates any error from `resolve_or_download`.
pub fn ensure_pdfium_library(
    on_progress: Option<&dyn Fn(u64, Option<u64>)>,
) -> Result<PathBuf, PdfiumAutoError> {
    match RESOLVED_PATH.get() {
        Some(remembered) => Ok(remembered.clone()),
        None => {
            let resolved = resolve_or_download(on_progress)?;
            // `set` fails only if a value was stored in the meantime; that
            // outcome is ignored and this call still returns its own result.
            let _ = RESOLVED_PATH.set(resolved.clone());
            Ok(resolved)
        }
    }
}
/// Resolves the PDFium library (downloading it if necessary) and binds to it.
///
/// `on_progress`, when given, is passed through to the library resolution
/// step.
///
/// # Errors
///
/// Returns the resolution error, or [`PdfiumAutoError::Bind`] if binding to
/// the resolved file fails.
pub fn bind_pdfium(
    on_progress: Option<&dyn Fn(u64, Option<u64>)>,
) -> Result<Pdfium, PdfiumAutoError> {
    ensure_pdfium_library(on_progress).and_then(|lib_path| bind_pdfium_from_path(&lib_path))
}
/// Same as [`bind_pdfium`], but without a progress callback.
pub fn bind_pdfium_silent() -> Result<Pdfium, PdfiumAutoError> {
    let no_progress: Option<&dyn Fn(u64, Option<u64>)> = None;
    bind_pdfium(no_progress)
}
/// Binds to the PDFium shared library located at `path`.
///
/// # Errors
///
/// Returns [`PdfiumAutoError::Bind`] carrying `path` and the stringified
/// binding error when the library cannot be bound.
pub fn bind_pdfium_from_path(path: &Path) -> Result<Pdfium, PdfiumAutoError> {
    match Pdfium::bind_to_library(path) {
        Ok(bindings) => Ok(Pdfium::new(bindings)),
        Err(err) => Err(PdfiumAutoError::Bind {
            path: path.to_path_buf(),
            reason: err.to_string(),
        }),
    }
}
// Compiled only with the `bundled` feature. Textually includes `bundled.rs`
// from the build's OUT_DIR; this file reads `PDFIUM_BYTES` from the module.
// NOTE(review): `bundled.rs` is presumably generated by the build script;
// confirm in build.rs.
#[cfg(feature = "bundled")]
mod bundled_lib {
include!(concat!(env!("OUT_DIR"), "/bundled.rs"));
}
/// Makes sure the bundled PDFium library has been written to the cache
/// directory and returns its path.
///
/// A path already remembered in `RESOLVED_PATH` is returned as-is. Otherwise
/// the embedded bytes are written next to the final location under a
/// process-specific temporary name and then renamed into place, so an
/// interrupted write can never leave a truncated file at the final path
/// (which later `exists()` checks would otherwise accept as valid).
///
/// # Errors
///
/// * [`PdfiumAutoError::UnsupportedPlatform`] from platform detection.
/// * [`PdfiumAutoError::CacheDir`] if the cache directory cannot be created
///   or (on Unix) permissions cannot be read or set.
/// * [`PdfiumAutoError::Extract`] if the library bytes cannot be written or
///   moved into place.
#[cfg(feature = "bundled")]
pub fn ensure_pdfium_bundled() -> Result<PathBuf, PdfiumAutoError> {
    if let Some(path) = RESOLVED_PATH.get() {
        return Ok(path.clone());
    }
    let info = detect_platform()?;
    let cache_dir = pdfium_cache_dir();
    let lib_path = cache_dir.join(info.lib_name);
    if !lib_path.exists() {
        std::fs::create_dir_all(&cache_dir).map_err(PdfiumAutoError::CacheDir)?;
        // The process id keeps concurrent processes from sharing a staging file.
        let staging_path =
            cache_dir.join(format!("{}.{}.tmp", info.lib_name, std::process::id()));
        let outcome = install_bundled_library(&staging_path, &lib_path);
        // After a successful rename the staging file no longer exists; this
        // best-effort removal cleans up after every other outcome.
        let _ = std::fs::remove_file(&staging_path);
        outcome?;
    }
    let _ = RESOLVED_PATH.set(lib_path.clone());
    Ok(lib_path)
}

/// Writes the embedded library to `staging_path`, marks it executable on
/// Unix, and renames it to `lib_path`.
#[cfg(feature = "bundled")]
fn install_bundled_library(staging_path: &Path, lib_path: &Path) -> Result<(), PdfiumAutoError> {
    let write_failed = |e: std::io::Error| {
        PdfiumAutoError::Extract(format!(
            "Failed to write bundled pdfium to {}: {}",
            lib_path.display(),
            e
        ))
    };

    std::fs::write(staging_path, bundled_lib::PDFIUM_BYTES).map_err(write_failed)?;

    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        let mut perms = std::fs::metadata(staging_path)
            .map_err(PdfiumAutoError::CacheDir)?
            .permissions();
        perms.set_mode(perms.mode() | 0o755);
        std::fs::set_permissions(staging_path, perms).map_err(PdfiumAutoError::CacheDir)?;
    }

    match std::fs::rename(staging_path, lib_path) {
        Ok(()) => Ok(()),
        // Another process may have installed the library first (and, on
        // Windows, may hold it open so it cannot be replaced); its copy is
        // just as good.
        Err(_) if lib_path.exists() => Ok(()),
        Err(e) => Err(write_failed(e)),
    }
}
/// Writes the bundled PDFium library to the cache if needed and binds to it.
///
/// # Errors
///
/// Returns any error from `ensure_pdfium_bundled`, or
/// [`PdfiumAutoError::Bind`] if binding to the written file fails.
#[cfg(feature = "bundled")]
pub fn bind_bundled() -> Result<Pdfium, PdfiumAutoError> {
    ensure_pdfium_bundled().and_then(|lib_path| bind_pdfium_from_path(&lib_path))
}
/// Finds the PDFium library on disk, downloading and extracting it into the
/// cache directory when it is not there yet.
///
/// Lookup order:
/// 1. `PDFIUM_LIB_PATH`, if it names an existing path. If it is set but the
///    path is missing, a notice is printed to stderr and the lookup goes on.
/// 2. The platform library file inside [`pdfium_cache_dir`].
/// 3. Download of the platform archive, reporting progress through
///    `on_progress` when given, followed by extraction into the cache.
///
/// # Errors
///
/// Returns platform-detection, cache-directory, download, or extraction
/// errors from the respective steps.
fn resolve_or_download(
    on_progress: Option<&dyn Fn(u64, Option<u64>)>,
) -> Result<PathBuf, PdfiumAutoError> {
    // `var_os` accepts paths that are not valid Unicode; `env::var` would
    // treat such a value as if the variable were unset.
    if let Some(env_path) = std::env::var_os("PDFIUM_LIB_PATH").map(PathBuf::from) {
        if env_path.exists() {
            return Ok(env_path);
        }
        eprintln!(
            "pdfium-auto: PDFIUM_LIB_PATH '{}' not found; downloading …",
            env_path.display()
        );
    }

    let info = detect_platform()?;
    let cache_dir = pdfium_cache_dir();
    let lib_path = cache_dir.join(info.lib_name);
    if lib_path.exists() {
        return Ok(lib_path);
    }

    let url = format!(
        "{}/chromium%2F{}/{}",
        BASE_URL, PDFIUM_VERSION, info.archive_name
    );
    std::fs::create_dir_all(&cache_dir).map_err(PdfiumAutoError::CacheDir)?;
    let archive_bytes = download_bytes(&url, on_progress)?;
    extract_library(&archive_bytes, info.lib_path_in_archive, &lib_path)?;
    Ok(lib_path)
}
/// Downloads `url` fully into memory with a blocking HTTP client.
///
/// `on_progress`, when given, is called after every chunk with the number of
/// bytes received so far and the total size from the `Content-Length`
/// header, if the server sent one.
///
/// # Errors
///
/// Returns [`PdfiumAutoError::Download`] if the client cannot be built, the
/// request fails, the status is not a success, or reading the body fails.
fn download_bytes(
    url: &str,
    on_progress: Option<&dyn Fn(u64, Option<u64>)>,
) -> Result<Vec<u8>, PdfiumAutoError> {
    // Used when the server does not announce a length.
    const DEFAULT_CAPACITY: usize = 35 * 1024 * 1024;
    // Upper bound on the up-front allocation. Content-Length is supplied by
    // the server, so it must not be able to force an arbitrarily large
    // reservation; the buffer still grows on demand beyond this.
    const MAX_PREALLOCATION: usize = 128 * 1024 * 1024;
    const CHUNK_SIZE: usize = 64 * 1024;

    let client = reqwest::blocking::Client::builder()
        .user_agent(concat!("pdfium-auto/", env!("CARGO_PKG_VERSION")))
        .redirect(reqwest::redirect::Policy::limited(5))
        .build()
        .map_err(|e| PdfiumAutoError::Download(e.to_string()))?;
    let response = client
        .get(url)
        .send()
        .map_err(|e| PdfiumAutoError::Download(format!("GET {url}: {e}")))?;
    if !response.status().is_success() {
        return Err(PdfiumAutoError::Download(format!(
            "HTTP {} for {url}",
            response.status()
        )));
    }

    let total = response.content_length();
    // Checked conversion instead of `as`: a length that does not fit in
    // `usize` falls back to the default rather than being truncated.
    let capacity = total
        .and_then(|len| usize::try_from(len).ok())
        .unwrap_or(DEFAULT_CAPACITY)
        .min(MAX_PREALLOCATION);
    let mut buf = Vec::with_capacity(capacity);

    let mut stream = response;
    let mut chunk = vec![0u8; CHUNK_SIZE];
    let mut downloaded: u64 = 0;
    loop {
        match stream.read(&mut chunk) {
            Ok(0) => break,
            Ok(n) => {
                buf.extend_from_slice(&chunk[..n]);
                downloaded += n as u64;
                if let Some(cb) = on_progress {
                    cb(downloaded, total);
                }
            }
            // A read interrupted by a signal is retried, not treated as a failure.
            Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(e) => {
                return Err(PdfiumAutoError::Download(format!("Read error: {e}")));
            }
        }
    }
    Ok(buf)
}
fn extract_library(
archive_bytes: &[u8],
lib_path_in_archive: &str,
dest_path: &Path,
) -> Result<(), PdfiumAutoError> {
use flate2::read::GzDecoder;
use tar::Archive;
let gz = GzDecoder::new(archive_bytes);
let mut archive = Archive::new(gz);
for entry in archive
.entries()
.map_err(|e| PdfiumAutoError::Extract(e.to_string()))?
{
let mut entry = entry.map_err(|e| PdfiumAutoError::Extract(e.to_string()))?;
let entry_path = entry
.path()
.map_err(|e| PdfiumAutoError::Extract(e.to_string()))?;
let entry_str = entry_path.to_string_lossy();
if entry_str == lib_path_in_archive {
entry
.unpack(dest_path)
.map_err(|e| PdfiumAutoError::Extract(format!("Unpack failed: {e}")))?;
return Ok(());
}
}
Err(PdfiumAutoError::Extract(format!(
"Library '{}' not found in archive",
lib_path_in_archive
)))
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::ffi::OsString;
    use std::sync::{Mutex, MutexGuard};

    const CACHE_DIR_VAR: &str = "PDFIUM_AUTO_CACHE_DIR";

    // The test harness runs tests on parallel threads and environment
    // variables are process-global, so every test that reads or writes
    // `PDFIUM_AUTO_CACHE_DIR` holds this lock for its whole duration.
    static ENV_LOCK: Mutex<()> = Mutex::new(());

    /// Acquires the environment lock, ignoring poisoning left behind by a
    /// test that failed while holding it.
    fn lock_env() -> MutexGuard<'static, ()> {
        ENV_LOCK
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner())
    }

    /// Puts `name` back to the value it had before a test changed it.
    fn restore_var(name: &str, previous: Option<OsString>) {
        match previous {
            Some(value) => std::env::set_var(name, value),
            None => std::env::remove_var(name),
        }
    }

    #[test]
    fn detect_platform_is_supported() {
        detect_platform().expect("current platform should be supported");
    }

    #[test]
    fn cache_dir_is_deterministic() {
        let _env = lock_env();
        // Clear any override from the outer environment so the default
        // location (which contains "pdf2md") is the one under test.
        let previous = std::env::var_os(CACHE_DIR_VAR);
        std::env::remove_var(CACHE_DIR_VAR);
        let d1 = pdfium_cache_dir();
        let d2 = pdfium_cache_dir();
        restore_var(CACHE_DIR_VAR, previous);

        assert_eq!(d1, d2);
        assert!(d1.to_str().unwrap().contains("pdf2md"));
        assert!(d1.to_str().unwrap().contains(PDFIUM_VERSION));
    }

    #[test]
    fn cache_dir_override_via_env() {
        let _env = lock_env();
        let previous = std::env::var_os(CACHE_DIR_VAR);
        std::env::set_var(CACHE_DIR_VAR, "/tmp/test_pdf2md_override");
        let d = pdfium_cache_dir();
        // Restore before asserting so a failure does not leak the override.
        restore_var(CACHE_DIR_VAR, previous);

        assert!(d.starts_with("/tmp/test_pdf2md_override"));
        assert!(d.to_str().unwrap().contains(PDFIUM_VERSION));
    }

    #[test]
    fn platform_info_fields_nonempty() {
        let info = detect_platform().unwrap();
        assert!(!info.archive_name.is_empty());
        assert!(!info.lib_path_in_archive.is_empty());
        assert!(!info.lib_name.is_empty());
    }
}