use std::{
path::{Path, PathBuf},
process::Command,
};
/// Errors produced while locating or running the poppler command-line tools.
#[derive(Debug, thiserror::Error)]
pub enum ToolError {
    /// The named tool could not be located.
    ///
    /// The payload is the tool's base name (for example `"pdftohtml"`).
    #[error(
        "poppler tool '{0}' not found. Install poppler and either add it to PATH or set POPPLER_PATH to its bin directory."
    )]
    NotFound(&'static str),

    /// The tool was started but finished with an unsuccessful exit status.
    #[error("'{tool}' failed (exit {code:?}): {stderr}")]
    Failed {
        /// Base name of the tool that failed.
        tool: &'static str,
        /// Exit code of the child process, when one is available.
        code: Option<i32>,
        /// Whatever the child wrote to its standard error stream.
        stderr: String,
    },

    /// An I/O error raised while spawning the tool or collecting its output.
    ///
    /// `#[from]` lets `?` convert a [`std::io::Error`] into this variant.
    #[error(transparent)]
    Io(#[from] std::io::Error),
}
/// Filesystem locations of the poppler executables this module shells out to.
#[derive(Debug, Clone)]
pub struct PopplerTools {
    /// Path to the `pdftohtml` executable.
    pub pdftohtml: PathBuf,
    /// Path to the `pdftotext` executable.
    pub pdftotext: PathBuf,
}
impl PopplerTools {
    /// Locates `pdftohtml` and `pdftotext` via `find_tool`.
    ///
    /// # Errors
    ///
    /// Propagates the error from `find_tool` for the first tool that cannot be
    /// located (`pdftohtml` is looked up first).
    pub fn discover() -> Result<Self, ToolError> {
        Ok(Self {
            pdftohtml: find_tool("pdftohtml")?,
            pdftotext: find_tool("pdftotext")?,
        })
    }

    /// Returns the first non-blank line printed by `pdftohtml -v`, trimmed.
    ///
    /// The banner is looked for on stderr first and then on stdout, so a blank
    /// leading line or a build that prints the banner on stdout still yields a
    /// usable string. Returns `None` when the tool cannot be run or prints
    /// nothing but whitespace. The exit status is deliberately not inspected.
    pub fn version(&self) -> Option<String> {
        let output = Command::new(&self.pdftohtml).arg("-v").output().ok()?;
        let stderr = String::from_utf8_lossy(&output.stderr);
        let stdout = String::from_utf8_lossy(&output.stdout);
        stderr
            .lines()
            .chain(stdout.lines())
            .map(str::trim)
            .find(|line| !line.is_empty())
            .map(str::to_owned)
    }

    /// Runs `pdftohtml -xml -i -stdout -q -enc UTF-8 <pdf>` and returns what it
    /// wrote to stdout.
    ///
    /// Bytes that are not valid UTF-8 are replaced with U+FFFD.
    ///
    /// # Errors
    ///
    /// * [`ToolError::Io`] if the process cannot be spawned or waited on.
    /// * [`ToolError::Failed`] if it exits unsuccessfully; the variant carries
    ///   the exit code and the captured stderr.
    pub fn pdf_to_xml(&self, pdf: &Path) -> Result<String, ToolError> {
        Self::run_captured(
            "pdftohtml",
            Command::new(&self.pdftohtml)
                .args(["-xml", "-i", "-stdout", "-q", "-enc", "UTF-8"])
                .arg(pdf),
        )
    }

    /// Runs `pdftotext -enc UTF-8 -q <pdf> -` and returns what it wrote to
    /// stdout.
    ///
    /// The trailing `-` is passed as the output file argument; per the
    /// pdftotext man page this sends the text to stdout.
    ///
    /// Bytes that are not valid UTF-8 are replaced with U+FFFD.
    ///
    /// # Errors
    ///
    /// * [`ToolError::Io`] if the process cannot be spawned or waited on.
    /// * [`ToolError::Failed`] if it exits unsuccessfully; the variant carries
    ///   the exit code and the captured stderr.
    pub fn pdf_to_text(&self, pdf: &Path) -> Result<String, ToolError> {
        Self::run_captured(
            "pdftotext",
            Command::new(&self.pdftotext)
                .args(["-enc", "UTF-8", "-q"])
                .arg(pdf)
                .arg("-"),
        )
    }

    /// Runs `command` to completion and returns its captured stdout as text.
    ///
    /// `tool` is only used to label a [`ToolError::Failed`] error.
    fn run_captured(tool: &'static str, command: &mut Command) -> Result<String, ToolError> {
        let output = command.output()?;
        if !output.status.success() {
            return Err(ToolError::Failed {
                tool,
                code: output.status.code(),
                stderr: Self::lossy_utf8(output.stderr),
            });
        }
        Ok(Self::lossy_utf8(output.stdout))
    }

    /// Converts captured bytes to a `String`, replacing invalid UTF-8 with
    /// U+FFFD.
    ///
    /// Valid UTF-8 (the common case) reuses the buffer instead of copying it,
    /// which matters for large documents.
    fn lossy_utf8(bytes: Vec<u8>) -> String {
        String::from_utf8(bytes)
            .unwrap_or_else(|err| String::from_utf8_lossy(err.as_bytes()).into_owned())
    }
}
/// Locates the poppler executable called `name`.
///
/// Search order:
///
/// 1. the directory named by the `POPPLER_PATH` environment variable, if it is
///    set and non-empty;
/// 2. every directory listed in `PATH`, in order.
///
/// On Windows `.exe` is appended to `name` before searching. On Unix a
/// candidate must be a regular file with at least one execute permission bit
/// set, so a stray non-executable file does not shadow a working install
/// further down the search path. Elsewhere any regular file is accepted.
///
/// # Errors
///
/// Returns [`ToolError::NotFound`] carrying `name` when no candidate matches.
fn find_tool(name: &'static str) -> Result<PathBuf, ToolError> {
    /// True when `candidate` is a regular file with an execute bit set.
    #[cfg(unix)]
    fn is_runnable(candidate: &Path) -> bool {
        use std::os::unix::fs::PermissionsExt;
        candidate
            .metadata()
            .map(|meta| meta.is_file() && (meta.permissions().mode() & 0o111) != 0)
            .unwrap_or(false)
    }

    /// True when `candidate` is a regular file.
    #[cfg(not(unix))]
    fn is_runnable(candidate: &Path) -> bool {
        candidate.is_file()
    }

    let exe = if cfg!(windows) {
        format!("{name}.exe")
    } else {
        name.to_string()
    };

    // `var_os` keeps non-UTF-8 directory names usable. An empty value is
    // treated as unset: joining onto an empty path would yield a bare relative
    // file name and silently search the current working directory.
    let override_dir = std::env::var_os("POPPLER_PATH")
        .filter(|dir| !dir.is_empty())
        .map(PathBuf::from);

    let path_var = std::env::var_os("PATH");
    let path_dirs = path_var
        .iter()
        .flat_map(|paths| std::env::split_paths(paths));

    override_dir
        .into_iter()
        .chain(path_dirs)
        .map(|dir| dir.join(&exe))
        .find(|candidate| is_runnable(candidate))
        .ok_or(ToolError::NotFound(name))
}