1use std::{
9 path::{Path, PathBuf},
10 process::Command,
11};
12
/// Errors produced while locating or running one of the poppler command-line tools.
#[derive(Debug, thiserror::Error)]
pub enum ToolError {
    /// No file with the tool's name was found during lookup; the payload is the
    /// tool name that was searched for (without any platform suffix).
    #[error(
        "poppler tool '{0}' not found. Install poppler and either add it to PATH or set POPPLER_PATH to its bin directory."
    )]
    NotFound(&'static str),

    /// The tool was started but its exit status did not indicate success.
    #[error("'{tool}' failed (exit {code:?}): {stderr}")]
    Failed {
        /// Name of the tool that was invoked.
        tool: &'static str,
        /// Exit code as reported by `ExitStatus::code()`; `None` when no code is available.
        code: Option<i32>,
        /// Everything the tool wrote to stderr, lossily decoded as UTF-8.
        stderr: String,
    },

    /// An I/O error (for example, failure to spawn the process), converted via
    /// `From<std::io::Error>` and displayed transparently as the underlying error.
    #[error(transparent)]
    Io(#[from] std::io::Error),
}
30
31#[derive(Debug, Clone)]
32pub struct PopplerTools {
33 pub pdftohtml: PathBuf,
34 pub pdftotext: PathBuf,
35}
36
37impl PopplerTools {
38 pub fn discover() -> Result<Self, ToolError> {
40 Ok(Self {
41 pdftohtml: find_tool("pdftohtml")?,
42 pdftotext: find_tool("pdftotext")?,
43 })
44 }
45
46 pub fn version(&self) -> Option<String> {
48 let output = Command::new(&self.pdftohtml).arg("-v").output().ok()?;
49 let banner = String::from_utf8_lossy(&output.stderr);
50 banner.lines().next().map(|line| line.trim().to_string())
51 }
52
53 pub fn pdf_to_xml(&self, pdf: &Path) -> Result<String, ToolError> {
55 let output = Command::new(&self.pdftohtml)
56 .args(["-xml", "-i", "-stdout", "-q", "-enc", "UTF-8"])
57 .arg(pdf)
58 .output()?;
59 if !output.status.success() {
60 return Err(ToolError::Failed {
61 tool: "pdftohtml",
62 code: output.status.code(),
63 stderr: String::from_utf8_lossy(&output.stderr).into_owned(),
64 });
65 }
66 Ok(String::from_utf8_lossy(&output.stdout).into_owned())
67 }
68
69 pub fn pdf_to_text(&self, pdf: &Path) -> Result<String, ToolError> {
73 let output = Command::new(&self.pdftotext)
74 .args(["-enc", "UTF-8", "-q"])
75 .arg(pdf)
76 .arg("-")
77 .output()?;
78 if !output.status.success() {
79 return Err(ToolError::Failed {
80 tool: "pdftotext",
81 code: output.status.code(),
82 stderr: String::from_utf8_lossy(&output.stderr).into_owned(),
83 });
84 }
85 Ok(String::from_utf8_lossy(&output.stdout).into_owned())
86 }
87}
88
89fn find_tool(name: &'static str) -> Result<PathBuf, ToolError> {
90 let exe = if cfg!(windows) {
91 format!("{name}.exe")
92 } else {
93 name.to_string()
94 };
95
96 if let Ok(dir) = std::env::var("POPPLER_PATH") {
97 let candidate = Path::new(&dir).join(&exe);
98 if candidate.is_file() {
99 return Ok(candidate);
100 }
101 }
102
103 if let Some(paths) = std::env::var_os("PATH") {
104 for dir in std::env::split_paths(&paths) {
105 let candidate = dir.join(&exe);
106 if candidate.is_file() {
107 return Ok(candidate);
108 }
109 }
110 }
111
112 Err(ToolError::NotFound(name))
113}