1use futures::{future::BoxFuture, stream::FuturesOrdered, TryStreamExt};
2use std::process::Stdio;
3use tokio::io::AsyncWriteExt;
4use tokio::process::Command;
5
6use crate::error::{PDF2ImageError, Result};
7use crate::render_options::RenderOptions;
8
/// Summary of a PDF document: how many pages it has and whether it is encrypted.
///
/// Instances are built by [`PdfInfo::read`]; the fields are private and are
/// exposed through the accessor methods on this type.
#[derive(Debug, Clone)]
pub struct PdfInfo {
    /// Number of pages reported for the document.
    page_count: u32,
    /// Whether the document was reported as encrypted.
    encrypted: bool,
}
15
16impl PdfInfo {
17 pub async fn read(data: &[u8]) -> Result<Self> {
18 let (page_count, encrypted) = extract_pdf_info(data).await?;
19
20 Ok(Self {
21 page_count,
22 encrypted,
23 })
24 }
25
26 pub fn page_count(&self) -> u32 {
28 self.page_count
29 }
30
31 pub fn is_encrypted(&self) -> bool {
33 self.encrypted
34 }
35}
36
/// Selects which pages of a document an operation should cover.
#[derive(Debug, Clone)]
pub enum Pages {
    /// Every page of the document.
    All,
    /// A contiguous, inclusive span of page numbers.
    Range(std::ops::RangeInclusive<u32>),
    /// An explicit list of page numbers, processed in the order given.
    Specific(Vec<u32>),
}
44
45pub async fn render_pdf_single_page<'data, 'options: 'data>(
47 data: &'data [u8],
48 info: &'options PdfInfo,
49 page: u32,
50 options: &'options RenderOptions,
51) -> Result<image::DynamicImage> {
52 if info.encrypted && options.password.is_none() {
53 return Err(PDF2ImageError::NoPasswordForEncryptedPDF);
54 }
55
56 let image = render_page(data, page, options).await?;
57
58 Ok(image)
59}
60
61pub async fn render_pdf_multi_page<'data, 'options: 'data>(
63 data: &'data [u8],
64 info: &'options PdfInfo,
65 pages: Pages,
66 options: &'options RenderOptions,
67) -> Result<Vec<image::DynamicImage>> {
68 if info.encrypted && options.password.is_none() {
69 return Err(PDF2ImageError::NoPasswordForEncryptedPDF);
70 }
71
72 let valid_range = 0..=info.page_count;
73
74 let pages_range: Vec<u32> = match pages {
75 Pages::All => valid_range.collect(),
76 Pages::Range(range) => range .filter(|value| valid_range.contains(value))
78 .collect(),
79 Pages::Specific(pages) => pages .into_iter()
81 .filter(|value| valid_range.contains(value))
82 .collect(),
83 };
84
85 pages_range
86 .into_iter()
87 .map(|page| -> BoxFuture<'data, Result<image::DynamicImage>> {
88 Box::pin(render_page(data, page, options))
89 })
90 .collect::<FuturesOrdered<BoxFuture<'data, Result<image::DynamicImage>>>>()
91 .try_collect()
92 .await
93}
94
95async fn render_page<'data, 'options: 'data>(
97 data: &'data [u8],
98 page: u32,
99 options: &'options RenderOptions,
100) -> Result<image::DynamicImage> {
101 let cli_options = options.to_cli_args();
102
103 let executable = get_executable_path(if options.pdftocairo {
104 "pdftocairo"
105 } else {
106 "pdftoppm"
107 });
108
109 let poppler_args: &[&str] = if options.pdftocairo {
110 &["-", "-", "-jpeg", "-singlefile"]
111 } else {
112 &["-jpeg", "-singlefile"]
113 };
114
115 let mut child = Command::new(&executable)
116 .args(poppler_args)
118 .args([
120 "-f".to_string(),
121 format!("{page}"),
122 "-l".to_string(),
123 format!("{page}"),
124 ])
125 .args(cli_options)
127 .stdin(Stdio::piped())
129 .stdout(Stdio::piped())
130 .spawn()?;
131
132 child.stdin.as_mut().unwrap().write_all(data).await?;
134
135 let output = child.wait_with_output().await?;
136 let image = image::load_from_memory_with_format(&output.stdout, image::ImageFormat::Jpeg)?;
137
138 Ok(image)
139}
140
141pub async fn pdftext_single_page<'data, 'options: 'data>(
143 data: &'data [u8],
144 info: &'options PdfInfo,
145 page: u32,
146 options: &'options RenderOptions,
147) -> Result<String> {
148 if info.encrypted && options.password.is_none() {
149 return Err(PDF2ImageError::NoPasswordForEncryptedPDF);
150 }
151
152 let image = render_page_text(data, page, options).await?;
153
154 Ok(image)
155}
156
157pub async fn pdftext_multi_page<'data, 'options: 'data>(
162 data: &'data [u8],
163 info: &'options PdfInfo,
164 pages: Pages,
165 options: &'options RenderOptions,
166) -> Result<String> {
167 if info.encrypted && options.password.is_none() {
168 return Err(PDF2ImageError::NoPasswordForEncryptedPDF);
169 }
170
171 let valid_range = 0..=info.page_count;
172
173 let pages_range: Vec<u32> = match pages {
174 Pages::All => valid_range.collect(),
175 Pages::Range(range) => range .filter(|value| valid_range.contains(value))
177 .collect(),
178 Pages::Specific(pages) => pages .into_iter()
180 .filter(|value| valid_range.contains(value))
181 .collect(),
182 };
183
184 pages_range
185 .into_iter()
186 .map(|page| -> BoxFuture<'data, Result<String>> {
187 Box::pin(render_page_text(data, page, options))
188 })
189 .collect::<FuturesOrdered<BoxFuture<'data, Result<String>>>>()
190 .try_collect()
191 .await
192}
193
194pub async fn pdftext_all_pages<'data, 'options: 'data>(
198 data: &'data [u8],
199 info: &'options PdfInfo,
200 pages: Pages,
201 options: &'options RenderOptions,
202) -> Result<String> {
203 if info.encrypted && options.password.is_none() {
204 return Err(PDF2ImageError::NoPasswordForEncryptedPDF);
205 }
206
207 let valid_range = 0..=info.page_count;
208
209 let pages_range: Vec<u32> = match pages {
210 Pages::All => return render_all_pages_text(data, options).await,
211 Pages::Range(range) => range .filter(|value| valid_range.contains(value))
213 .collect(),
214 Pages::Specific(pages) => pages .into_iter()
216 .filter(|value| valid_range.contains(value))
217 .collect(),
218 };
219
220 pages_range
221 .into_iter()
222 .map(|page| -> BoxFuture<'data, Result<String>> {
223 Box::pin(render_page_text(data, page, options))
224 })
225 .collect::<FuturesOrdered<BoxFuture<'data, Result<String>>>>()
226 .try_collect()
227 .await
228}
229
230async fn render_page_text<'data, 'options: 'data>(
232 data: &'data [u8],
233 page: u32,
234 options: &'options RenderOptions,
235) -> Result<String> {
236 let cli_options = options.to_cli_args();
237
238 let mut child = Command::new("pdftotext")
239 .args(["-", "-"])
241 .args([
243 "-f".to_string(),
244 format!("{page}"),
245 "-l".to_string(),
246 format!("{page}"),
247 ])
248 .args(cli_options)
250 .stdin(Stdio::piped())
252 .stdout(Stdio::piped())
253 .spawn()?;
254
255 child.stdin.as_mut().unwrap().write_all(data).await?;
257
258 let output = child.wait_with_output().await?;
259 let value = String::from_utf8_lossy(&output.stdout);
260
261 Ok(value.into_owned())
262}
263async fn render_all_pages_text<'data, 'options: 'data>(
265 data: &'data [u8],
266 options: &'options RenderOptions,
267) -> Result<String> {
268 let cli_options = options.to_cli_args();
269
270 let mut child = Command::new("pdftotext")
271 .args(["-", "-"])
273 .args(cli_options)
275 .stdin(Stdio::piped())
277 .stdout(Stdio::piped())
278 .spawn()?;
279
280 child.stdin.as_mut().unwrap().write_all(data).await?;
282
283 let output = child.wait_with_output().await?;
284 let value = String::from_utf8_lossy(&output.stdout);
285
286 Ok(value.into_owned())
287}
288
/// Builds the path (or bare name) used to launch the Poppler tool `command`.
///
/// When the `PDF2IMAGE_POPPLER_PATH` environment variable is set to a
/// non-empty value, the tool is looked up inside that directory; otherwise the
/// bare command name is returned. On Windows the `.exe` suffix is appended in
/// both cases.
///
/// An empty `PDF2IMAGE_POPPLER_PATH` is treated as unset, because joining it
/// with the command would produce a path at the filesystem root
/// (e.g. `/pdftoppm`).
pub fn get_executable_path(command: &str) -> String {
    let poppler_dir = std::env::var("PDF2IMAGE_POPPLER_PATH")
        .ok()
        .filter(|dir| !dir.is_empty());

    let on_windows = cfg!(target_os = "windows");

    match poppler_dir {
        Some(dir) => {
            if on_windows {
                format!("{}\\{}.exe", dir, command)
            } else {
                format!("{}/{}", dir, command)
            }
        }
        None => {
            if on_windows {
                format!("{}.exe", command)
            } else {
                command.to_string()
            }
        }
    }
}
304
305pub async fn extract_pdf_info(pdf: &[u8]) -> Result<(u32, bool)> {
306 let mut child = Command::new(get_executable_path("pdfinfo"))
307 .args(["-"])
308 .stdin(Stdio::piped())
309 .stdout(Stdio::piped())
310 .spawn()?;
311
312 child.stdin.as_mut().unwrap().write_all(pdf).await?;
314 let output = child.wait_with_output().await?;
315 let mut splits = output.stdout.split(|&x| x == b'\n');
316
317 let page_count: u32 = splits
318 .clone()
319 .find(|line| line.starts_with(b"Pages:"))
320 .map(|line| {
321 let line = std::str::from_utf8(line)?;
322 let pg_str = line
323 .split_whitespace()
324 .last()
325 .ok_or(PDF2ImageError::UnableToExtractPageCount)?;
326 pg_str
327 .parse::<u32>()
328 .map_err(|_| PDF2ImageError::UnableToExtractPageCount)
329 })
330 .ok_or(PDF2ImageError::UnableToExtractPageCount)??;
331
332 let encrypted = splits
333 .find(|line| line.starts_with(b"Encrypted:"))
334 .map(|line| {
335 let line = std::str::from_utf8(line)?;
336 Ok(
337 match line
338 .split_whitespace()
339 .last()
340 .ok_or(PDF2ImageError::UnableToExtractEncryptionStatus)?
341 {
342 "yes" => true,
343 "no" => false,
344 _ => return Err(PDF2ImageError::UnableToExtractEncryptionStatus),
345 },
346 )
347 })
348 .ok_or(PDF2ImageError::UnableToExtractEncryptionStatus)??;
349
350 Ok((page_count, encrypted))
351}