1use hygg_shared::normalize_file_path;
2use std::{
3 env,
4 io::{BufWriter, Cursor},
5};
6
7pub fn pdf_to_text(
8 pdf_path: &str,
9) -> Result<String, Box<dyn std::error::Error>> {
10 let canonical_path = normalize_file_path(pdf_path)?;
11 #[cfg(target_os = "windows")]
12 redirect_stderr::redirect_stdout()?;
13
14 #[allow(unused_assignments)]
15 let mut original_fd = -1;
16
17 #[allow(unused_assignments)]
18 let mut duplicate_fd = -1;
19
20 #[cfg(not(target_os = "windows"))]
21 {
22 extern crate libc;
23
24 use std::fs::File;
25 use std::io::{self, Write};
26 use std::os::fd::AsRawFd;
27 use std::os::unix::io::FromRawFd;
28
29 let stdout = io::stdout();
30 original_fd = stdout.as_raw_fd();
31
32 duplicate_fd = unsafe { libc::dup(original_fd) };
33
34 let dev_null = File::open("/dev/null")
35 .map_err(|e| format!("Failed to open /dev/null: {e}"))?;
36 unsafe {
37 libc::dup2(dev_null.as_raw_fd(), original_fd);
38 }
39 }
40
41 let mut output_buf = Vec::new();
42 {
43 let mut output_file = BufWriter::new(Cursor::new(&mut output_buf));
44
45 let doc = pdf_extract::Document::load(&canonical_path)?;
46
47 pdf_extract::print_metadata(&doc);
48
49 let mut output = Box::new(pdf_extract::PlainTextOutput::new(
50 &mut output_file as &mut dyn std::io::Write,
51 ));
52
53 pdf_extract::output_doc(&doc, output.as_mut())?;
54 }
55
56 #[cfg(target_os = "windows")]
57 redirect_stderr::restore_stdout()?;
58
59 #[cfg(not(target_os = "windows"))]
60 {
61 extern crate libc;
62
63 use std::fs::File;
64 use std::io::{self, Write};
65 use std::os::fd::AsRawFd;
66 use std::os::unix::io::FromRawFd;
67
68 unsafe {
69 libc::dup2(duplicate_fd, original_fd);
70 }
71 }
72
73 let res = std::str::from_utf8(&output_buf)
77 .map_err(|e| format!("Failed to convert PDF output to UTF-8: {e}"))?
78 .to_owned();
79
80 Ok(res)
81}