cli_pdf_to_text/
lib.rs

1use hygg_shared::normalize_file_path;
2use std::{
3  env,
4  io::{BufWriter, Cursor},
5};
6
7pub fn pdf_to_text(
8  pdf_path: &str,
9) -> Result<String, Box<dyn std::error::Error>> {
10  let canonical_path = normalize_file_path(pdf_path)?;
11  #[cfg(target_os = "windows")]
12  redirect_stderr::redirect_stdout()?;
13
14  #[allow(unused_assignments)]
15  let mut original_fd = -1;
16
17  #[allow(unused_assignments)]
18  let mut duplicate_fd = -1;
19
20  #[cfg(not(target_os = "windows"))]
21  {
22    extern crate libc;
23
24    use std::fs::File;
25    use std::io::{self, Write};
26    use std::os::fd::AsRawFd;
27    use std::os::unix::io::FromRawFd;
28
29    let stdout = io::stdout();
30    original_fd = stdout.as_raw_fd();
31
32    duplicate_fd = unsafe { libc::dup(original_fd) };
33
34    let dev_null = File::open("/dev/null")
35      .map_err(|e| format!("Failed to open /dev/null: {e}"))?;
36    unsafe {
37      libc::dup2(dev_null.as_raw_fd(), original_fd);
38    }
39  }
40
41  let mut output_buf = Vec::new();
42  {
43    let mut output_file = BufWriter::new(Cursor::new(&mut output_buf));
44
45    let doc = pdf_extract::Document::load(&canonical_path)?;
46
47    pdf_extract::print_metadata(&doc);
48
49    let mut output = Box::new(pdf_extract::PlainTextOutput::new(
50      &mut output_file as &mut dyn std::io::Write,
51    ));
52
53    pdf_extract::output_doc(&doc, output.as_mut())?;
54  }
55
56  #[cfg(target_os = "windows")]
57  redirect_stderr::restore_stdout()?;
58
59  #[cfg(not(target_os = "windows"))]
60  {
61    extern crate libc;
62
63    use std::fs::File;
64    use std::io::{self, Write};
65    use std::os::fd::AsRawFd;
66    use std::os::unix::io::FromRawFd;
67
68    unsafe {
69      libc::dup2(duplicate_fd, original_fd);
70    }
71  }
72
73  // println!("{:?}", output_buf);
74  // panic!();
75
76  let res = std::str::from_utf8(&output_buf)
77    .map_err(|e| format!("Failed to convert PDF output to UTF-8: {e}"))?
78    .to_owned();
79
80  Ok(res)
81}