// cli_pdf_to_text/lib.rs

use redirect_stderr;

use lopdf;
use pdf_extract;
use std::{
  env,
  io::{BufWriter, Cursor},
};

pub fn pdf_to_text(
  pdf_path: &str,
) -> Result<String, Box<dyn std::error::Error>> {
  #[cfg(target_os = "windows")]
  redirect_stderr::redirect_stdout()?;

  #[allow(unused_assignments)]
  let mut original_fd = -1;

  #[allow(unused_assignments)]
  let mut duplicate_fd = -1;

  #[cfg(not(target_os = "windows"))]
  {
    extern crate libc;

    use std::fs::File;
    use std::io::{self, Write};
    use std::os::fd::AsRawFd;
    use std::os::unix::io::FromRawFd;

    let stdout = io::stdout();
    original_fd = stdout.as_raw_fd();

    duplicate_fd = unsafe { libc::dup(original_fd) };

    let dev_null = File::open("/dev/null").unwrap();
    unsafe {
      libc::dup2(dev_null.as_raw_fd(), original_fd);
    }
  }

  let path = std::path::Path::new(pdf_path);

  let mut output_buf = Vec::new();
  {
    let mut output_file = BufWriter::new(Cursor::new(&mut output_buf));

    let doc = lopdf::Document::load(path)?;

    pdf_extract::print_metadata(&doc);

    let mut output = Box::new(pdf_extract::PlainTextOutput::new(
      &mut output_file as &mut dyn std::io::Write,
    ));

    pdf_extract::output_doc(&doc, output.as_mut())?;
  }

  #[cfg(target_os = "windows")]
  redirect_stderr::restore_stdout()?;

  #[cfg(not(target_os = "windows"))]
  {
    extern crate libc;

    use std::fs::File;
    use std::io::{self, Write};
    use std::os::fd::AsRawFd;
    use std::os::unix::io::FromRawFd;

    unsafe {
      libc::dup2(duplicate_fd, original_fd);
    }
  }

  // println!("{:?}", output_buf);
  // panic!();

  let res = std::str::from_utf8(&output_buf)
    .expect("Could not convert to String")
    .to_owned();

  return Ok(res);
}