processors_rs/pdf/tesseract/
command.rs

1use input::{Args, Image};
2
3use super::*;
4use std::process::{Command, Stdio};
5use std::string::ToString;
6
7use crate::pdf::tesseract::error::{TessError, TessResult};
8#[cfg(target_os = "windows")]
9use std::os::windows::process::CommandExt;
10
/// Win32 process-creation flag passed to `creation_flags` before spawning
/// tesseract on Windows (value of the Win32 `CREATE_NO_WINDOW` constant).
#[cfg(target_os = "windows")]
const CREATE_NO_WINDOW: u32 = 0x0800_0000;
13
/// Builds a bare [`Command`] for the tesseract executable, with no arguments set.
///
/// * `path` - when `Some`, used verbatim as the program to run; when `None`,
///   the default binary name is used (`tesseract.exe` on Windows, `tesseract`
///   everywhere else).
pub(crate) fn get_tesseract_command(path: Option<&str>) -> Command {
    let program = match path {
        Some(explicit) => explicit,
        None if cfg!(target_os = "windows") => "tesseract.exe",
        None => "tesseract",
    };

    Command::new(program)
}
27
28pub fn get_tesseract_version() -> TessResult<String> {
29    let mut command = get_tesseract_command(None);
30    command.arg("--version");
31
32    run_tesseract_command(&mut command)
33}
34
35pub fn get_tesseract_langs() -> TessResult<Vec<String>> {
36    let mut command = get_tesseract_command(None);
37    command.arg("--list-langs");
38
39    let output = run_tesseract_command(&mut command)?;
40    let langs = output.lines().skip(1).map(|x| x.into()).collect();
41    Ok(langs)
42}
43
44pub(crate) fn run_tesseract_command(command: &mut Command) -> TessResult<String> {
45    if cfg!(debug_assertions) {
46        show_command(command);
47    }
48
49    #[cfg(target_os = "windows")]
50    command.creation_flags(CREATE_NO_WINDOW);
51
52    let child = command
53        .stdout(Stdio::piped())
54        .stderr(Stdio::piped())
55        .spawn()
56        .map_err(|_| TessError::TesseractNotFoundError)?;
57
58    let output = child
59        .wait_with_output()
60        .map_err(|_| TessError::TesseractNotFoundError)?;
61
62    let out = String::from_utf8(output.stdout).unwrap();
63    let err = String::from_utf8(output.stderr).unwrap();
64    let status = output.status;
65
66    match status.code() {
67        Some(0) => Ok(out),
68        _ => Err(TessError::CommandExitStatusError(status.to_string(), err)),
69    }
70}
71
/// Prints the program and arguments of `command` to stdout as a single line,
/// for debugging.
///
/// Program and arguments are rendered lossily (invalid UTF-8 becomes U+FFFD),
/// so this diagnostic helper never panics on unusual paths and does not hide
/// arguments that are not valid UTF-8.
fn show_command(command: &Command) {
    let params: Vec<_> = command
        .get_args()
        .map(|arg| arg.to_string_lossy())
        .collect();

    println!(
        "Tesseract Command: {} {}",
        command.get_program().to_string_lossy(),
        params.join(" ")
    );
}
85
86pub fn image_to_string(image: &Image, args: &Args) -> TessResult<String> {
87    let mut command = create_tesseract_command(image, args)?;
88    let output = run_tesseract_command(&mut command)?;
89
90    Ok(output)
91}
92
93pub(crate) fn create_tesseract_command(image: &Image, args: &Args) -> TessResult<Command> {
94    let path = args.path.clone();
95    let mut command = get_tesseract_command(path.as_deref());
96    command
97        .arg(image.get_image_path()?)
98        .arg("stdout")
99        .arg("-l")
100        .arg(args.lang.clone());
101
102    if let Some(dpi) = args.dpi {
103        command.arg("--dpi").arg(dpi.to_string());
104    }
105
106    if let Some(psm) = args.psm {
107        command.arg("--psm").arg(psm.to_string());
108    }
109
110    if let Some(oem) = args.oem {
111        command.arg("--oem").arg(oem.to_string());
112    }
113
114    for parameter in args.get_config_variable_args() {
115        command.arg("-c").arg(parameter);
116    }
117
118    Ok(command)
119}
120
#[cfg(test)]
mod tests {
    use crate::pdf::tesseract::command::get_tesseract_langs;

    // NOTE(review): this test invokes the real tesseract binary, so it
    // presumably needs tesseract installed with English data available.
    #[test]
    fn test_get_tesseract_langs() {
        let installed = get_tesseract_langs().unwrap();
        let has_english = installed.iter().any(|lang| lang == "eng");

        assert!(has_english);
    }
}