processors_rs/pdf/tesseract/
command.rs1use input::{Args, Image};
2
3use super::*;
4use std::process::{Command, Stdio};
5use std::string::ToString;
6
7use crate::pdf::tesseract::error::{TessError, TessResult};
8#[cfg(target_os = "windows")]
9use std::os::windows::process::CommandExt;
10
/// Windows process-creation flag `CREATE_NO_WINDOW`.
///
/// Only compiled on Windows, where `run_tesseract_command` passes it to
/// `CommandExt::creation_flags` before spawning the child process.
/// NOTE(review): per the Win32 documentation this keeps a console window from
/// being created for the child — confirm that is the intent here.
#[cfg(target_os = "windows")]
const CREATE_NO_WINDOW: u32 = 0x08000000;
13
/// Builds a bare [`Command`] for the Tesseract executable.
///
/// When `path` is `Some`, that string is used verbatim as the program to run.
/// Otherwise the program name defaults to `tesseract.exe` on Windows targets
/// and `tesseract` everywhere else. No arguments are attached.
pub(crate) fn get_tesseract_command(path: Option<&str>) -> Command {
    // Platform-dependent fallback used only when the caller gives no path.
    let default_program = match cfg!(target_os = "windows") {
        true => "tesseract.exe",
        false => "tesseract",
    };

    Command::new(path.unwrap_or(default_program))
}
27
28pub fn get_tesseract_version() -> TessResult<String> {
29 let mut command = get_tesseract_command(None);
30 command.arg("--version");
31
32 run_tesseract_command(&mut command)
33}
34
35pub fn get_tesseract_langs() -> TessResult<Vec<String>> {
36 let mut command = get_tesseract_command(None);
37 command.arg("--list-langs");
38
39 let output = run_tesseract_command(&mut command)?;
40 let langs = output.lines().skip(1).map(|x| x.into()).collect();
41 Ok(langs)
42}
43
44pub(crate) fn run_tesseract_command(command: &mut Command) -> TessResult<String> {
45 if cfg!(debug_assertions) {
46 show_command(command);
47 }
48
49 #[cfg(target_os = "windows")]
50 command.creation_flags(CREATE_NO_WINDOW);
51
52 let child = command
53 .stdout(Stdio::piped())
54 .stderr(Stdio::piped())
55 .spawn()
56 .map_err(|_| TessError::TesseractNotFoundError)?;
57
58 let output = child
59 .wait_with_output()
60 .map_err(|_| TessError::TesseractNotFoundError)?;
61
62 let out = String::from_utf8(output.stdout).unwrap();
63 let err = String::from_utf8(output.stderr).unwrap();
64 let status = output.status;
65
66 match status.code() {
67 Some(0) => Ok(out),
68 _ => Err(TessError::CommandExitStatusError(status.to_string(), err)),
69 }
70}
71
/// Prints the program and arguments of `command` to standard output on a
/// single line, prefixed with `Tesseract Command:`.
///
/// The program name and every argument are rendered with lossy UTF-8
/// conversion, so a non-UTF-8 path or argument is shown with U+FFFD
/// replacement characters rather than panicking or being printed as an empty
/// string.
fn show_command(command: &Command) {
    let params: Vec<String> = command
        .get_args()
        .map(|arg| arg.to_string_lossy().into_owned())
        .collect();

    println!(
        "Tesseract Command: {} {}",
        command.get_program().to_string_lossy(),
        params.join(" ")
    );
}
85
86pub fn image_to_string(image: &Image, args: &Args) -> TessResult<String> {
87 let mut command = create_tesseract_command(image, args)?;
88 let output = run_tesseract_command(&mut command)?;
89
90 Ok(output)
91}
92
93pub(crate) fn create_tesseract_command(image: &Image, args: &Args) -> TessResult<Command> {
94 let path = args.path.clone();
95 let mut command = get_tesseract_command(path.as_deref());
96 command
97 .arg(image.get_image_path()?)
98 .arg("stdout")
99 .arg("-l")
100 .arg(args.lang.clone());
101
102 if let Some(dpi) = args.dpi {
103 command.arg("--dpi").arg(dpi.to_string());
104 }
105
106 if let Some(psm) = args.psm {
107 command.arg("--psm").arg(psm.to_string());
108 }
109
110 if let Some(oem) = args.oem {
111 command.arg("--oem").arg(oem.to_string());
112 }
113
114 for parameter in args.get_config_variable_args() {
115 command.arg("-c").arg(parameter);
116 }
117
118 Ok(command)
119}
120
#[cfg(test)]
mod tests {
    use crate::pdf::tesseract::command::get_tesseract_langs;

    /// Calls the real Tesseract executable and checks that `eng` is among the
    /// languages it lists; a failure to run the command fails the test.
    #[test]
    fn test_get_tesseract_langs() {
        let available = get_tesseract_langs().unwrap();
        let has_english = available.iter().any(|lang| lang == "eng");

        assert!(has_english);
    }
}