processors_rs/pdf/tesseract/
input.rs1use crate::pdf::tesseract::error::{TessError, TessResult};
2use image::DynamicImage;
3use std::{
4 collections::HashMap,
5 fmt::{self},
6 path::{Path, PathBuf},
7};
8
/// Options bundle for a Tesseract invocation.
///
/// This is plain data: nothing in this type validates the values. All fields
/// are public so callers can use struct-update syntax on top of
/// `Args::default()`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Args {
    /// Language identifier (the `Default` impl uses `"eng"`).
    pub lang: String,
    /// Extra configuration variables, rendered as `key=value` strings by
    /// `get_config_variable_args`.
    pub config_variables: HashMap<String, String>,
    /// Optional DPI value.
    // NOTE(review): presumably forwarded as Tesseract's `--dpi` — confirm in the command builder.
    pub dpi: Option<i32>,
    /// Optional page segmentation mode.
    // NOTE(review): presumably forwarded as Tesseract's `--psm` — confirm in the command builder.
    pub psm: Option<i32>,
    /// Optional OCR engine mode.
    // NOTE(review): presumably forwarded as Tesseract's `--oem` — confirm in the command builder.
    pub oem: Option<i32>,
    /// Optional path string, set through `with_path`.
    // NOTE(review): what this path points at is not visible in this file — confirm with the caller.
    pub path: Option<String>,
}
18
19impl Default for Args {
20 fn default() -> Self {
21 Args {
22 lang: "eng".into(),
23 config_variables: HashMap::new(),
24 dpi: Some(150),
25 psm: Some(3),
26 oem: Some(3),
27 path: None,
28 }
29 }
30}
31
32impl Args {
33 pub fn with_path(mut self, path: Option<&str>) -> Self {
34 self.path = path.map(|p| p.to_string());
35 self
36 }
37
38 pub(crate) fn get_config_variable_args(&self) -> Vec<String> {
39 self.config_variables
40 .iter()
41 .map(|(key, value)| format!("{}={}", key, value))
42 .collect::<Vec<_>>()
43 }
44}
45
/// An input image for the Tesseract wrapper.
///
/// Construct it with `Image::from_path` or `Image::from_dynamic_image`; the
/// location on disk is then available through `Image::get_image_path`.
#[derive(Debug)]
pub struct Image {
    // Where the image lives: a caller-supplied path or a temporary file.
    data: InputData,
}
50
51impl Image {
52 pub fn from_path<P: Into<PathBuf>>(path: P) -> TessResult<Self> {
53 let path = path.into();
54 Self::check_image_format(&path)?;
55 Ok(Self {
56 data: InputData::Path(path),
57 })
58 }
59
60 fn check_image_format(path: &Path) -> TessResult<()> {
61 let binding = path
62 .extension()
63 .ok_or(TessError::ImageFormatError)?
64 .to_str()
65 .ok_or(TessError::ImageFormatError)?
66 .to_uppercase();
67 if matches!(
68 binding.as_str(),
69 "JPEG" | "JPG" | "PNG" | "PBM" | "PGM" | "PPM" | "TIFF" | "BMP" | "GIF" | "WEBP"
70 ) {
71 Ok(())
72 } else {
73 Err(TessError::ImageFormatError)
74 }
75 }
76
77 pub fn from_dynamic_image(image: &DynamicImage) -> TessResult<Self> {
78 let tempfile = tempfile::Builder::new()
80 .prefix("rusty-tesseract")
81 .suffix(".png")
82 .tempfile()
83 .map_err(|e| TessError::TempfileError(e.to_string()))?;
84 let path = tempfile.path();
85 image
86 .save_with_format(path, image::ImageFormat::Png)
87 .map_err(|e| TessError::DynamicImageError(e.to_string()))?;
88
89 Ok(Self {
90 data: InputData::Image(tempfile),
91 })
92 }
93
94 pub fn get_image_path(&self) -> TessResult<&str> {
95 match &self.data {
96 InputData::Path(x) => x.to_str(),
97 InputData::Image(x) => x.path().to_str(),
98 }
99 .ok_or(TessError::ImageNotFoundError)
100 }
101}
102
/// Backing storage for an `Image`.
#[derive(Debug)]
enum InputData {
    /// A path supplied by the caller (see `Image::from_path`).
    Path(PathBuf),
    /// A temporary file holding the PNG written by `Image::from_dynamic_image`.
    Image(tempfile::NamedTempFile),
}
108
109impl fmt::Display for Image {
110 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
111 write!(f, "{}", self.get_image_path().unwrap())
112 }
113}
114
#[cfg(test)]
mod tests {
    use super::Image;
    use image::ImageReader;

    /// Fixture image used by both tests.
    const CAT_FIXTURE: &str = "../test_files/clip/cat1.jpg";

    /// A path-backed `Image` reports exactly the path it was built from.
    #[test]
    fn test_from_path() {
        let wrapped = Image::from_path(CAT_FIXTURE).unwrap();
        let reported = wrapped.get_image_path().unwrap();

        assert_eq!(reported, "../test_files/clip/cat1.jpg");
    }

    /// An in-memory image written out through `from_dynamic_image` decodes
    /// back to the same pixels.
    #[test]
    fn test_from_dynamic_image() {
        let original = ImageReader::open(CAT_FIXTURE).unwrap().decode().unwrap();

        let wrapped = Image::from_dynamic_image(&original).unwrap();
        let temp_location = wrapped.get_image_path().unwrap();

        let reloaded = ImageReader::open(temp_location)
            .unwrap()
            .decode()
            .unwrap();

        assert_eq!(original, reloaded);
    }
}