processors_rs/pdf/tesseract/
input.rs

1use crate::pdf::tesseract::error::{TessError, TessResult};
2use image::DynamicImage;
3use std::{
4    collections::HashMap,
5    fmt::{self},
6    path::{Path, PathBuf},
7};
8
/// Options describing how Tesseract should be run for one input.
///
/// The field names mirror Tesseract's command-line options; how each value is
/// forwarded is decided by the code that consumes this struct.
#[derive(Debug, Clone, PartialEq)]
pub struct Args {
    /// Language selection (the `Default` impl uses `"eng"`).
    pub lang: String,
    /// Extra `key=value` configuration variables; see
    /// `Args::get_config_variable_args` for how they are rendered.
    pub config_variables: HashMap<String, String>,
    /// Image resolution hint; `None` leaves it unset.
    pub dpi: Option<i32>,
    /// Presumably Tesseract's page segmentation mode (`--psm`) — confirm
    /// against the command builder.
    pub psm: Option<i32>,
    /// Presumably Tesseract's OCR engine mode (`--oem`) — confirm against the
    /// command builder.
    pub oem: Option<i32>,
    /// Optional path set through `Args::with_path`.
    /// NOTE(review): looks like the location of the tesseract executable —
    /// confirm with the caller.
    pub path: Option<String>,
}
18
19impl Default for Args {
20    fn default() -> Self {
21        Args {
22            lang: "eng".into(),
23            config_variables: HashMap::new(),
24            dpi: Some(150),
25            psm: Some(3),
26            oem: Some(3),
27            path: None,
28        }
29    }
30}
31
32impl Args {
33    pub fn with_path(mut self, path: Option<&str>) -> Self {
34        self.path = path.map(|p| p.to_string());
35        self
36    }
37
38    pub(crate) fn get_config_variable_args(&self) -> Vec<String> {
39        self.config_variables
40            .iter()
41            .map(|(key, value)| format!("{}={}", key, value))
42            .collect::<Vec<_>>()
43    }
44}
45
46#[derive(Debug)]
47pub struct Image {
48    data: InputData,
49}
50
51impl Image {
52    pub fn from_path<P: Into<PathBuf>>(path: P) -> TessResult<Self> {
53        let path = path.into();
54        Self::check_image_format(&path)?;
55        Ok(Self {
56            data: InputData::Path(path),
57        })
58    }
59
60    fn check_image_format(path: &Path) -> TessResult<()> {
61        let binding = path
62            .extension()
63            .ok_or(TessError::ImageFormatError)?
64            .to_str()
65            .ok_or(TessError::ImageFormatError)?
66            .to_uppercase();
67        if matches!(
68            binding.as_str(),
69            "JPEG" | "JPG" | "PNG" | "PBM" | "PGM" | "PPM" | "TIFF" | "BMP" | "GIF" | "WEBP"
70        ) {
71            Ok(())
72        } else {
73            Err(TessError::ImageFormatError)
74        }
75    }
76
77    pub fn from_dynamic_image(image: &DynamicImage) -> TessResult<Self> {
78        //Store Image as Tempfile
79        let tempfile = tempfile::Builder::new()
80            .prefix("rusty-tesseract")
81            .suffix(".png")
82            .tempfile()
83            .map_err(|e| TessError::TempfileError(e.to_string()))?;
84        let path = tempfile.path();
85        image
86            .save_with_format(path, image::ImageFormat::Png)
87            .map_err(|e| TessError::DynamicImageError(e.to_string()))?;
88
89        Ok(Self {
90            data: InputData::Image(tempfile),
91        })
92    }
93
94    pub fn get_image_path(&self) -> TessResult<&str> {
95        match &self.data {
96            InputData::Path(x) => x.to_str(),
97            InputData::Image(x) => x.path().to_str(),
98        }
99        .ok_or(TessError::ImageNotFoundError)
100    }
101}
102
103#[derive(Debug)]
104enum InputData {
105    Path(PathBuf),
106    Image(tempfile::NamedTempFile),
107}
108
109impl fmt::Display for Image {
110    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
111        write!(f, "{}", self.get_image_path().unwrap())
112    }
113}
114
#[cfg(test)]
mod tests {
    use super::Image;
    use image::ImageReader;

    /// JPEG fixture used by both tests; relative to the directory the tests
    /// are run from.
    const SAMPLE: &str = "../test_files/clip/cat1.jpg";

    /// A path-backed image reports exactly the path it was built from.
    #[test]
    fn test_from_path() {
        let image = Image::from_path(SAMPLE).unwrap();
        let reported = image.get_image_path().unwrap();

        assert_eq!(reported, SAMPLE)
    }

    /// Writing a decoded image to the temporary PNG and reading it back
    /// yields the same pixels.
    #[test]
    fn test_from_dynamic_image() {
        let original = ImageReader::open(SAMPLE).unwrap().decode().unwrap();

        let wrapped = Image::from_dynamic_image(&original).unwrap();
        let stored_at = wrapped.get_image_path().unwrap();

        let reloaded = ImageReader::open(stored_at).unwrap().decode().unwrap();

        assert_eq!(original, reloaded);
    }
}