oar_ocr/processors/resize/
ocr.rs

1//! OCR-specific image resizing functionality.
2//!
3//! This module provides functionality for resizing images specifically for OCR processing.
4//! It includes dynamic resizing based on image ratios and static resizing to fixed dimensions.
5
6use crate::core::{
7    OCRError,
8    constants::{DEFAULT_MAX_IMG_WIDTH, DEFAULT_REC_IMAGE_SHAPE},
9};
10use crate::utils::{OCRResizePadConfig, ocr_resize_and_pad};
11use image::RgbImage;
12
/// OCR-specific image resizer.
///
/// Holds the shape configuration used when preparing images for OCR. Depending on
/// whether `input_shape` is set, `apply` either resizes dynamically (driven by
/// `rec_image_shape` and `max_img_w`) or statically to the fixed `input_shape`.
///
/// The type is plain data, so it can be cloned and compared for equality.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct OCRResize {
    /// Recognition image shape as `[channels, height, width]`.
    pub rec_image_shape: [usize; 3],
    /// Optional fixed input shape as `[channels, height, width]`.
    ///
    /// `None` selects dynamic resizing in `apply`; `Some` selects static resizing.
    pub input_shape: Option<[usize; 3]>,
    /// Maximum image width handed to the resize-and-pad helper.
    pub max_img_w: usize,
}
24
25impl OCRResize {
26    /// Creates a new OCRResize instance with default maximum width.
27    ///
28    /// # Arguments
29    ///
30    /// * `rec_image_shape` - Optional shape for recognition images [channels, height, width].
31    ///   If None, uses DEFAULT_REC_IMAGE_SHAPE.
32    /// * `input_shape` - Optional input shape [channels, height, width].
33    ///
34    /// # Returns
35    ///
36    /// A new OCRResize instance.
37    pub fn new(rec_image_shape: Option<[usize; 3]>, input_shape: Option<[usize; 3]>) -> Self {
38        Self::with_max_width(rec_image_shape, input_shape, None)
39    }
40
41    /// Creates a new OCRResize instance with custom maximum width.
42    ///
43    /// # Arguments
44    ///
45    /// * `rec_image_shape` - Optional shape for recognition images [channels, height, width].
46    ///   If None, uses DEFAULT_REC_IMAGE_SHAPE.
47    /// * `input_shape` - Optional input shape [channels, height, width].
48    /// * `max_img_w` - Optional maximum image width. If None, uses DEFAULT_MAX_IMG_WIDTH.
49    ///
50    /// # Returns
51    ///
52    /// A new OCRResize instance.
53    pub fn with_max_width(
54        rec_image_shape: Option<[usize; 3]>,
55        input_shape: Option<[usize; 3]>,
56        max_img_w: Option<usize>,
57    ) -> Self {
58        let rec_image_shape = rec_image_shape.unwrap_or(DEFAULT_REC_IMAGE_SHAPE);
59        Self {
60            rec_image_shape,
61            input_shape,
62            max_img_w: max_img_w.unwrap_or(DEFAULT_MAX_IMG_WIDTH),
63        }
64    }
65
66    /// Resizes an image based on a maximum width-to-height ratio.
67    ///
68    /// This method resizes an image to fit within the specified dimensions while maintaining
69    /// the aspect ratio. If the calculated width exceeds the maximum allowed width, the image
70    /// is resized to the maximum width.
71    ///
72    /// # Arguments
73    ///
74    /// * `img` - The input RGB image to resize.
75    /// * `max_wh_ratio` - The maximum width-to-height ratio for the resized image.
76    ///
77    /// # Returns
78    ///
79    /// A resized and padded RGB image.
80    pub fn resize_img(&self, img: &RgbImage, max_wh_ratio: f32) -> RgbImage {
81        let [_img_c, img_h, _img_w] = self.rec_image_shape;
82
83        let config = OCRResizePadConfig::new(img_h as u32, self.max_img_w as u32);
84        let (padded_image, _actual_width) = ocr_resize_and_pad(img, &config, Some(max_wh_ratio));
85
86        padded_image
87    }
88
89    /// Resizes an image using the default width-to-height ratio from rec_image_shape.
90    ///
91    /// This method calculates the width-to-height ratio from the configured rec_image_shape
92    /// and uses it to resize the image via resize_img.
93    ///
94    /// # Arguments
95    ///
96    /// * `img` - The input RGB image to resize.
97    ///
98    /// # Returns
99    ///
100    /// A resized and padded RGB image.
101    pub fn resize(&self, img: &RgbImage) -> RgbImage {
102        let [_, img_h, img_w] = self.rec_image_shape;
103        let max_wh_ratio = img_w as f32 / img_h as f32;
104        self.resize_img(img, max_wh_ratio)
105    }
106
107    /// Resizes an image to a static size defined by input_shape.
108    ///
109    /// This method resizes an image to exact dimensions specified in the input_shape.
110    /// It requires input_shape to be configured, otherwise it returns a ConfigError.
111    ///
112    /// # Arguments
113    ///
114    /// * `img` - The input RGB image to resize.
115    ///
116    /// # Returns
117    ///
118    /// A resized RGB image or an OCRError if input_shape is not configured.
119    pub fn static_resize(&self, img: &RgbImage) -> Result<RgbImage, OCRError> {
120        let [_img_c, img_h, img_w] = self.input_shape.ok_or_else(|| {
121            OCRError::resize_error(
122                "Input shape not configured for static resize",
123                crate::core::errors::SimpleError::new("Missing input shape configuration"),
124            )
125        })?;
126
127        let resized_image = image::imageops::resize(
128            img,
129            img_w as u32,
130            img_h as u32,
131            image::imageops::FilterType::Triangle,
132        );
133
134        Ok(resized_image)
135    }
136
137    /// Applies resizing to a batch of images.
138    ///
139    /// This method applies either dynamic resizing (using resize) or static resizing
140    /// (using static_resize) to a batch of images, depending on whether input_shape is configured.
141    /// If input_shape is None, dynamic resizing is used; otherwise, static resizing is used.
142    ///
143    /// # Arguments
144    ///
145    /// * `imgs` - A slice of RGB images to resize.
146    ///
147    /// # Returns
148    ///
149    /// A vector of resized RGB images or an OCRError if static resizing fails.
150    pub fn apply(&self, imgs: &[RgbImage]) -> Result<Vec<RgbImage>, OCRError> {
151        if self.input_shape.is_none() {
152            Ok(imgs.iter().map(|img| self.resize(img)).collect())
153        } else {
154            imgs.iter().map(|img| self.static_resize(img)).collect()
155        }
156    }
157
158    /// Resizes an image to fit tensor shape requirements.
159    ///
160    /// This method resizes an image to fit within the dimensions specified by rec_image_shape,
161    /// while maintaining the aspect ratio. If the calculated width exceeds the maximum allowed
162    /// width, the image is resized to the maximum width. The resulting image is padded to
163    /// match the target dimensions.
164    ///
165    /// # Arguments
166    ///
167    /// * `img` - The input RGB image to resize.
168    ///
169    /// # Returns
170    ///
171    /// A resized and padded RGB image or an OCRError.
172    pub fn resize_to_tensor_shape(&self, img: &RgbImage) -> Result<RgbImage, OCRError> {
173        let [_img_c, img_h, _img_w] = self.rec_image_shape;
174
175        let config = OCRResizePadConfig::new(img_h as u32, self.max_img_w as u32);
176        let (padded_image, _actual_width) = ocr_resize_and_pad(img, &config, None);
177
178        Ok(padded_image)
179    }
180
181    /// Applies tensor shape resizing to a batch of images.
182    ///
183    /// This method applies resize_to_tensor_shape to a batch of images. It handles
184    /// empty batches by returning an empty vector.
185    ///
186    /// # Arguments
187    ///
188    /// * `imgs` - A slice of RGB images to resize.
189    ///
190    /// # Returns
191    ///
192    /// A vector of resized RGB images or an OCRError if resizing fails.
193    pub fn apply_to_images(&self, imgs: &[RgbImage]) -> Result<Vec<RgbImage>, OCRError> {
194        if imgs.is_empty() {
195            return Ok(Vec::new());
196        }
197
198        let mut resized_images = Vec::with_capacity(imgs.len());
199
200        for img in imgs {
201            let resized_img = self.resize_to_tensor_shape(img)?;
202            resized_images.push(resized_img);
203        }
204
205        Ok(resized_images)
206    }
207}