oar_ocr/processors/resize/ocr.rs
1//! OCR-specific image resizing functionality.
2//!
3//! This module provides functionality for resizing images specifically for OCR processing.
4//! It includes dynamic resizing based on image ratios and static resizing to fixed dimensions.
5
6use crate::core::{
7 OCRError,
8 constants::{DEFAULT_MAX_IMG_WIDTH, DEFAULT_REC_IMAGE_SHAPE},
9};
10use crate::utils::{OCRResizePadConfig, ocr_resize_and_pad};
11use image::RgbImage;
12
/// Image resizer used to prepare images for OCR recognition.
///
/// The resizer carries the shape configuration consumed by its methods. Batch
/// processing through `apply` picks dynamic resizing when `input_shape` is
/// `None` and static (fixed-size) resizing when it is `Some`.
#[derive(Debug)]
pub struct OCRResize {
    /// Recognition image shape, laid out as `[channels, height, width]`.
    pub rec_image_shape: [usize; 3],
    /// Optional fixed input shape, laid out as `[channels, height, width]`.
    /// When present, `static_resize` scales images to exactly this height and width.
    pub input_shape: Option<[usize; 3]>,
    /// Maximum image width handed to the resize-and-pad configuration.
    pub max_img_w: usize,
}
24
25impl OCRResize {
26 /// Creates a new OCRResize instance with default maximum width.
27 ///
28 /// # Arguments
29 ///
30 /// * `rec_image_shape` - Optional shape for recognition images [channels, height, width].
31 /// If None, uses DEFAULT_REC_IMAGE_SHAPE.
32 /// * `input_shape` - Optional input shape [channels, height, width].
33 ///
34 /// # Returns
35 ///
36 /// A new OCRResize instance.
37 pub fn new(rec_image_shape: Option<[usize; 3]>, input_shape: Option<[usize; 3]>) -> Self {
38 Self::with_max_width(rec_image_shape, input_shape, None)
39 }
40
41 /// Creates a new OCRResize instance with custom maximum width.
42 ///
43 /// # Arguments
44 ///
45 /// * `rec_image_shape` - Optional shape for recognition images [channels, height, width].
46 /// If None, uses DEFAULT_REC_IMAGE_SHAPE.
47 /// * `input_shape` - Optional input shape [channels, height, width].
48 /// * `max_img_w` - Optional maximum image width. If None, uses DEFAULT_MAX_IMG_WIDTH.
49 ///
50 /// # Returns
51 ///
52 /// A new OCRResize instance.
53 pub fn with_max_width(
54 rec_image_shape: Option<[usize; 3]>,
55 input_shape: Option<[usize; 3]>,
56 max_img_w: Option<usize>,
57 ) -> Self {
58 let rec_image_shape = rec_image_shape.unwrap_or(DEFAULT_REC_IMAGE_SHAPE);
59 Self {
60 rec_image_shape,
61 input_shape,
62 max_img_w: max_img_w.unwrap_or(DEFAULT_MAX_IMG_WIDTH),
63 }
64 }
65
66 /// Resizes an image based on a maximum width-to-height ratio.
67 ///
68 /// This method resizes an image to fit within the specified dimensions while maintaining
69 /// the aspect ratio. If the calculated width exceeds the maximum allowed width, the image
70 /// is resized to the maximum width.
71 ///
72 /// # Arguments
73 ///
74 /// * `img` - The input RGB image to resize.
75 /// * `max_wh_ratio` - The maximum width-to-height ratio for the resized image.
76 ///
77 /// # Returns
78 ///
79 /// A resized and padded RGB image.
80 pub fn resize_img(&self, img: &RgbImage, max_wh_ratio: f32) -> RgbImage {
81 let [_img_c, img_h, _img_w] = self.rec_image_shape;
82
83 let config = OCRResizePadConfig::new(img_h as u32, self.max_img_w as u32);
84 let (padded_image, _actual_width) = ocr_resize_and_pad(img, &config, Some(max_wh_ratio));
85
86 padded_image
87 }
88
89 /// Resizes an image using the default width-to-height ratio from rec_image_shape.
90 ///
91 /// This method calculates the width-to-height ratio from the configured rec_image_shape
92 /// and uses it to resize the image via resize_img.
93 ///
94 /// # Arguments
95 ///
96 /// * `img` - The input RGB image to resize.
97 ///
98 /// # Returns
99 ///
100 /// A resized and padded RGB image.
101 pub fn resize(&self, img: &RgbImage) -> RgbImage {
102 let [_, img_h, img_w] = self.rec_image_shape;
103 let max_wh_ratio = img_w as f32 / img_h as f32;
104 self.resize_img(img, max_wh_ratio)
105 }
106
107 /// Resizes an image to a static size defined by input_shape.
108 ///
109 /// This method resizes an image to exact dimensions specified in the input_shape.
110 /// It requires input_shape to be configured, otherwise it returns a ConfigError.
111 ///
112 /// # Arguments
113 ///
114 /// * `img` - The input RGB image to resize.
115 ///
116 /// # Returns
117 ///
118 /// A resized RGB image or an OCRError if input_shape is not configured.
119 pub fn static_resize(&self, img: &RgbImage) -> Result<RgbImage, OCRError> {
120 let [_img_c, img_h, img_w] = self.input_shape.ok_or_else(|| {
121 OCRError::resize_error(
122 "Input shape not configured for static resize",
123 crate::core::errors::SimpleError::new("Missing input shape configuration"),
124 )
125 })?;
126
127 let resized_image = image::imageops::resize(
128 img,
129 img_w as u32,
130 img_h as u32,
131 image::imageops::FilterType::Triangle,
132 );
133
134 Ok(resized_image)
135 }
136
137 /// Applies resizing to a batch of images.
138 ///
139 /// This method applies either dynamic resizing (using resize) or static resizing
140 /// (using static_resize) to a batch of images, depending on whether input_shape is configured.
141 /// If input_shape is None, dynamic resizing is used; otherwise, static resizing is used.
142 ///
143 /// # Arguments
144 ///
145 /// * `imgs` - A slice of RGB images to resize.
146 ///
147 /// # Returns
148 ///
149 /// A vector of resized RGB images or an OCRError if static resizing fails.
150 pub fn apply(&self, imgs: &[RgbImage]) -> Result<Vec<RgbImage>, OCRError> {
151 if self.input_shape.is_none() {
152 Ok(imgs.iter().map(|img| self.resize(img)).collect())
153 } else {
154 imgs.iter().map(|img| self.static_resize(img)).collect()
155 }
156 }
157
158 /// Resizes an image to fit tensor shape requirements.
159 ///
160 /// This method resizes an image to fit within the dimensions specified by rec_image_shape,
161 /// while maintaining the aspect ratio. If the calculated width exceeds the maximum allowed
162 /// width, the image is resized to the maximum width. The resulting image is padded to
163 /// match the target dimensions.
164 ///
165 /// # Arguments
166 ///
167 /// * `img` - The input RGB image to resize.
168 ///
169 /// # Returns
170 ///
171 /// A resized and padded RGB image or an OCRError.
172 pub fn resize_to_tensor_shape(&self, img: &RgbImage) -> Result<RgbImage, OCRError> {
173 let [_img_c, img_h, _img_w] = self.rec_image_shape;
174
175 let config = OCRResizePadConfig::new(img_h as u32, self.max_img_w as u32);
176 let (padded_image, _actual_width) = ocr_resize_and_pad(img, &config, None);
177
178 Ok(padded_image)
179 }
180
181 /// Applies tensor shape resizing to a batch of images.
182 ///
183 /// This method applies resize_to_tensor_shape to a batch of images. It handles
184 /// empty batches by returning an empty vector.
185 ///
186 /// # Arguments
187 ///
188 /// * `imgs` - A slice of RGB images to resize.
189 ///
190 /// # Returns
191 ///
192 /// A vector of resized RGB images or an OCRError if resizing fails.
193 pub fn apply_to_images(&self, imgs: &[RgbImage]) -> Result<Vec<RgbImage>, OCRError> {
194 if imgs.is_empty() {
195 return Ok(Vec::new());
196 }
197
198 let mut resized_images = Vec::with_capacity(imgs.len());
199
200 for img in imgs {
201 let resized_img = self.resize_to_tensor_shape(img)?;
202 resized_images.push(resized_img);
203 }
204
205 Ok(resized_images)
206 }
207}