oar_ocr/processors/resize/detection.rs
//! Image resizing utilities for OCR preprocessing
//!
//! This module resizes images before they are handed to an OCR model. Several
//! resizing strategies are supported, because different OCR models place
//! different requirements on their input. The main struct, `DetResizeForTest`,
//! selects one strategy at construction time and applies it to each image.
//!
//! # Resize Types
//! - Type0: Scale according to a side-length limit (see "Limit Types"), then snap both sides to multiples of 32
//! - Type1: Resize to specific dimensions, optionally deriving the width from the aspect ratio
//! - Type2: Scale so the long side matches a specific length, then round both sides up to multiples of 128
//! - Type3: Resize to the height and width of a specific input shape
//!
//! # Limit Types
//! - Max: Scale down if the longest side exceeds the limit
//! - Min: Scale up if the shortest side is below the limit
//! - ResizeLong: Always scale so the long side matches the limit
18
19use crate::core::constants::{DEFAULT_LIMIT_SIDE_LEN, DEFAULT_MAX_SIDE_LIMIT};
20use crate::processors::types::{LimitType, ResizeType};
21use image::{DynamicImage, GenericImageView};
22use tracing::{error, warn};
23
24/// A struct for resizing images for OCR testing
25///
26/// This struct encapsulates different resizing strategies for preparing
27/// images for OCR processing. It supports multiple resize types based
28/// on the input parameters.
29#[derive(Debug)]
30pub struct DetResizeForTest {
31 /// The type of resizing to perform
32 pub resize_type: ResizeType,
33 /// The length to limit the side of the image to (optional)
34 pub limit_side_len: Option<u32>,
35 /// The type of limit to apply (min, max, or resize long) (optional)
36 pub limit_type: Option<LimitType>,
37 /// The maximum allowed side length
38 pub max_side_limit: u32,
39}
40
41impl DetResizeForTest {
42 /// Creates a new `DetResizeForTest` instance
43 ///
44 /// This constructor determines the resize type based on the provided parameters.
45 /// The resize type is determined in the following order:
46 /// 1. If `input_shape` is provided, uses Type3
47 /// 2. If `image_shape` is provided, uses Type1
48 /// 3. If `resize_long` is provided, uses Type2
49 /// 4. Otherwise, uses Type0 (default)
50 ///
51 /// # Parameters
52 /// * `input_shape` - Optional input shape (channels, height, width)
53 /// * `image_shape` - Optional target image shape (height, width)
54 /// * `keep_ratio` - Whether to maintain aspect ratio when resizing (used with image_shape)
55 /// * `limit_side_len` - Optional limit for side length
56 /// * `limit_type` - Optional limit type (min, max, or resize long)
57 /// * `resize_long` - Optional length to resize the long side to
58 /// * `max_side_limit` - Optional maximum side length limit
59 ///
60 /// # Returns
61 /// A new `DetResizeForTest` instance with the determined resize type and parameters
62 pub fn new(
63 input_shape: Option<(u32, u32, u32)>,
64 image_shape: Option<(u32, u32)>,
65 keep_ratio: Option<bool>,
66 limit_side_len: Option<u32>,
67 limit_type: Option<LimitType>,
68 resize_long: Option<u32>,
69 max_side_limit: Option<u32>,
70 ) -> Self {
71 // Determine resize type based on provided parameters
72 // Priority order: Type3 -> Type1 -> Type2 -> Type0 (default)
73 let resize_type = if let Some(shape) = input_shape {
74 // Type3: Resize to specific input shape (channels, height, width)
75 ResizeType::Type3 { input_shape: shape }
76 } else if let Some(shape) = image_shape {
77 // Type1: Resize to specific dimensions with optional aspect ratio preservation
78 ResizeType::Type1 {
79 image_shape: shape,
80 keep_ratio: keep_ratio.unwrap_or(false),
81 }
82 } else if let Some(long) = resize_long {
83 // Type2: Resize long side to specific length
84 ResizeType::Type2 { resize_long: long }
85 } else {
86 // Type0: Resize based on limit side length (default)
87 ResizeType::Type0
88 };
89
90 Self {
91 resize_type,
92 limit_side_len: limit_side_len.or(Some(DEFAULT_LIMIT_SIDE_LEN)),
93 limit_type: limit_type.or(Some(LimitType::Min)),
94 max_side_limit: max_side_limit.unwrap_or(DEFAULT_MAX_SIDE_LIMIT),
95 }
96 }
97
98 /// Applies resizing to a batch of images
99 ///
100 /// This method processes a vector of images, applying the configured
101 /// resize operation to each one.
102 ///
103 /// # Parameters
104 /// * `imgs` - Vector of images to resize
105 /// * `limit_side_len` - Optional override for limit side length
106 /// * `limit_type` - Optional override for limit type
107 /// * `max_side_limit` - Optional override for maximum side limit
108 ///
109 /// # Returns
110 /// A tuple containing:
111 /// 1. Vector of resized images
112 /// 2. Vector of original image shapes and resize ratios [height, width, ratio_h, ratio_w]
113 pub fn apply(
114 &self,
115 imgs: Vec<DynamicImage>,
116 limit_side_len: Option<u32>,
117 limit_type: Option<LimitType>,
118 max_side_limit: Option<u32>,
119 ) -> (Vec<DynamicImage>, Vec<[f32; 4]>) {
120 let mut resize_imgs = Vec::new();
121 let mut img_shapes = Vec::new();
122
123 // Process each image in the batch
124 for img in imgs {
125 let (resized_img, shape) =
126 self.resize(img, limit_side_len, limit_type.as_ref(), max_side_limit);
127 resize_imgs.push(resized_img);
128 img_shapes.push(shape);
129 }
130
131 (resize_imgs, img_shapes)
132 }
133
134 /// Resizes a single image based on the configured resize type
135 ///
136 /// This method applies the appropriate resize operation based on the
137 /// `resize_type` field. It also handles small images by padding them
138 /// if their dimensions are less than 64 pixels in total.
139 ///
140 /// # Parameters
141 /// * `img` - The image to resize
142 /// * `limit_side_len` - Optional override for limit side length
143 /// * `limit_type` - Optional override for limit type
144 /// * `max_side_limit` - Optional override for maximum side limit
145 ///
146 /// # Returns
147 /// A tuple containing:
148 /// 1. The resized image
149 /// 2. Array with original dimensions and resize ratios [height, width, ratio_h, ratio_w]
150 fn resize(
151 &self,
152 mut img: DynamicImage,
153 limit_side_len: Option<u32>,
154 limit_type: Option<&LimitType>,
155 max_side_limit: Option<u32>,
156 ) -> (DynamicImage, [f32; 4]) {
157 let (src_w, src_h) = img.dimensions();
158
159 // Pad small images to avoid issues with OCR processing
160 // Images with total dimensions less than 64 pixels can cause problems in OCR models
161 if (src_h + src_w) < 64 {
162 img = self.image_padding(img);
163 }
164
165 let (resized_img, ratios) = match &self.resize_type {
166 ResizeType::Type0 => {
167 self.resize_image_type0(img, limit_side_len, limit_type, max_side_limit)
168 }
169 ResizeType::Type1 {
170 image_shape,
171 keep_ratio,
172 } => self.resize_image_type1(img, *image_shape, *keep_ratio),
173 ResizeType::Type2 { resize_long } => self.resize_image_type2(img, *resize_long),
174 ResizeType::Type3 { input_shape } => self.resize_image_type3(img, *input_shape),
175 };
176
177 let shape = [src_h as f32, src_w as f32, ratios[0], ratios[1]];
178 (resized_img, shape)
179 }
180
181 /// Pads small images to a minimum size
182 ///
183 /// Ensures that images have a minimum dimension of 32x32 pixels
184 /// by padding them with black pixels if needed.
185 ///
186 /// # Parameters
187 /// * `img` - The image to pad
188 ///
189 /// # Returns
190 /// The padded image (or original if no padding was needed)
191 fn image_padding(&self, img: DynamicImage) -> DynamicImage {
192 let (w, h) = img.dimensions();
193 // Ensure minimum dimension of 32 pixels for both width and height
194 let new_w = w.max(32);
195 let new_h = h.max(32);
196
197 // If image is already at least 32x32, return it unchanged
198 if new_w == w && new_h == h {
199 return img;
200 }
201
202 // Create a new image with the padded dimensions
203 let mut padded = DynamicImage::new_rgb8(new_w, new_h);
204 // Overlay the original image onto the padded image at position (0,0)
205 image::imageops::overlay(&mut padded, &img, 0, 0);
206 padded
207 }
208
209 /// Resize type 0: Resize based on limit side length
210 ///
211 /// This method resizes the image based on a limit for the side length,
212 /// with different behaviors depending on the limit type:
213 /// - Max: Resize if the longest side exceeds the limit
214 /// - Min: Resize if the shortest side is below the limit
215 /// - ResizeLong: Resize the long side to match the limit
216 ///
217 /// The resized dimensions are also adjusted to be multiples of 32,
218 /// and constrained by the maximum side limit.
219 ///
220 /// # Parameters
221 /// * `img` - The image to resize
222 /// * `limit_side_len` - Optional override for limit side length
223 /// * `limit_type` - Optional override for limit type
224 /// * `max_side_limit` - Optional override for maximum side limit
225 ///
226 /// # Returns
227 /// A tuple containing:
228 /// 1. The resized image
229 /// 2. Array with resize ratios [ratio_h, ratio_w]
230 fn resize_image_type0(
231 &self,
232 img: DynamicImage,
233 limit_side_len: Option<u32>,
234 limit_type: Option<&LimitType>,
235 max_side_limit: Option<u32>,
236 ) -> (DynamicImage, [f32; 2]) {
237 let (w, h) = img.dimensions();
238 let limit_side_len = limit_side_len
239 .or(self.limit_side_len)
240 .unwrap_or(DEFAULT_LIMIT_SIDE_LEN);
241 let limit_type = limit_type
242 .or(self.limit_type.as_ref())
243 .unwrap_or(&LimitType::Min);
244 let max_side_limit = max_side_limit.unwrap_or(self.max_side_limit);
245
246 // Calculate resize ratio based on limit type
247 let ratio = match limit_type {
248 LimitType::Max => {
249 // Resize if the longest side exceeds the limit
250 if h.max(w) > limit_side_len {
251 limit_side_len as f32 / h.max(w) as f32
252 } else {
253 1.0
254 }
255 }
256 LimitType::Min => {
257 // Resize if the shortest side is below the limit
258 if h.min(w) < limit_side_len {
259 limit_side_len as f32 / h.min(w) as f32
260 } else {
261 1.0
262 }
263 }
264 LimitType::ResizeLong => {
265 // Resize the long side to match the limit
266 limit_side_len as f32 / h.max(w) as f32
267 }
268 };
269
270 let mut resize_h = (h as f32 * ratio) as u32;
271 let mut resize_w = (w as f32 * ratio) as u32;
272
273 // Apply maximum side limit if exceeded
274 if resize_h.max(resize_w) > max_side_limit {
275 warn!(
276 "Resized image size ({}x{}) exceeds max_side_limit of {}. Resizing to fit within limit.",
277 resize_h, resize_w, max_side_limit
278 );
279 // Calculate ratio to scale down to fit within max_side_limit
280 let limit_ratio = max_side_limit as f32 / resize_h.max(resize_w) as f32;
281 resize_h = (resize_h as f32 * limit_ratio) as u32;
282 resize_w = (resize_w as f32 * limit_ratio) as u32;
283 }
284
285 // Ensure dimensions are multiples of 32 and at least 32 pixels
286 // Adding 16 before division ensures proper rounding to nearest multiple of 32
287 resize_h = ((resize_h + 16) / 32 * 32).max(32);
288 resize_w = ((resize_w + 16) / 32 * 32).max(32);
289
290 // Return original if no resize is needed
291 if resize_h == h && resize_w == w {
292 return (img, [1.0, 1.0]);
293 }
294
295 // Handle invalid resize dimensions
296 if resize_w == 0 || resize_h == 0 {
297 error!("Invalid resize dimensions: {}x{}", resize_w, resize_h);
298 return (img, [1.0, 1.0]);
299 }
300
301 let resized_img =
302 img.resize_exact(resize_w, resize_h, image::imageops::FilterType::Lanczos3);
303 let ratio_h = resize_h as f32 / h as f32;
304 let ratio_w = resize_w as f32 / w as f32;
305
306 (resized_img, [ratio_h, ratio_w])
307 }
308
309 /// Resize type 1: Resize to specific dimensions
310 ///
311 /// This method resizes the image to specific dimensions, with an option
312 /// to maintain the aspect ratio. When keeping the ratio, the width is
313 /// adjusted to maintain the aspect ratio and then rounded up to the
314 /// nearest multiple of 32.
315 ///
316 /// # Parameters
317 /// * `img` - The image to resize
318 /// * `image_shape` - Target dimensions (height, width)
319 /// * `keep_ratio` - Whether to maintain aspect ratio
320 ///
321 /// # Returns
322 /// A tuple containing:
323 /// 1. The resized image
324 /// 2. Array with resize ratios [ratio_h, ratio_w]
325 fn resize_image_type1(
326 &self,
327 img: DynamicImage,
328 image_shape: (u32, u32),
329 keep_ratio: bool,
330 ) -> (DynamicImage, [f32; 2]) {
331 let (ori_w, ori_h) = img.dimensions();
332 let (resize_h, mut resize_w) = image_shape;
333
334 // Adjust width to maintain aspect ratio if requested
335 if keep_ratio {
336 // Calculate new width based on aspect ratio: new_width = original_width * (target_height / original_height)
337 resize_w = (ori_w * resize_h) / ori_h;
338 // Round up to nearest multiple of 32 to ensure proper alignment for OCR models
339 let n = resize_w.div_ceil(32);
340 resize_w = n * 32;
341 }
342
343 // Return original if no resize is needed
344 if resize_h == ori_h && resize_w == ori_w {
345 return (img, [1.0, 1.0]);
346 }
347
348 let ratio_h = resize_h as f32 / ori_h as f32;
349 let ratio_w = resize_w as f32 / ori_w as f32;
350 let resized_img =
351 img.resize_exact(resize_w, resize_h, image::imageops::FilterType::Lanczos3);
352
353 (resized_img, [ratio_h, ratio_w])
354 }
355
356 /// Resize type 2: Resize long side to specific length
357 ///
358 /// This method resizes the image so that its longest side matches
359 /// the specified length. The dimensions are then adjusted to be
360 /// multiples of 128.
361 ///
362 /// # Parameters
363 /// * `img` - The image to resize
364 /// * `resize_long` - Target length for the long side
365 ///
366 /// # Returns
367 /// A tuple containing:
368 /// 1. The resized image
369 /// 2. Array with resize ratios [ratio_h, ratio_w]
370 fn resize_image_type2(&self, img: DynamicImage, resize_long: u32) -> (DynamicImage, [f32; 2]) {
371 let (w, h) = img.dimensions();
372
373 // Calculate resize ratio based on which side is longer
374 // If height > width, resize based on height; otherwise resize based on width
375 let ratio = if h > w {
376 resize_long as f32 / h as f32
377 } else {
378 resize_long as f32 / w as f32
379 };
380
381 let mut resize_h = (h as f32 * ratio) as u32;
382 let mut resize_w = (w as f32 * ratio) as u32;
383
384 // Ensure dimensions are multiples of 128
385 let max_stride = 128;
386 // Round up to nearest multiple of 128 to ensure proper alignment
387 resize_h = resize_h.div_ceil(max_stride) * max_stride;
388 resize_w = resize_w.div_ceil(max_stride) * max_stride;
389
390 // Return original if no resize is needed
391 if resize_h == h && resize_w == w {
392 return (img, [1.0, 1.0]);
393 }
394
395 let resized_img =
396 img.resize_exact(resize_w, resize_h, image::imageops::FilterType::Lanczos3);
397 let ratio_h = resize_h as f32 / h as f32;
398 let ratio_w = resize_w as f32 / w as f32;
399
400 (resized_img, [ratio_h, ratio_w])
401 }
402
403 /// Resize type 3: Resize to specific input shape
404 ///
405 /// This method resizes the image to match the exact dimensions
406 /// specified in the input shape parameter (channels, height, width).
407 ///
408 /// # Parameters
409 /// * `img` - The image to resize
410 /// * `input_shape` - Target shape (channels, height, width)
411 ///
412 /// # Returns
413 /// A tuple containing:
414 /// 1. The resized image
415 /// 2. Array with resize ratios [ratio_h, ratio_w]
416 fn resize_image_type3(
417 &self,
418 img: DynamicImage,
419 input_shape: (u32, u32, u32),
420 ) -> (DynamicImage, [f32; 2]) {
421 let (ori_w, ori_h) = img.dimensions();
422 let (_, resize_h, resize_w) = input_shape;
423
424 // Return original if no resize is needed
425 if resize_h == ori_h && resize_w == ori_w {
426 return (img, [1.0, 1.0]);
427 }
428
429 let ratio_h = resize_h as f32 / ori_h as f32;
430 let ratio_w = resize_w as f32 / ori_w as f32;
431 let resized_img =
432 img.resize_exact(resize_w, resize_h, image::imageops::FilterType::Lanczos3);
433
434 (resized_img, [ratio_h, ratio_w])
435 }
436}