oar_ocr/processors/resize/
detection.rs

//! Image resizing utilities for OCR preprocessing
//!
//! This module provides functionality to resize images for OCR processing,
//! supporting different resizing strategies based on the requirements of
//! various OCR models. The main struct `DetResizeForTest` handles different
//! types of resizing operations.
//!
//! # Resize Types
//! - Type0: Resize based on limit side length with different behaviors depending on the limit type
//! - Type1: Resize to specific dimensions with optional aspect ratio preservation
//! - Type2: Resize long side to specific length
//! - Type3: Resize to specific input shape
//!
//! # Limit Types
//! - Max: Resize if the longest side exceeds the limit
//! - Min: Resize if the shortest side is below the limit
//! - ResizeLong: Resize the long side to match the limit
18
19use crate::core::constants::{DEFAULT_LIMIT_SIDE_LEN, DEFAULT_MAX_SIDE_LIMIT};
20use crate::processors::types::{LimitType, ResizeType};
21use image::{DynamicImage, GenericImageView};
22use tracing::{error, warn};
23
24/// A struct for resizing images for OCR testing
25///
26/// This struct encapsulates different resizing strategies for preparing
27/// images for OCR processing. It supports multiple resize types based
28/// on the input parameters.
29#[derive(Debug)]
30pub struct DetResizeForTest {
31    /// The type of resizing to perform
32    pub resize_type: ResizeType,
33    /// The length to limit the side of the image to (optional)
34    pub limit_side_len: Option<u32>,
35    /// The type of limit to apply (min, max, or resize long) (optional)
36    pub limit_type: Option<LimitType>,
37    /// The maximum allowed side length
38    pub max_side_limit: u32,
39}
40
41impl DetResizeForTest {
42    /// Creates a new `DetResizeForTest` instance
43    ///
44    /// This constructor determines the resize type based on the provided parameters.
45    /// The resize type is determined in the following order:
46    /// 1. If `input_shape` is provided, uses Type3
47    /// 2. If `image_shape` is provided, uses Type1
48    /// 3. If `resize_long` is provided, uses Type2
49    /// 4. Otherwise, uses Type0 (default)
50    ///
51    /// # Parameters
52    /// * `input_shape` - Optional input shape (channels, height, width)
53    /// * `image_shape` - Optional target image shape (height, width)
54    /// * `keep_ratio` - Whether to maintain aspect ratio when resizing (used with image_shape)
55    /// * `limit_side_len` - Optional limit for side length
56    /// * `limit_type` - Optional limit type (min, max, or resize long)
57    /// * `resize_long` - Optional length to resize the long side to
58    /// * `max_side_limit` - Optional maximum side length limit
59    ///
60    /// # Returns
61    /// A new `DetResizeForTest` instance with the determined resize type and parameters
62    pub fn new(
63        input_shape: Option<(u32, u32, u32)>,
64        image_shape: Option<(u32, u32)>,
65        keep_ratio: Option<bool>,
66        limit_side_len: Option<u32>,
67        limit_type: Option<LimitType>,
68        resize_long: Option<u32>,
69        max_side_limit: Option<u32>,
70    ) -> Self {
71        // Determine resize type based on provided parameters
72        // Priority order: Type3 -> Type1 -> Type2 -> Type0 (default)
73        let resize_type = if let Some(shape) = input_shape {
74            // Type3: Resize to specific input shape (channels, height, width)
75            ResizeType::Type3 { input_shape: shape }
76        } else if let Some(shape) = image_shape {
77            // Type1: Resize to specific dimensions with optional aspect ratio preservation
78            ResizeType::Type1 {
79                image_shape: shape,
80                keep_ratio: keep_ratio.unwrap_or(false),
81            }
82        } else if let Some(long) = resize_long {
83            // Type2: Resize long side to specific length
84            ResizeType::Type2 { resize_long: long }
85        } else {
86            // Type0: Resize based on limit side length (default)
87            ResizeType::Type0
88        };
89
90        Self {
91            resize_type,
92            limit_side_len: limit_side_len.or(Some(DEFAULT_LIMIT_SIDE_LEN)),
93            limit_type: limit_type.or(Some(LimitType::Min)),
94            max_side_limit: max_side_limit.unwrap_or(DEFAULT_MAX_SIDE_LIMIT),
95        }
96    }
97
98    /// Applies resizing to a batch of images
99    ///
100    /// This method processes a vector of images, applying the configured
101    /// resize operation to each one.
102    ///
103    /// # Parameters
104    /// * `imgs` - Vector of images to resize
105    /// * `limit_side_len` - Optional override for limit side length
106    /// * `limit_type` - Optional override for limit type
107    /// * `max_side_limit` - Optional override for maximum side limit
108    ///
109    /// # Returns
110    /// A tuple containing:
111    /// 1. Vector of resized images
112    /// 2. Vector of original image shapes and resize ratios [height, width, ratio_h, ratio_w]
113    pub fn apply(
114        &self,
115        imgs: Vec<DynamicImage>,
116        limit_side_len: Option<u32>,
117        limit_type: Option<LimitType>,
118        max_side_limit: Option<u32>,
119    ) -> (Vec<DynamicImage>, Vec<[f32; 4]>) {
120        let mut resize_imgs = Vec::new();
121        let mut img_shapes = Vec::new();
122
123        // Process each image in the batch
124        for img in imgs {
125            let (resized_img, shape) =
126                self.resize(img, limit_side_len, limit_type.as_ref(), max_side_limit);
127            resize_imgs.push(resized_img);
128            img_shapes.push(shape);
129        }
130
131        (resize_imgs, img_shapes)
132    }
133
134    /// Resizes a single image based on the configured resize type
135    ///
136    /// This method applies the appropriate resize operation based on the
137    /// `resize_type` field. It also handles small images by padding them
138    /// if their dimensions are less than 64 pixels in total.
139    ///
140    /// # Parameters
141    /// * `img` - The image to resize
142    /// * `limit_side_len` - Optional override for limit side length
143    /// * `limit_type` - Optional override for limit type
144    /// * `max_side_limit` - Optional override for maximum side limit
145    ///
146    /// # Returns
147    /// A tuple containing:
148    /// 1. The resized image
149    /// 2. Array with original dimensions and resize ratios [height, width, ratio_h, ratio_w]
150    fn resize(
151        &self,
152        mut img: DynamicImage,
153        limit_side_len: Option<u32>,
154        limit_type: Option<&LimitType>,
155        max_side_limit: Option<u32>,
156    ) -> (DynamicImage, [f32; 4]) {
157        let (src_w, src_h) = img.dimensions();
158
159        // Pad small images to avoid issues with OCR processing
160        // Images with total dimensions less than 64 pixels can cause problems in OCR models
161        if (src_h + src_w) < 64 {
162            img = self.image_padding(img);
163        }
164
165        let (resized_img, ratios) = match &self.resize_type {
166            ResizeType::Type0 => {
167                self.resize_image_type0(img, limit_side_len, limit_type, max_side_limit)
168            }
169            ResizeType::Type1 {
170                image_shape,
171                keep_ratio,
172            } => self.resize_image_type1(img, *image_shape, *keep_ratio),
173            ResizeType::Type2 { resize_long } => self.resize_image_type2(img, *resize_long),
174            ResizeType::Type3 { input_shape } => self.resize_image_type3(img, *input_shape),
175        };
176
177        let shape = [src_h as f32, src_w as f32, ratios[0], ratios[1]];
178        (resized_img, shape)
179    }
180
181    /// Pads small images to a minimum size
182    ///
183    /// Ensures that images have a minimum dimension of 32x32 pixels
184    /// by padding them with black pixels if needed.
185    ///
186    /// # Parameters
187    /// * `img` - The image to pad
188    ///
189    /// # Returns
190    /// The padded image (or original if no padding was needed)
191    fn image_padding(&self, img: DynamicImage) -> DynamicImage {
192        let (w, h) = img.dimensions();
193        // Ensure minimum dimension of 32 pixels for both width and height
194        let new_w = w.max(32);
195        let new_h = h.max(32);
196
197        // If image is already at least 32x32, return it unchanged
198        if new_w == w && new_h == h {
199            return img;
200        }
201
202        // Create a new image with the padded dimensions
203        let mut padded = DynamicImage::new_rgb8(new_w, new_h);
204        // Overlay the original image onto the padded image at position (0,0)
205        image::imageops::overlay(&mut padded, &img, 0, 0);
206        padded
207    }
208
209    /// Resize type 0: Resize based on limit side length
210    ///
211    /// This method resizes the image based on a limit for the side length,
212    /// with different behaviors depending on the limit type:
213    /// - Max: Resize if the longest side exceeds the limit
214    /// - Min: Resize if the shortest side is below the limit
215    /// - ResizeLong: Resize the long side to match the limit
216    ///
217    /// The resized dimensions are also adjusted to be multiples of 32,
218    /// and constrained by the maximum side limit.
219    ///
220    /// # Parameters
221    /// * `img` - The image to resize
222    /// * `limit_side_len` - Optional override for limit side length
223    /// * `limit_type` - Optional override for limit type
224    /// * `max_side_limit` - Optional override for maximum side limit
225    ///
226    /// # Returns
227    /// A tuple containing:
228    /// 1. The resized image
229    /// 2. Array with resize ratios [ratio_h, ratio_w]
230    fn resize_image_type0(
231        &self,
232        img: DynamicImage,
233        limit_side_len: Option<u32>,
234        limit_type: Option<&LimitType>,
235        max_side_limit: Option<u32>,
236    ) -> (DynamicImage, [f32; 2]) {
237        let (w, h) = img.dimensions();
238        let limit_side_len = limit_side_len
239            .or(self.limit_side_len)
240            .unwrap_or(DEFAULT_LIMIT_SIDE_LEN);
241        let limit_type = limit_type
242            .or(self.limit_type.as_ref())
243            .unwrap_or(&LimitType::Min);
244        let max_side_limit = max_side_limit.unwrap_or(self.max_side_limit);
245
246        // Calculate resize ratio based on limit type
247        let ratio = match limit_type {
248            LimitType::Max => {
249                // Resize if the longest side exceeds the limit
250                if h.max(w) > limit_side_len {
251                    limit_side_len as f32 / h.max(w) as f32
252                } else {
253                    1.0
254                }
255            }
256            LimitType::Min => {
257                // Resize if the shortest side is below the limit
258                if h.min(w) < limit_side_len {
259                    limit_side_len as f32 / h.min(w) as f32
260                } else {
261                    1.0
262                }
263            }
264            LimitType::ResizeLong => {
265                // Resize the long side to match the limit
266                limit_side_len as f32 / h.max(w) as f32
267            }
268        };
269
270        let mut resize_h = (h as f32 * ratio) as u32;
271        let mut resize_w = (w as f32 * ratio) as u32;
272
273        // Apply maximum side limit if exceeded
274        if resize_h.max(resize_w) > max_side_limit {
275            warn!(
276                "Resized image size ({}x{}) exceeds max_side_limit of {}. Resizing to fit within limit.",
277                resize_h, resize_w, max_side_limit
278            );
279            // Calculate ratio to scale down to fit within max_side_limit
280            let limit_ratio = max_side_limit as f32 / resize_h.max(resize_w) as f32;
281            resize_h = (resize_h as f32 * limit_ratio) as u32;
282            resize_w = (resize_w as f32 * limit_ratio) as u32;
283        }
284
285        // Ensure dimensions are multiples of 32 and at least 32 pixels
286        // Adding 16 before division ensures proper rounding to nearest multiple of 32
287        resize_h = ((resize_h + 16) / 32 * 32).max(32);
288        resize_w = ((resize_w + 16) / 32 * 32).max(32);
289
290        // Return original if no resize is needed
291        if resize_h == h && resize_w == w {
292            return (img, [1.0, 1.0]);
293        }
294
295        // Handle invalid resize dimensions
296        if resize_w == 0 || resize_h == 0 {
297            error!("Invalid resize dimensions: {}x{}", resize_w, resize_h);
298            return (img, [1.0, 1.0]);
299        }
300
301        let resized_img =
302            img.resize_exact(resize_w, resize_h, image::imageops::FilterType::Lanczos3);
303        let ratio_h = resize_h as f32 / h as f32;
304        let ratio_w = resize_w as f32 / w as f32;
305
306        (resized_img, [ratio_h, ratio_w])
307    }
308
309    /// Resize type 1: Resize to specific dimensions
310    ///
311    /// This method resizes the image to specific dimensions, with an option
312    /// to maintain the aspect ratio. When keeping the ratio, the width is
313    /// adjusted to maintain the aspect ratio and then rounded up to the
314    /// nearest multiple of 32.
315    ///
316    /// # Parameters
317    /// * `img` - The image to resize
318    /// * `image_shape` - Target dimensions (height, width)
319    /// * `keep_ratio` - Whether to maintain aspect ratio
320    ///
321    /// # Returns
322    /// A tuple containing:
323    /// 1. The resized image
324    /// 2. Array with resize ratios [ratio_h, ratio_w]
325    fn resize_image_type1(
326        &self,
327        img: DynamicImage,
328        image_shape: (u32, u32),
329        keep_ratio: bool,
330    ) -> (DynamicImage, [f32; 2]) {
331        let (ori_w, ori_h) = img.dimensions();
332        let (resize_h, mut resize_w) = image_shape;
333
334        // Adjust width to maintain aspect ratio if requested
335        if keep_ratio {
336            // Calculate new width based on aspect ratio: new_width = original_width * (target_height / original_height)
337            resize_w = (ori_w * resize_h) / ori_h;
338            // Round up to nearest multiple of 32 to ensure proper alignment for OCR models
339            let n = resize_w.div_ceil(32);
340            resize_w = n * 32;
341        }
342
343        // Return original if no resize is needed
344        if resize_h == ori_h && resize_w == ori_w {
345            return (img, [1.0, 1.0]);
346        }
347
348        let ratio_h = resize_h as f32 / ori_h as f32;
349        let ratio_w = resize_w as f32 / ori_w as f32;
350        let resized_img =
351            img.resize_exact(resize_w, resize_h, image::imageops::FilterType::Lanczos3);
352
353        (resized_img, [ratio_h, ratio_w])
354    }
355
356    /// Resize type 2: Resize long side to specific length
357    ///
358    /// This method resizes the image so that its longest side matches
359    /// the specified length. The dimensions are then adjusted to be
360    /// multiples of 128.
361    ///
362    /// # Parameters
363    /// * `img` - The image to resize
364    /// * `resize_long` - Target length for the long side
365    ///
366    /// # Returns
367    /// A tuple containing:
368    /// 1. The resized image
369    /// 2. Array with resize ratios [ratio_h, ratio_w]
370    fn resize_image_type2(&self, img: DynamicImage, resize_long: u32) -> (DynamicImage, [f32; 2]) {
371        let (w, h) = img.dimensions();
372
373        // Calculate resize ratio based on which side is longer
374        // If height > width, resize based on height; otherwise resize based on width
375        let ratio = if h > w {
376            resize_long as f32 / h as f32
377        } else {
378            resize_long as f32 / w as f32
379        };
380
381        let mut resize_h = (h as f32 * ratio) as u32;
382        let mut resize_w = (w as f32 * ratio) as u32;
383
384        // Ensure dimensions are multiples of 128
385        let max_stride = 128;
386        // Round up to nearest multiple of 128 to ensure proper alignment
387        resize_h = resize_h.div_ceil(max_stride) * max_stride;
388        resize_w = resize_w.div_ceil(max_stride) * max_stride;
389
390        // Return original if no resize is needed
391        if resize_h == h && resize_w == w {
392            return (img, [1.0, 1.0]);
393        }
394
395        let resized_img =
396            img.resize_exact(resize_w, resize_h, image::imageops::FilterType::Lanczos3);
397        let ratio_h = resize_h as f32 / h as f32;
398        let ratio_w = resize_w as f32 / w as f32;
399
400        (resized_img, [ratio_h, ratio_w])
401    }
402
403    /// Resize type 3: Resize to specific input shape
404    ///
405    /// This method resizes the image to match the exact dimensions
406    /// specified in the input shape parameter (channels, height, width).
407    ///
408    /// # Parameters
409    /// * `img` - The image to resize
410    /// * `input_shape` - Target shape (channels, height, width)
411    ///
412    /// # Returns
413    /// A tuple containing:
414    /// 1. The resized image
415    /// 2. Array with resize ratios [ratio_h, ratio_w]
416    fn resize_image_type3(
417        &self,
418        img: DynamicImage,
419        input_shape: (u32, u32, u32),
420    ) -> (DynamicImage, [f32; 2]) {
421        let (ori_w, ori_h) = img.dimensions();
422        let (_, resize_h, resize_w) = input_shape;
423
424        // Return original if no resize is needed
425        if resize_h == ori_h && resize_w == ori_w {
426            return (img, [1.0, 1.0]);
427        }
428
429        let ratio_h = resize_h as f32 / ori_h as f32;
430        let ratio_w = resize_w as f32 / ori_w as f32;
431        let resized_img =
432            img.resize_exact(resize_w, resize_h, image::imageops::FilterType::Lanczos3);
433
434        (resized_img, [ratio_h, ratio_w])
435    }
436}