oar_ocr_core/utils/
image.rs

1//! Utility functions for image processing.
2//!
3//! This module provides functions for loading, converting, and manipulating images
4//! in the OCR pipeline. It includes functions for converting between different
5//! image formats, loading single or batch images from files, creating images
6//! from raw data, and resize-and-pad operations.
7
8use crate::core::OCRError;
9use crate::core::errors::ImageProcessError;
10use image::{DynamicImage, GrayImage, ImageBuffer, ImageError, ImageReader, RgbImage};
11use std::fs::File;
12use std::io::BufReader;
13use std::path::Path;
14
15/// Converts a DynamicImage to an RgbImage.
16///
17/// This function takes a DynamicImage (which can be in any format) and converts
18/// it to an RgbImage (8-bit RGB format).
19///
20/// # Arguments
21///
22/// * `img` - The DynamicImage to convert
23///
24/// # Returns
25///
26/// * `RgbImage` - The converted RGB image
27pub fn dynamic_to_rgb(img: DynamicImage) -> RgbImage {
28    img.to_rgb8()
29}
30
31/// Converts a DynamicImage to a GrayImage.
32///
33/// This function takes a DynamicImage (which can be in any format) and converts
34/// it to a GrayImage (8-bit grayscale format).
35///
36/// # Arguments
37///
38/// * `img` - The DynamicImage to convert
39///
40/// # Returns
41///
42/// * `GrayImage` - The converted grayscale image
43pub fn dynamic_to_gray(img: DynamicImage) -> GrayImage {
44    img.to_luma8()
45}
46
47/// Loads an image from the given bytes and converts it to RgbImage.
48///
49/// This function decodes an image from a byte slice and converts it
50/// to an RgbImage. It handles any image format supported by the image crate.
51///
52/// # Arguments
53///
54/// * `bytes` - A byte slice containing the encoded image data
55///
56/// # Returns
57///
58/// * `Ok(RgbImage)` - The decoded and converted RGB image
59/// * `Err(OCRError)` - An error if the image could not be decoded or converted
60///
61/// # Errors
62///
63/// This function will return an `OCRError::ImageLoad` error if the image cannot
64/// be decoded from the provided bytes, or if there is an error during conversion.
65pub fn load_image_from_memory(bytes: &[u8]) -> Result<RgbImage, OCRError> {
66    let img = image::load_from_memory(bytes).map_err(OCRError::ImageLoad)?;
67    Ok(dynamic_to_rgb(img))
68}
69
70/// Loads an image from a file path and converts it to RgbImage.
71///
72/// This function opens an image from the specified file path and converts it
73/// to an RgbImage. It handles any image format supported by the image crate.
74///
75/// # Arguments
76///
77/// * `path` - A reference to the path of the image file to load
78///
79/// # Returns
80///
81/// * `Ok(RgbImage)` - The loaded and converted RGB image
82/// * `Err(OCRError)` - An error if the image could not be loaded or converted
83///
84/// # Errors
85///
86/// This function will return an `OCRError::ImageLoad` error if the image cannot
87/// be loaded from the specified path, or if there is an error during conversion.
88pub fn load_image<P: AsRef<Path>>(path: P) -> Result<RgbImage, OCRError> {
89    let img = open_image_any_format(path.as_ref()).map_err(OCRError::ImageLoad)?;
90    Ok(dynamic_to_rgb(img))
91}
92
93fn open_image_any_format(path: &Path) -> Result<DynamicImage, ImageError> {
94    match image::open(path) {
95        Ok(img) => Ok(img),
96        Err(err) if should_retry(&err) => {
97            tracing::warn!(
98                "Standard decode failed for {} ({err}). Retrying with format sniffing.",
99                path.display()
100            );
101            decode_with_guessed_format(path)
102        }
103        Err(err) => Err(err),
104    }
105}
106
107fn should_retry(err: &ImageError) -> bool {
108    matches!(err, ImageError::Decoding(_) | ImageError::Unsupported(_))
109}
110
111fn decode_with_guessed_format(path: &Path) -> Result<DynamicImage, ImageError> {
112    let file = File::open(path)?;
113    let reader = BufReader::new(file);
114    let reader = ImageReader::new(reader).with_guessed_format()?;
115    reader.decode()
116}
117
118/// Creates an RgbImage from raw pixel data.
119///
120/// This function creates an RgbImage from raw pixel data. The data must be
121/// in RGB format (3 bytes per pixel) and the length must match the specified
122/// width and height.
123///
124/// # Arguments
125///
126/// * `width` - The width of the image in pixels
127/// * `height` - The height of the image in pixels
128/// * `data` - A vector containing the raw pixel data (RGB format)
129///
130/// # Returns
131///
132/// * `Some(RgbImage)` - The created RGB image if the data is valid
133/// * `None` - If the data length doesn't match the specified dimensions
134pub fn create_rgb_image(width: u32, height: u32, data: Vec<u8>) -> Option<RgbImage> {
135    if data.len() != (width * height * 3) as usize {
136        return None;
137    }
138
139    ImageBuffer::from_raw(width, height, data)
140}
141
142/// Checks if the given image size is valid (non-zero dimensions).
143pub fn check_image_size(size: &[u32; 2]) -> Result<(), ImageProcessError> {
144    if size[0] == 0 || size[1] == 0 {
145        return Err(ImageProcessError::InvalidCropSize);
146    }
147    Ok(())
148}
149
150/// Extracts a rectangular region from an RGB image.
151pub fn slice_image(
152    img: &RgbImage,
153    coords: (u32, u32, u32, u32),
154) -> Result<RgbImage, ImageProcessError> {
155    let (x1, y1, x2, y2) = coords;
156    let (img_width, img_height) = img.dimensions();
157
158    if x1 >= x2 || y1 >= y2 {
159        return Err(ImageProcessError::InvalidCropCoordinates);
160    }
161
162    if x2 > img_width || y2 > img_height {
163        return Err(ImageProcessError::CropOutOfBounds);
164    }
165
166    let crop_width = x2 - x1;
167    let crop_height = y2 - y1;
168
169    Ok(image::imageops::crop_imm(img, x1, y1, crop_width, crop_height).to_image())
170}
171
172/// Extracts a rectangular region from a grayscale image.
173pub fn slice_gray_image(
174    img: &GrayImage,
175    coords: (u32, u32, u32, u32),
176) -> Result<GrayImage, ImageProcessError> {
177    let (x1, y1, x2, y2) = coords;
178    let (img_width, img_height) = img.dimensions();
179
180    if x1 >= x2 || y1 >= y2 {
181        return Err(ImageProcessError::InvalidCropCoordinates);
182    }
183
184    if x2 > img_width || y2 > img_height {
185        return Err(ImageProcessError::CropOutOfBounds);
186    }
187
188    let crop_width = x2 - x1;
189    let crop_height = y2 - y1;
190
191    Ok(image::imageops::crop_imm(img, x1, y1, crop_width, crop_height).to_image())
192}
193
194/// Calculates centered crop coordinates for a target size.
195pub fn calculate_center_crop_coords(
196    img_width: u32,
197    img_height: u32,
198    crop_width: u32,
199    crop_height: u32,
200) -> Result<(u32, u32), ImageProcessError> {
201    if crop_width > img_width || crop_height > img_height {
202        return Err(ImageProcessError::CropSizeTooLarge);
203    }
204
205    let x = (img_width - crop_width) / 2;
206    let y = (img_height - crop_height) / 2;
207
208    Ok((x, y))
209}
210
211/// Validates that crop coordinates stay within image bounds.
212pub fn validate_crop_bounds(
213    img_width: u32,
214    img_height: u32,
215    x: u32,
216    y: u32,
217    crop_width: u32,
218    crop_height: u32,
219) -> Result<(), ImageProcessError> {
220    if x + crop_width > img_width || y + crop_height > img_height {
221        return Err(ImageProcessError::CropOutOfBounds);
222    }
223    Ok(())
224}
225
226/// Resizes an RGB image to the target dimensions using Lanczos3 filtering.
227///
228/// # Errors
229///
230/// Returns `ImageProcessError::InvalidCropSize` if width or height is 0.
231pub fn resize_image(
232    img: &RgbImage,
233    width: u32,
234    height: u32,
235) -> Result<RgbImage, ImageProcessError> {
236    if width == 0 || height == 0 {
237        return Err(ImageProcessError::InvalidCropSize);
238    }
239    Ok(image::imageops::resize(
240        img,
241        width,
242        height,
243        image::imageops::FilterType::Lanczos3,
244    ))
245}
246
247/// Resizes a grayscale image to the target dimensions using Lanczos3 filtering.
248///
249/// # Errors
250///
251/// Returns `ImageProcessError::InvalidCropSize` if width or height is 0.
252pub fn resize_gray_image(
253    img: &GrayImage,
254    width: u32,
255    height: u32,
256) -> Result<GrayImage, ImageProcessError> {
257    if width == 0 || height == 0 {
258        return Err(ImageProcessError::InvalidCropSize);
259    }
260    Ok(image::imageops::resize(
261        img,
262        width,
263        height,
264        image::imageops::FilterType::Lanczos3,
265    ))
266}
267
268/// Converts an RGB image to grayscale.
269pub fn rgb_to_grayscale(img: &RgbImage) -> GrayImage {
270    image::imageops::grayscale(img)
271}
272
273/// Pads an image to the specified dimensions with a fill color.
274pub fn pad_image(
275    img: &RgbImage,
276    target_width: u32,
277    target_height: u32,
278    fill_color: [u8; 3],
279) -> Result<RgbImage, ImageProcessError> {
280    let (src_width, src_height) = img.dimensions();
281
282    if target_width < src_width || target_height < src_height {
283        return Err(ImageProcessError::InvalidCropSize);
284    }
285
286    if target_width == src_width && target_height == src_height {
287        return Ok(img.clone());
288    }
289
290    let mut padded = RgbImage::from_pixel(target_width, target_height, image::Rgb(fill_color));
291    let x_offset = (target_width - src_width) / 2;
292    let y_offset = (target_height - src_height) / 2;
293    image::imageops::overlay(&mut padded, img, x_offset as i64, y_offset as i64);
294
295    Ok(padded)
296}
297
298/// Loads a batch of images from file paths.
299///
300/// This function loads multiple images from the specified file paths and
301/// converts them to RgbImages. It uses parallel processing when the number
302/// of images exceeds the default parallel threshold.
303///
304/// # Arguments
305///
306/// * `paths` - A slice of paths to the image files to load
307///
308/// # Returns
309///
310/// * `Ok(Vec<RgbImage>)` - A vector of loaded RGB images
311/// * `Err(OCRError)` - An error if any image could not be loaded
312///
313/// # Errors
314///
315/// This function will return an `OCRError` if any image cannot be loaded
316/// from its specified path.
317pub fn load_images<P: AsRef<std::path::Path> + Send + Sync>(
318    paths: &[P],
319) -> Result<Vec<RgbImage>, OCRError> {
320    load_images_batch_with_threshold(paths, None)
321}
322
323/// Loads a batch of images from file paths with a custom parallel threshold.
324///
325/// This function loads multiple images from the specified file paths and
326/// converts them to RgbImages. It uses parallel processing when the number
327/// of images exceeds the specified threshold, or the default threshold if
328/// none is provided.
329///
330/// # Arguments
331///
332/// * `paths` - A slice of paths to the image files to load
333/// * `parallel_threshold` - An optional threshold for parallel processing.
334///   If `None`, the default threshold from `DEFAULT_PARALLEL_THRESHOLD` is used.
335///
336/// # Returns
337///
338/// * `Ok(Vec<RgbImage>)` - A vector of loaded RGB images
339/// * `Err(OCRError)` - An error if any image could not be loaded
340///
341/// # Errors
342///
343/// This function will return an `OCRError` if any image cannot be loaded
344/// from its specified path.
345pub fn load_images_batch_with_threshold<P: AsRef<std::path::Path> + Send + Sync>(
346    paths: &[P],
347    parallel_threshold: Option<usize>,
348) -> Result<Vec<RgbImage>, OCRError> {
349    use crate::core::constants::DEFAULT_PARALLEL_THRESHOLD;
350
351    let threshold = parallel_threshold.unwrap_or(DEFAULT_PARALLEL_THRESHOLD);
352
353    if paths.len() > threshold {
354        use rayon::prelude::*;
355        paths.par_iter().map(|p| load_image(p.as_ref())).collect()
356    } else {
357        paths.iter().map(|p| load_image(p.as_ref())).collect()
358    }
359}
360
361/// Load multiple images from file paths using centralized parallel policy.
362///
363/// This function loads images from the provided file paths using the utility threshold
364/// from the centralized ParallelPolicy. If the number of paths exceeds the threshold,
365/// the loading is performed in parallel using rayon. Otherwise, images are loaded
366/// sequentially.
367///
368/// # Arguments
369///
370/// * `paths` - A slice of paths to image files
371/// * `policy` - The parallel policy containing the utility threshold
372///
373/// # Returns
374///
375/// A Result containing a vector of loaded RgbImages, or an OCRError if any image fails to load.
376///
377/// # Errors
378///
379/// This function will return an `OCRError` if any image cannot be loaded
380/// from its specified path.
381pub fn load_images_batch_with_policy<P: AsRef<std::path::Path> + Send + Sync>(
382    paths: &[P],
383    policy: &crate::core::config::ParallelPolicy,
384) -> Result<Vec<RgbImage>, OCRError> {
385    if paths.len() > policy.utility_threshold {
386        use rayon::prelude::*;
387        paths.par_iter().map(|p| load_image(p.as_ref())).collect()
388    } else {
389        paths.iter().map(|p| load_image(p.as_ref())).collect()
390    }
391}
392
/// Padding strategy for resize-and-pad operations.
///
/// A strategy determines the fill color of the padded area. In
/// `resize_and_pad` it also determines placement: `LeftAlign` puts the resized
/// image at offset `(0, 0)`, the other variants center it.
/// `ocr_resize_and_pad` always places the image at `(0, 0)` and only takes the
/// color from the strategy.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum PaddingStrategy {
    /// Pad with a solid RGB color
    SolidColor([u8; 3]),
    /// Pad with black (equivalent to SolidColor([0, 0, 0]))
    #[default]
    Black,
    /// Pad with the given RGB color and anchor the resized image at the
    /// top-left corner instead of centering it
    LeftAlign([u8; 3]),
}
404
405/// Configuration for resize-and-pad operations.
406#[derive(Debug, Clone)]
407pub struct ResizePadConfig {
408    /// Target dimensions (width, height)
409    pub target_dims: (u32, u32),
410    /// Padding strategy to use
411    pub padding_strategy: PaddingStrategy,
412    /// Filter type for resizing
413    pub filter_type: image::imageops::FilterType,
414}
415
416impl ResizePadConfig {
417    /// Create a new resize-pad configuration.
418    pub fn new(target_dims: (u32, u32)) -> Self {
419        Self {
420            target_dims,
421            padding_strategy: PaddingStrategy::default(),
422            filter_type: image::imageops::FilterType::Triangle,
423        }
424    }
425
426    /// Set the padding strategy.
427    pub fn with_padding_strategy(mut self, strategy: PaddingStrategy) -> Self {
428        self.padding_strategy = strategy;
429        self
430    }
431
432    /// Set the filter type for resizing.
433    pub fn with_filter_type(mut self, filter_type: image::imageops::FilterType) -> Self {
434        self.filter_type = filter_type;
435        self
436    }
437}
438
439/// Resize an image to fit within target dimensions while maintaining aspect ratio,
440/// then pad to exact target dimensions.
441///
442/// This function provides a unified approach to resize-and-pad operations that
443/// can replace the duplicated logic found in various processors.
444///
445/// # Arguments
446///
447/// * `image` - The input RGB image to resize and pad
448/// * `config` - Configuration for the resize-and-pad operation
449///
450/// # Returns
451///
452/// A resized and padded RGB image with exact target dimensions.
453///
454/// # Errors
455///
456/// Returns `ImageProcessError::InvalidCropSize` if target dimensions are 0.
457pub fn resize_and_pad(
458    image: &RgbImage,
459    config: &ResizePadConfig,
460) -> Result<RgbImage, ImageProcessError> {
461    let (target_width, target_height) = config.target_dims;
462
463    if target_width == 0 || target_height == 0 {
464        return Err(ImageProcessError::InvalidCropSize);
465    }
466
467    let (orig_width, orig_height) = image.dimensions();
468
469    // Calculate scaling factor to fit within target dimensions while maintaining aspect ratio
470    let scale_w = target_width as f32 / orig_width as f32;
471    let scale_h = target_height as f32 / orig_height as f32;
472    let scale = scale_w.min(scale_h);
473
474    // Calculate new dimensions
475    let new_width = (orig_width as f32 * scale) as u32;
476    let new_height = (orig_height as f32 * scale) as u32;
477
478    // Resize the image
479    let resized = image::imageops::resize(image, new_width, new_height, config.filter_type);
480
481    // Create padded image with target dimensions
482    let padding_color = match config.padding_strategy {
483        PaddingStrategy::SolidColor(color) => color,
484        PaddingStrategy::Black => [0, 0, 0],
485        PaddingStrategy::LeftAlign(color) => color,
486    };
487    let padding_rgb = image::Rgb(padding_color);
488    let mut padded = ImageBuffer::from_pixel(target_width, target_height, padding_rgb);
489
490    // Calculate padding offsets
491    let (pad_x, pad_y) = match config.padding_strategy {
492        PaddingStrategy::LeftAlign(_) => (0, 0),
493        _ => {
494            // Center the image
495            let pad_x = (target_width - new_width) / 2;
496            let pad_y = (target_height - new_height) / 2;
497            (pad_x, pad_y)
498        }
499    };
500
501    // Copy resized image to padded image using efficient overlay
502    image::imageops::overlay(&mut padded, &resized, pad_x as i64, pad_y as i64);
503
504    Ok(padded)
505}
506
507/// Configuration for OCR-style resize-and-pad operations with width constraints.
508#[derive(Debug, Clone)]
509pub struct OCRResizePadConfig {
510    /// Target height
511    pub target_height: u32,
512    /// Maximum allowed width
513    pub max_width: u32,
514    /// Padding strategy to use
515    pub padding_strategy: PaddingStrategy,
516    /// Filter type for resizing
517    pub filter_type: image::imageops::FilterType,
518}
519
520impl OCRResizePadConfig {
521    /// Create a new OCR resize-pad configuration.
522    ///
523    /// Uses Triangle (bilinear) interpolation to match OpenCV's cv2.resize default behavior.
524    pub fn new(target_height: u32, max_width: u32) -> Self {
525        Self {
526            target_height,
527            max_width,
528            padding_strategy: PaddingStrategy::default(),
529            // Use Triangle (bilinear) to match cv2.resize INTER_LINEAR
530            filter_type: image::imageops::FilterType::Triangle,
531        }
532    }
533
534    /// Set the padding strategy.
535    pub fn with_padding_strategy(mut self, strategy: PaddingStrategy) -> Self {
536        self.padding_strategy = strategy;
537        self
538    }
539
540    /// Set the filter type for resizing.
541    pub fn with_filter_type(mut self, filter_type: image::imageops::FilterType) -> Self {
542        self.filter_type = filter_type;
543        self
544    }
545}
546
547/// Resize an image for OCR processing with width constraints and padding.
548///
549/// This function handles the specific resize-and-pad logic used in OCR processing,
550/// where images are resized to a fixed height while maintaining aspect ratio,
551/// with a maximum width constraint, and then padded to a target width.
552///
553/// # Arguments
554///
555/// * `image` - The input RGB image to resize and pad
556/// * `config` - Configuration for the OCR resize-and-pad operation
557/// * `target_width_ratio` - Optional ratio to calculate target width from height.
558///   If None, uses the image's original aspect ratio.
559///
560/// # Returns
561///
562/// A tuple containing:
563/// - The resized and padded RGB image
564/// - The actual width used for the padded image
565///
566/// # Errors
567///
568/// Returns `ImageProcessError::InvalidCropSize` if target height is 0.
569pub fn ocr_resize_and_pad(
570    image: &RgbImage,
571    config: &OCRResizePadConfig,
572    target_width_ratio: Option<f32>,
573) -> Result<(RgbImage, u32), ImageProcessError> {
574    if config.target_height == 0 {
575        return Err(ImageProcessError::InvalidCropSize);
576    }
577
578    let (original_w, original_h) = image.dimensions();
579    let original_ratio = original_w as f32 / original_h as f32;
580
581    // Calculate target width based on ratio or original aspect ratio
582    let mut target_w = if let Some(ratio) = target_width_ratio {
583        (config.target_height as f32 * ratio) as u32
584    } else {
585        (config.target_height as f32 * original_ratio).ceil() as u32
586    };
587
588    // Apply maximum width constraint
589    let resized_w = if target_w > config.max_width {
590        target_w = config.max_width;
591        config.max_width
592    } else {
593        // Calculate actual resized width based on aspect ratio
594        let ratio = original_w as f32 / original_h as f32;
595        if (config.target_height as f32 * ratio).ceil() as u32 > target_w {
596            target_w
597        } else {
598            (config.target_height as f32 * ratio).ceil() as u32
599        }
600    };
601
602    // Resize the image
603    let resized_image =
604        image::imageops::resize(image, resized_w, config.target_height, config.filter_type);
605
606    // Create padded image with target dimensions
607    let padding_color = match config.padding_strategy {
608        PaddingStrategy::SolidColor(color) => color,
609        PaddingStrategy::Black => [0, 0, 0],
610        PaddingStrategy::LeftAlign(color) => color,
611    };
612    let padding_rgb = image::Rgb(padding_color);
613    let mut padded_image = ImageBuffer::from_pixel(target_w, config.target_height, padding_rgb);
614
615    // Copy resized image to padded image (left-aligned for OCR)
616    image::imageops::overlay(&mut padded_image, &resized_image, 0, 0);
617
618    Ok((padded_image, target_w))
619}
620
621/// Resizes a batch of images to the specified dimensions.
622///
623/// This function provides a unified approach to batch image resizing that can replace
624/// duplicated resize loops found in various predictors. It supports both functional
625/// and imperative styles and can optionally apply post-processing operations.
626///
627/// # Arguments
628///
629/// * `images` - A slice of RGB images to resize
630/// * `target_width` - Target width for all images
631/// * `target_height` - Target height for all images
632/// * `filter_type` - The filter type to use for resizing (defaults to Lanczos3 if None)
633///
634/// # Returns
635///
636/// A vector of resized RGB images.
637///
638/// # Example
639///
640/// ```rust,no_run
641/// use oar_ocr_core::utils::resize_images_batch;
642/// use image::RgbImage;
643///
644/// let images = vec![RgbImage::new(100, 100), RgbImage::new(200, 150)];
645/// let resized = resize_images_batch(&images, 224, 224, None);
646/// assert_eq!(resized.len(), 2);
647/// assert_eq!(resized[0].dimensions(), (224, 224));
648/// ```
649pub fn resize_images_batch(
650    images: &[RgbImage],
651    target_width: u32,
652    target_height: u32,
653    filter_type: Option<image::imageops::FilterType>,
654) -> Vec<RgbImage> {
655    let filter = filter_type.unwrap_or(image::imageops::FilterType::Lanczos3);
656
657    images
658        .iter()
659        .map(|img| image::imageops::resize(img, target_width, target_height, filter))
660        .collect()
661}
662
663/// Resizes a batch of images and converts them to DynamicImage format.
664///
665/// This function combines batch resizing with conversion to DynamicImage format,
666/// which is commonly needed in OCR preprocessing pipelines.
667///
668/// # Arguments
669///
670/// * `images` - A slice of RGB images to resize
671/// * `target_width` - Target width for all images
672/// * `target_height` - Target height for all images
673/// * `filter_type` - The filter type to use for resizing (defaults to Lanczos3 if None)
674///
675/// # Returns
676///
677/// A vector of resized images as DynamicImage instances.
678pub fn resize_images_batch_to_dynamic(
679    images: &[RgbImage],
680    target_width: u32,
681    target_height: u32,
682    filter_type: Option<image::imageops::FilterType>,
683) -> Vec<DynamicImage> {
684    let filter = filter_type.unwrap_or(image::imageops::FilterType::Lanczos3);
685
686    images
687        .iter()
688        .map(|img| {
689            let resized = image::imageops::resize(img, target_width, target_height, filter);
690            DynamicImage::ImageRgb8(resized)
691        })
692        .collect()
693}
694
695/// Masks a rectangular region in an RGB image with a solid color.
696///
697/// This function fills the specified rectangular region with a solid color,
698/// which is useful for masking formula regions before text detection to prevent
699/// formulas from being incorrectly detected as text (as done in PP-StructureV3).
700///
701/// # Arguments
702///
703/// * `image` - A mutable reference to the RGB image to mask
704/// * `x1` - Left coordinate of the region
705/// * `y1` - Top coordinate of the region
706/// * `x2` - Right coordinate of the region
707/// * `y2` - Bottom coordinate of the region
708/// * `fill_color` - The color to fill the masked region with (default: white [255, 255, 255])
709///
710/// # Returns
711///
712/// Returns `Ok(())` if masking succeeds, or an error if coordinates are invalid.
713///
714/// # Example
715///
716/// ```rust,no_run
717/// use oar_ocr_core::utils::mask_region;
718/// use image::RgbImage;
719///
720/// let mut image = RgbImage::new(100, 100);
721/// // Mask a formula region from (10, 10) to (50, 30) with white
722/// mask_region(&mut image, 10, 10, 50, 30, [255, 255, 255]).unwrap();
723/// ```
724pub fn mask_region(
725    image: &mut RgbImage,
726    x1: u32,
727    y1: u32,
728    x2: u32,
729    y2: u32,
730    fill_color: [u8; 3],
731) -> Result<(), ImageProcessError> {
732    let (img_width, img_height) = image.dimensions();
733
734    // Clamp coordinates to image bounds
735    let x1 = x1.min(img_width);
736    let y1 = y1.min(img_height);
737    let x2 = x2.min(img_width);
738    let y2 = y2.min(img_height);
739
740    if x1 >= x2 || y1 >= y2 {
741        return Err(ImageProcessError::InvalidCropCoordinates);
742    }
743
744    let rgb = image::Rgb(fill_color);
745    for y in y1..y2 {
746        for x in x1..x2 {
747            image.put_pixel(x, y, rgb);
748        }
749    }
750
751    Ok(())
752}
753
754/// Masks multiple bounding box regions in an RGB image.
755///
756/// This function masks multiple regions by filling them with a solid color.
757/// It is useful for batch masking multiple formula or other regions before
758/// text detection (as done in PP-StructureV3).
759///
760/// # Arguments
761///
762/// * `image` - A mutable reference to the RGB image to mask
763/// * `bboxes` - A slice of bounding boxes to mask. Each bbox should provide
764///   `x_min()`, `y_min()`, `x_max()`, `y_max()` methods.
765/// * `fill_color` - The color to fill the masked regions with
766///
767/// # Example
768///
769/// ```rust,no_run
770/// use oar_ocr_core::utils::mask_regions;
771/// use oar_ocr_core::processors::BoundingBox;
772/// use image::RgbImage;
773///
774/// let mut image = RgbImage::new(100, 100);
775/// let bboxes = vec![
776///     BoundingBox::from_coords(10.0, 10.0, 30.0, 30.0),
777///     BoundingBox::from_coords(50.0, 50.0, 70.0, 70.0),
778/// ];
779/// mask_regions(&mut image, &bboxes, [255, 255, 255]);
780/// ```
781pub fn mask_regions(
782    image: &mut RgbImage,
783    bboxes: &[crate::processors::BoundingBox],
784    fill_color: [u8; 3],
785) {
786    for bbox in bboxes {
787        let x1 = bbox.x_min() as u32;
788        let y1 = bbox.y_min() as u32;
789        let x2 = bbox.x_max() as u32;
790        let y2 = bbox.y_max() as u32;
791
792        // Ignore errors for individual regions (they might be out of bounds)
793        let _ = mask_region(image, x1, y1, x2, y2, fill_color);
794    }
795}
796
797#[cfg(test)]
798mod tests {
799    use super::*;
800    use ::image::{GenericImageView, GrayImage, ImageBuffer, Rgb, RgbImage};
801
802    fn create_test_image(width: u32, height: u32, color: [u8; 3]) -> RgbImage {
803        ImageBuffer::from_pixel(width, height, Rgb(color))
804    }
805
806    #[test]
807    fn basic_size_checks() {
808        assert!(check_image_size(&[100, 100]).is_ok());
809        assert!(check_image_size(&[0, 50]).is_err());
810    }
811
812    #[test]
813    fn slice_rgb_image_region() {
814        let img = RgbImage::from_pixel(10, 10, Rgb([255, 0, 0]));
815        let cropped = slice_image(&img, (2, 2, 6, 6)).unwrap();
816        assert_eq!(cropped.dimensions(), (4, 4));
817        assert!(slice_image(&img, (6, 6, 2, 2)).is_err());
818    }
819
820    #[test]
821    fn slice_gray_image_region() {
822        let img = GrayImage::from_pixel(10, 10, image::Luma([128]));
823        let cropped = slice_gray_image(&img, (1, 1, 5, 5)).unwrap();
824        assert_eq!(cropped.dimensions(), (4, 4));
825    }
826
827    #[test]
828    fn center_crop_coordinates() {
829        let coords = calculate_center_crop_coords(100, 60, 40, 20).unwrap();
830        assert_eq!(coords, (30, 20));
831        assert!(calculate_center_crop_coords(20, 20, 40, 10).is_err());
832    }
833
834    #[test]
835    fn crop_bounds_validation() {
836        assert!(validate_crop_bounds(100, 80, 10, 10, 40, 40).is_ok());
837        assert!(validate_crop_bounds(100, 80, 70, 10, 40, 40).is_err());
838    }
839
840    #[test]
841    fn pad_image_to_target() {
842        let img = RgbImage::from_pixel(20, 20, Rgb([10, 20, 30]));
843        let padded = pad_image(&img, 40, 40, [0, 0, 0]).unwrap();
844        assert_eq!(padded.dimensions(), (40, 40));
845        assert!(pad_image(&img, 10, 10, [0, 0, 0]).is_err());
846    }
847
848    #[test]
849    fn test_resize_and_pad_with_custom_padding() {
850        let image = create_test_image(50, 100, [255, 0, 0]); // 1:2 aspect ratio (tall)
851        let config = ResizePadConfig::new((80, 80))
852            .with_padding_strategy(PaddingStrategy::SolidColor([0, 255, 0])); // Green padding
853
854        let result = resize_and_pad(&image, &config).unwrap();
855
856        assert_eq!(result.dimensions(), (80, 80));
857
858        // The resized image should be 40x80 (maintaining 1:2 ratio), centered in 80x80
859        // So there should be 20 pixels of padding on left and right
860        let center_pixel = result.get_pixel(40, 40); // Center of image
861        assert_eq!(*center_pixel, Rgb([255, 0, 0])); // Should be red (original image)
862
863        let left_padding = result.get_pixel(10, 40); // Left padding area
864        assert_eq!(*left_padding, Rgb([0, 255, 0])); // Should be green (custom padding)
865    }
866
867    #[test]
868    fn test_resize_and_pad_left_align() {
869        let image = create_test_image(50, 100, [0, 0, 255]); // 1:2 aspect ratio (tall)
870        let config = ResizePadConfig::new((80, 80))
871            .with_padding_strategy(PaddingStrategy::LeftAlign([255, 255, 0])); // Yellow padding, left-aligned
872
873        let result = resize_and_pad(&image, &config).unwrap();
874
875        assert_eq!(result.dimensions(), (80, 80));
876
877        // The resized image should be 40x80, left-aligned in 80x80
878        let left_edge_pixel = result.get_pixel(20, 40); // Should be in the resized image
879        assert_eq!(*left_edge_pixel, Rgb([0, 0, 255])); // Should be blue (original image)
880
881        let right_padding = result.get_pixel(60, 40); // Right padding area
882        assert_eq!(*right_padding, Rgb([255, 255, 0])); // Should be yellow (padding)
883    }
884
885    #[test]
886    fn test_resize_images_batch() {
887        // Create test images with different sizes
888        let img1 = create_test_image(100, 50, [255, 0, 0]); // Red
889        let img2 = create_test_image(200, 100, [0, 255, 0]); // Green
890        let images = vec![img1, img2];
891
892        // Resize batch to 64x64
893        let resized = resize_images_batch(&images, 64, 64, None);
894
895        assert_eq!(resized.len(), 2);
896        assert_eq!(resized[0].dimensions(), (64, 64));
897        assert_eq!(resized[1].dimensions(), (64, 64));
898
899        // Check that the colors are preserved (approximately)
900        let pixel1 = resized[0].get_pixel(32, 32);
901        let pixel2 = resized[1].get_pixel(32, 32);
902
903        // Red image should still be predominantly red
904        assert!(pixel1[0] > pixel1[1] && pixel1[0] > pixel1[2]);
905        // Green image should still be predominantly green
906        assert!(pixel2[1] > pixel2[0] && pixel2[1] > pixel2[2]);
907    }
908
909    #[test]
910    fn test_resize_images_batch_to_dynamic() {
911        // Create test images
912        let img1 = create_test_image(100, 50, [255, 0, 0]);
913        let img2 = create_test_image(200, 100, [0, 255, 0]);
914        let images = vec![img1, img2];
915
916        // Resize batch to 32x32 and convert to DynamicImage
917        let resized = resize_images_batch_to_dynamic(&images, 32, 32, None);
918
919        assert_eq!(resized.len(), 2);
920
921        // Check that they are DynamicImage::ImageRgb8 variants
922        for dynamic_img in &resized {
923            assert_eq!(dynamic_img.dimensions(), (32, 32));
924            assert!(
925                matches!(dynamic_img, DynamicImage::ImageRgb8(_)),
926                "Expected ImageRgb8 variant"
927            );
928        }
929    }
930
931    #[test]
932    fn test_resize_images_batch_empty() {
933        let images: Vec<RgbImage> = vec![];
934        let resized = resize_images_batch(&images, 64, 64, None);
935        assert!(resized.is_empty());
936    }
937
938    #[test]
939    fn test_resize_images_batch_custom_filter() {
940        let img = create_test_image(100, 100, [128, 128, 128]);
941        let images = vec![img];
942
943        // Test with different filter types
944        let resized_lanczos =
945            resize_images_batch(&images, 50, 50, Some(image::imageops::FilterType::Lanczos3));
946        let resized_nearest =
947            resize_images_batch(&images, 50, 50, Some(image::imageops::FilterType::Nearest));
948
949        assert_eq!(resized_lanczos.len(), 1);
950        assert_eq!(resized_nearest.len(), 1);
951        assert_eq!(resized_lanczos[0].dimensions(), (50, 50));
952        assert_eq!(resized_nearest[0].dimensions(), (50, 50));
953    }
954
955    #[test]
956    fn test_ocr_resize_and_pad_with_max_width_constraint() {
957        let image = create_test_image(400, 100, [200, 100, 50]); // 4:1 aspect ratio
958        let config = OCRResizePadConfig::new(32, 100); // Height 32, max width 100
959
960        let (result, actual_width) = ocr_resize_and_pad(&image, &config, None).unwrap();
961
962        assert_eq!(result.height(), 32);
963        assert_eq!(actual_width, 100); // Should be constrained to max width
964        assert_eq!(result.width(), 100);
965
966        // Check that the image is left-aligned
967        let left_pixel = result.get_pixel(0, 16); // Left edge, middle height
968        assert_eq!(*left_pixel, Rgb([200, 100, 50])); // Should be original color
969    }
970
971    #[test]
972    fn test_ocr_resize_and_pad_with_target_ratio() {
973        let image = create_test_image(100, 50, [255, 128, 64]); // 2:1 aspect ratio
974        let config = OCRResizePadConfig::new(32, 200); // Height 32, max width 200
975        let target_ratio = 3.0; // Force 3:1 ratio
976
977        let (result, actual_width) =
978            ocr_resize_and_pad(&image, &config, Some(target_ratio)).unwrap();
979
980        assert_eq!(result.height(), 32);
981        assert_eq!(actual_width, 96); // 32 * 3.0 = 96
982        assert_eq!(result.width(), 96);
983    }
984
985    #[test]
986    fn test_resize_pad_config_builder() {
987        let config = ResizePadConfig::new((100, 50))
988            .with_padding_strategy(PaddingStrategy::SolidColor([255, 0, 0]))
989            .with_filter_type(image::imageops::FilterType::Lanczos3);
990
991        assert_eq!(config.target_dims, (100, 50));
992        assert_eq!(
993            config.padding_strategy,
994            PaddingStrategy::SolidColor([255, 0, 0])
995        );
996        assert_eq!(config.filter_type, image::imageops::FilterType::Lanczos3);
997    }
998
999    #[test]
1000    fn test_ocr_resize_pad_config_builder() {
1001        let config = OCRResizePadConfig::new(64, 320)
1002            .with_padding_strategy(PaddingStrategy::SolidColor([100, 100, 100]))
1003            .with_filter_type(image::imageops::FilterType::Nearest);
1004
1005        assert_eq!(config.target_height, 64);
1006        assert_eq!(config.max_width, 320);
1007        assert_eq!(
1008            config.padding_strategy,
1009            PaddingStrategy::SolidColor([100, 100, 100])
1010        );
1011        assert_eq!(config.filter_type, image::imageops::FilterType::Nearest);
1012    }
1013}
oar_ocr_core/utils/image.rs

oar_ocr_core/utils/
image.rs