oar_ocr_core/utils/
image.rs

1//! Utility functions for image processing.
2//!
3//! This module provides functions for loading, converting, and manipulating images
4//! in the OCR pipeline. It includes functions for converting between different
5//! image formats, loading single or batch images from files, creating images
6//! from raw data, and resize-and-pad operations.
7
8use crate::core::OCRError;
9use crate::core::errors::ImageProcessError;
10use image::{DynamicImage, GrayImage, ImageBuffer, ImageError, ImageReader, RgbImage};
11use std::fs::File;
12use std::io::BufReader;
13use std::path::Path;
14
15/// Converts a DynamicImage to an RgbImage.
16///
17/// This function takes a DynamicImage (which can be in any format) and converts
18/// it to an RgbImage (8-bit RGB format).
19///
20/// # Arguments
21///
22/// * `img` - The DynamicImage to convert
23///
24/// # Returns
25///
26/// * `RgbImage` - The converted RGB image
27pub fn dynamic_to_rgb(img: DynamicImage) -> RgbImage {
28    img.to_rgb8()
29}
30
31/// Converts a DynamicImage to a GrayImage.
32///
33/// This function takes a DynamicImage (which can be in any format) and converts
34/// it to a GrayImage (8-bit grayscale format).
35///
36/// # Arguments
37///
38/// * `img` - The DynamicImage to convert
39///
40/// # Returns
41///
42/// * `GrayImage` - The converted grayscale image
43pub fn dynamic_to_gray(img: DynamicImage) -> GrayImage {
44    img.to_luma8()
45}
46
47/// Loads an image from the given bytes and converts it to RgbImage.
48///
49/// This function decodes an image from a byte slice and converts it
50/// to an RgbImage. It handles any image format supported by the image crate.
51///
52/// # Arguments
53///
54/// * `bytes` - A byte slice containing the encoded image data
55///
56/// # Returns
57///
58/// * `Ok(RgbImage)` - The decoded and converted RGB image
59/// * `Err(OCRError)` - An error if the image could not be decoded or converted
60///
61/// # Errors
62///
63/// This function will return an `OCRError::ImageLoad` error if the image cannot
64/// be decoded from the provided bytes, or if there is an error during conversion.
65pub fn load_image_from_memory(bytes: &[u8]) -> Result<RgbImage, OCRError> {
66    let img = image::load_from_memory(bytes).map_err(OCRError::ImageLoad)?;
67    Ok(dynamic_to_rgb(img))
68}
69
70/// Loads an image from a file path and converts it to RgbImage.
71///
72/// This function opens an image from the specified file path and converts it
73/// to an RgbImage. It handles any image format supported by the image crate.
74///
75/// # Arguments
76///
77/// * `path` - A reference to the path of the image file to load
78///
79/// # Returns
80///
81/// * `Ok(RgbImage)` - The loaded and converted RGB image
82/// * `Err(OCRError)` - An error if the image could not be loaded or converted
83///
84/// # Errors
85///
86/// This function will return an `OCRError::ImageLoad` error if the image cannot
87/// be loaded from the specified path, or if there is an error during conversion.
88pub fn load_image<P: AsRef<Path>>(path: P) -> Result<RgbImage, OCRError> {
89    let img = open_image_any_format(path.as_ref()).map_err(OCRError::ImageLoad)?;
90    Ok(dynamic_to_rgb(img))
91}
92
93fn open_image_any_format(path: &Path) -> Result<DynamicImage, ImageError> {
94    match image::open(path) {
95        Ok(img) => Ok(img),
96        Err(err) if should_retry(&err) => {
97            tracing::warn!(
98                "Standard decode failed for {} ({err}). Retrying with format sniffing.",
99                path.display()
100            );
101            decode_with_guessed_format(path)
102        }
103        Err(err) => Err(err),
104    }
105}
106
107fn should_retry(err: &ImageError) -> bool {
108    matches!(err, ImageError::Decoding(_) | ImageError::Unsupported(_))
109}
110
111fn decode_with_guessed_format(path: &Path) -> Result<DynamicImage, ImageError> {
112    let file = File::open(path)?;
113    let reader = BufReader::new(file);
114    let reader = ImageReader::new(reader).with_guessed_format()?;
115    reader.decode()
116}
117
118/// Creates an RgbImage from raw pixel data.
119///
120/// This function creates an RgbImage from raw pixel data. The data must be
121/// in RGB format (3 bytes per pixel) and the length must match the specified
122/// width and height.
123///
124/// # Arguments
125///
126/// * `width` - The width of the image in pixels
127/// * `height` - The height of the image in pixels
128/// * `data` - A vector containing the raw pixel data (RGB format)
129///
130/// # Returns
131///
132/// * `Some(RgbImage)` - The created RGB image if the data is valid
133/// * `None` - If the data length doesn't match the specified dimensions
134pub fn create_rgb_image(width: u32, height: u32, data: Vec<u8>) -> Option<RgbImage> {
135    if data.len() != (width * height * 3) as usize {
136        return None;
137    }
138
139    ImageBuffer::from_raw(width, height, data)
140}
141
142/// Checks if the given image size is valid (non-zero dimensions).
143pub fn check_image_size(size: &[u32; 2]) -> Result<(), ImageProcessError> {
144    if size[0] == 0 || size[1] == 0 {
145        return Err(ImageProcessError::InvalidCropSize);
146    }
147    Ok(())
148}
149
150/// Extracts a rectangular region from an RGB image.
151pub fn slice_image(
152    img: &RgbImage,
153    coords: (u32, u32, u32, u32),
154) -> Result<RgbImage, ImageProcessError> {
155    let (x1, y1, x2, y2) = coords;
156    let (img_width, img_height) = img.dimensions();
157
158    if x1 >= x2 || y1 >= y2 {
159        return Err(ImageProcessError::InvalidCropCoordinates);
160    }
161
162    if x2 > img_width || y2 > img_height {
163        return Err(ImageProcessError::CropOutOfBounds);
164    }
165
166    let crop_width = x2 - x1;
167    let crop_height = y2 - y1;
168
169    Ok(image::imageops::crop_imm(img, x1, y1, crop_width, crop_height).to_image())
170}
171
172/// Extracts a rectangular region from a grayscale image.
173pub fn slice_gray_image(
174    img: &GrayImage,
175    coords: (u32, u32, u32, u32),
176) -> Result<GrayImage, ImageProcessError> {
177    let (x1, y1, x2, y2) = coords;
178    let (img_width, img_height) = img.dimensions();
179
180    if x1 >= x2 || y1 >= y2 {
181        return Err(ImageProcessError::InvalidCropCoordinates);
182    }
183
184    if x2 > img_width || y2 > img_height {
185        return Err(ImageProcessError::CropOutOfBounds);
186    }
187
188    let crop_width = x2 - x1;
189    let crop_height = y2 - y1;
190
191    Ok(image::imageops::crop_imm(img, x1, y1, crop_width, crop_height).to_image())
192}
193
194/// Calculates centered crop coordinates for a target size.
195pub fn calculate_center_crop_coords(
196    img_width: u32,
197    img_height: u32,
198    crop_width: u32,
199    crop_height: u32,
200) -> Result<(u32, u32), ImageProcessError> {
201    if crop_width > img_width || crop_height > img_height {
202        return Err(ImageProcessError::CropSizeTooLarge);
203    }
204
205    let x = (img_width - crop_width) / 2;
206    let y = (img_height - crop_height) / 2;
207
208    Ok((x, y))
209}
210
211/// Validates that crop coordinates stay within image bounds.
212pub fn validate_crop_bounds(
213    img_width: u32,
214    img_height: u32,
215    x: u32,
216    y: u32,
217    crop_width: u32,
218    crop_height: u32,
219) -> Result<(), ImageProcessError> {
220    if x + crop_width > img_width || y + crop_height > img_height {
221        return Err(ImageProcessError::CropOutOfBounds);
222    }
223    Ok(())
224}
225
226/// Resizes an RGB image to the target dimensions using Lanczos3 filtering.
227///
228/// # Errors
229///
230/// Returns `ImageProcessError::InvalidCropSize` if width or height is 0.
231pub fn resize_image(
232    img: &RgbImage,
233    width: u32,
234    height: u32,
235) -> Result<RgbImage, ImageProcessError> {
236    if width == 0 || height == 0 {
237        return Err(ImageProcessError::InvalidCropSize);
238    }
239    Ok(image::imageops::resize(
240        img,
241        width,
242        height,
243        image::imageops::FilterType::Lanczos3,
244    ))
245}
246
247/// Resizes a grayscale image to the target dimensions using Lanczos3 filtering.
248///
249/// # Errors
250///
251/// Returns `ImageProcessError::InvalidCropSize` if width or height is 0.
252pub fn resize_gray_image(
253    img: &GrayImage,
254    width: u32,
255    height: u32,
256) -> Result<GrayImage, ImageProcessError> {
257    if width == 0 || height == 0 {
258        return Err(ImageProcessError::InvalidCropSize);
259    }
260    Ok(image::imageops::resize(
261        img,
262        width,
263        height,
264        image::imageops::FilterType::Lanczos3,
265    ))
266}
267
268/// Converts an RGB image to grayscale.
269pub fn rgb_to_grayscale(img: &RgbImage) -> GrayImage {
270    image::imageops::grayscale(img)
271}
272
273/// Pads an image to the specified dimensions with a fill color.
274pub fn pad_image(
275    img: &RgbImage,
276    target_width: u32,
277    target_height: u32,
278    fill_color: [u8; 3],
279) -> Result<RgbImage, ImageProcessError> {
280    let (src_width, src_height) = img.dimensions();
281
282    if target_width < src_width || target_height < src_height {
283        return Err(ImageProcessError::InvalidCropSize);
284    }
285
286    if target_width == src_width && target_height == src_height {
287        return Ok(img.clone());
288    }
289
290    let mut padded = RgbImage::from_pixel(target_width, target_height, image::Rgb(fill_color));
291    let x_offset = (target_width - src_width) / 2;
292    let y_offset = (target_height - src_height) / 2;
293    image::imageops::overlay(&mut padded, img, x_offset as i64, y_offset as i64);
294
295    Ok(padded)
296}
297
298/// Loads a batch of images from file paths.
299///
300/// This function loads multiple images from the specified file paths and
301/// converts them to RgbImages. It uses parallel processing when the number
302/// of images exceeds the default parallel threshold.
303///
304/// # Arguments
305///
306/// * `paths` - A slice of paths to the image files to load
307///
308/// # Returns
309///
310/// * `Ok(Vec<RgbImage>)` - A vector of loaded RGB images
311/// * `Err(OCRError)` - An error if any image could not be loaded
312///
313/// # Errors
314///
315/// This function will return an `OCRError` if any image cannot be loaded
316/// from its specified path.
317pub fn load_images<P: AsRef<std::path::Path> + Send + Sync>(
318    paths: &[P],
319) -> Result<Vec<RgbImage>, OCRError> {
320    load_images_batch_with_threshold(paths, None)
321}
322
323/// Loads a batch of images from file paths with a custom parallel threshold.
324///
325/// This function loads multiple images from the specified file paths and
326/// converts them to RgbImages. It uses parallel processing when the number
327/// of images exceeds the specified threshold, or the default threshold if
328/// none is provided.
329///
330/// # Arguments
331///
332/// * `paths` - A slice of paths to the image files to load
333/// * `parallel_threshold` - An optional threshold for parallel processing.
334///   If `None`, the default threshold from `DEFAULT_PARALLEL_THRESHOLD` is used.
335///
336/// # Returns
337///
338/// * `Ok(Vec<RgbImage>)` - A vector of loaded RGB images
339/// * `Err(OCRError)` - An error if any image could not be loaded
340///
341/// # Errors
342///
343/// This function will return an `OCRError` if any image cannot be loaded
344/// from its specified path.
345pub fn load_images_batch_with_threshold<P: AsRef<std::path::Path> + Send + Sync>(
346    paths: &[P],
347    parallel_threshold: Option<usize>,
348) -> Result<Vec<RgbImage>, OCRError> {
349    use crate::core::constants::DEFAULT_PARALLEL_THRESHOLD;
350
351    let threshold = parallel_threshold.unwrap_or(DEFAULT_PARALLEL_THRESHOLD);
352
353    if paths.len() > threshold {
354        use rayon::prelude::*;
355        paths.par_iter().map(|p| load_image(p.as_ref())).collect()
356    } else {
357        paths.iter().map(|p| load_image(p.as_ref())).collect()
358    }
359}
360
361/// Load multiple images from file paths using centralized parallel policy.
362///
363/// This function loads images from the provided file paths using the utility threshold
364/// from the centralized ParallelPolicy. If the number of paths exceeds the threshold,
365/// the loading is performed in parallel using rayon. Otherwise, images are loaded
366/// sequentially.
367///
368/// # Arguments
369///
370/// * `paths` - A slice of paths to image files
371/// * `policy` - The parallel policy containing the utility threshold
372///
373/// # Returns
374///
375/// A Result containing a vector of loaded RgbImages, or an OCRError if any image fails to load.
376///
377/// # Errors
378///
379/// This function will return an `OCRError` if any image cannot be loaded
380/// from its specified path.
381pub fn load_images_batch_with_policy<P: AsRef<std::path::Path> + Send + Sync>(
382    paths: &[P],
383    policy: &crate::core::config::ParallelPolicy,
384) -> Result<Vec<RgbImage>, OCRError> {
385    if paths.len() > policy.utility_threshold {
386        use rayon::prelude::*;
387        paths.par_iter().map(|p| load_image(p.as_ref())).collect()
388    } else {
389        paths.iter().map(|p| load_image(p.as_ref())).collect()
390    }
391}
392
/// Padding strategy for resize-and-pad operations.
///
/// Each variant determines the fill color of the padded area. How the resized
/// image is positioned inside the padded output is decided by the function
/// that consumes the strategy.
///
/// The type derives `Eq` in addition to `PartialEq`: all payloads are byte
/// arrays, so equality is total.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum PaddingStrategy {
    /// Pad with the given solid RGB color
    SolidColor([u8; 3]),
    /// Pad with black (equivalent to `SolidColor([0, 0, 0])`); this is the default
    #[default]
    Black,
    /// Left-align the resized image (no centering) and pad with the given RGB color
    LeftAlign([u8; 3]),
}
404
405/// Configuration for resize-and-pad operations.
406#[derive(Debug, Clone)]
407pub struct ResizePadConfig {
408    /// Target dimensions (width, height)
409    pub target_dims: (u32, u32),
410    /// Padding strategy to use
411    pub padding_strategy: PaddingStrategy,
412    /// Filter type for resizing
413    pub filter_type: image::imageops::FilterType,
414}
415
416impl ResizePadConfig {
417    /// Create a new resize-pad configuration.
418    pub fn new(target_dims: (u32, u32)) -> Self {
419        Self {
420            target_dims,
421            padding_strategy: PaddingStrategy::default(),
422            filter_type: image::imageops::FilterType::Triangle,
423        }
424    }
425
426    /// Set the padding strategy.
427    pub fn with_padding_strategy(mut self, strategy: PaddingStrategy) -> Self {
428        self.padding_strategy = strategy;
429        self
430    }
431
432    /// Set the filter type for resizing.
433    pub fn with_filter_type(mut self, filter_type: image::imageops::FilterType) -> Self {
434        self.filter_type = filter_type;
435        self
436    }
437}
438
439/// Resize an image to fit within target dimensions while maintaining aspect ratio,
440/// then pad to exact target dimensions.
441///
442/// This function provides a unified approach to resize-and-pad operations that
443/// can replace the duplicated logic found in various processors.
444///
445/// # Arguments
446///
447/// * `image` - The input RGB image to resize and pad
448/// * `config` - Configuration for the resize-and-pad operation
449///
450/// # Returns
451///
452/// A resized and padded RGB image with exact target dimensions.
453///
454/// # Errors
455///
456/// Returns `ImageProcessError::InvalidCropSize` if target dimensions are 0.
457pub fn resize_and_pad(
458    image: &RgbImage,
459    config: &ResizePadConfig,
460) -> Result<RgbImage, ImageProcessError> {
461    let (target_width, target_height) = config.target_dims;
462
463    if target_width == 0 || target_height == 0 {
464        return Err(ImageProcessError::InvalidCropSize);
465    }
466
467    let (orig_width, orig_height) = image.dimensions();
468
469    // Calculate scaling factor to fit within target dimensions while maintaining aspect ratio
470    let scale_w = target_width as f32 / orig_width as f32;
471    let scale_h = target_height as f32 / orig_height as f32;
472    let scale = scale_w.min(scale_h);
473
474    // Calculate new dimensions
475    let new_width = (orig_width as f32 * scale) as u32;
476    let new_height = (orig_height as f32 * scale) as u32;
477
478    // Resize the image
479    let resized = image::imageops::resize(image, new_width, new_height, config.filter_type);
480
481    // Create padded image with target dimensions
482    let padding_color = match config.padding_strategy {
483        PaddingStrategy::SolidColor(color) => color,
484        PaddingStrategy::Black => [0, 0, 0],
485        PaddingStrategy::LeftAlign(color) => color,
486    };
487    let padding_rgb = image::Rgb(padding_color);
488    let mut padded = ImageBuffer::from_pixel(target_width, target_height, padding_rgb);
489
490    // Calculate padding offsets
491    let (pad_x, pad_y) = match config.padding_strategy {
492        PaddingStrategy::LeftAlign(_) => (0, 0),
493        _ => {
494            // Center the image
495            let pad_x = (target_width - new_width) / 2;
496            let pad_y = (target_height - new_height) / 2;
497            (pad_x, pad_y)
498        }
499    };
500
501    // Copy resized image to padded image using efficient overlay
502    image::imageops::overlay(&mut padded, &resized, pad_x as i64, pad_y as i64);
503
504    Ok(padded)
505}
506
507/// Configuration for OCR-style resize-and-pad operations with width constraints.
508#[derive(Debug, Clone)]
509pub struct OCRResizePadConfig {
510    /// Target height
511    pub target_height: u32,
512    /// Maximum allowed width
513    pub max_width: u32,
514    /// Padding strategy to use
515    pub padding_strategy: PaddingStrategy,
516    /// Filter type for resizing
517    pub filter_type: image::imageops::FilterType,
518}
519
520impl OCRResizePadConfig {
521    /// Create a new OCR resize-pad configuration.
522    ///
523    /// Uses Triangle (bilinear) interpolation to match OpenCV's cv2.resize default behavior.
524    pub fn new(target_height: u32, max_width: u32) -> Self {
525        Self {
526            target_height,
527            max_width,
528            padding_strategy: PaddingStrategy::default(),
529            // Use Triangle (bilinear) to match cv2.resize INTER_LINEAR
530            filter_type: image::imageops::FilterType::Triangle,
531        }
532    }
533
534    /// Set the padding strategy.
535    pub fn with_padding_strategy(mut self, strategy: PaddingStrategy) -> Self {
536        self.padding_strategy = strategy;
537        self
538    }
539
540    /// Set the filter type for resizing.
541    pub fn with_filter_type(mut self, filter_type: image::imageops::FilterType) -> Self {
542        self.filter_type = filter_type;
543        self
544    }
545}
546
547/// Resize an image for OCR processing with width constraints and padding.
548///
549/// This function handles the specific resize-and-pad logic used in OCR processing,
550/// where images are resized to a fixed height while maintaining aspect ratio,
551/// with a maximum width constraint, and then padded to a target width.
552///
553/// # Arguments
554///
555/// * `image` - The input RGB image to resize and pad
556/// * `config` - Configuration for the OCR resize-and-pad operation
557/// * `target_width_ratio` - Optional ratio to calculate target width from height.
558///   If None, uses the image's original aspect ratio.
559///
560/// # Returns
561///
562/// A tuple containing:
563/// - The resized and padded RGB image
564/// - The actual width used for the padded image
565///
566/// # Errors
567///
568/// Returns `ImageProcessError::InvalidCropSize` if target height is 0.
569pub fn ocr_resize_and_pad(
570    image: &RgbImage,
571    config: &OCRResizePadConfig,
572    target_width_ratio: Option<f32>,
573) -> Result<(RgbImage, u32), ImageProcessError> {
574    if config.target_height == 0 {
575        return Err(ImageProcessError::InvalidCropSize);
576    }
577
578    let (original_w, original_h) = image.dimensions();
579    let original_ratio = original_w as f32 / original_h as f32;
580
581    // Calculate target width based on ratio or original aspect ratio
582    let mut target_w = if let Some(ratio) = target_width_ratio {
583        (config.target_height as f32 * ratio) as u32
584    } else {
585        (config.target_height as f32 * original_ratio).ceil() as u32
586    };
587
588    // Apply maximum width constraint
589    let resized_w = if target_w > config.max_width {
590        target_w = config.max_width;
591        config.max_width
592    } else {
593        // Calculate actual resized width based on aspect ratio
594        let ratio = original_w as f32 / original_h as f32;
595        if (config.target_height as f32 * ratio).ceil() as u32 > target_w {
596            target_w
597        } else {
598            (config.target_height as f32 * ratio).ceil() as u32
599        }
600    };
601
602    // Resize the image
603    let resized_image =
604        image::imageops::resize(image, resized_w, config.target_height, config.filter_type);
605
606    // Create padded image with target dimensions
607    let padding_color = match config.padding_strategy {
608        PaddingStrategy::SolidColor(color) => color,
609        PaddingStrategy::Black => [0, 0, 0],
610        PaddingStrategy::LeftAlign(color) => color,
611    };
612    let padding_rgb = image::Rgb(padding_color);
613    let mut padded_image = ImageBuffer::from_pixel(target_w, config.target_height, padding_rgb);
614
615    // Copy resized image to padded image (left-aligned for OCR)
616    image::imageops::overlay(&mut padded_image, &resized_image, 0, 0);
617
618    Ok((padded_image, target_w))
619}
620
621/// Resizes a batch of images to the specified dimensions.
622///
623/// This function provides a unified approach to batch image resizing that can replace
624/// duplicated resize loops found in various predictors. It supports both functional
625/// and imperative styles and can optionally apply post-processing operations.
626///
627/// # Arguments
628///
629/// * `images` - A slice of RGB images to resize
630/// * `target_width` - Target width for all images
631/// * `target_height` - Target height for all images
632/// * `filter_type` - The filter type to use for resizing (defaults to Lanczos3 if None)
633///
634/// # Returns
635///
636/// A vector of resized RGB images.
637///
638/// # Example
639///
640/// ```rust,no_run
641/// use oar_ocr_core::utils::resize_images_batch;
642/// use image::RgbImage;
643///
644/// let images = vec![RgbImage::new(100, 100), RgbImage::new(200, 150)];
645/// let resized = resize_images_batch(&images, 224, 224, None);
646/// assert_eq!(resized.len(), 2);
647/// assert_eq!(resized[0].dimensions(), (224, 224));
648/// ```
649pub fn resize_images_batch(
650    images: &[RgbImage],
651    target_width: u32,
652    target_height: u32,
653    filter_type: Option<image::imageops::FilterType>,
654) -> Vec<RgbImage> {
655    let filter = filter_type.unwrap_or(image::imageops::FilterType::Lanczos3);
656
657    images
658        .iter()
659        .map(|img| image::imageops::resize(img, target_width, target_height, filter))
660        .collect()
661}
662
663/// Resizes a batch of images and converts them to DynamicImage format.
664///
665/// This function combines batch resizing with conversion to DynamicImage format,
666/// which is commonly needed in OCR preprocessing pipelines.
667///
668/// # Arguments
669///
670/// * `images` - A slice of RGB images to resize
671/// * `target_width` - Target width for all images
672/// * `target_height` - Target height for all images
673/// * `filter_type` - The filter type to use for resizing (defaults to Lanczos3 if None)
674///
675/// # Returns
676///
677/// A vector of resized images as DynamicImage instances.
678pub fn resize_images_batch_to_dynamic(
679    images: &[RgbImage],
680    target_width: u32,
681    target_height: u32,
682    filter_type: Option<image::imageops::FilterType>,
683) -> Vec<DynamicImage> {
684    let filter = filter_type.unwrap_or(image::imageops::FilterType::Lanczos3);
685
686    images
687        .iter()
688        .map(|img| {
689            let resized = image::imageops::resize(img, target_width, target_height, filter);
690            DynamicImage::ImageRgb8(resized)
691        })
692        .collect()
693}
694
695/// Masks a rectangular region in an RGB image with a solid color.
696///
697/// This function fills the specified rectangular region with a solid color,
698/// which is useful for masking formula regions before text detection to prevent
699/// formulas from being incorrectly detected as text (as done in PP-StructureV3).
700///
701/// # Arguments
702///
703/// * `image` - A mutable reference to the RGB image to mask
704/// * `x1` - Left coordinate of the region
705/// * `y1` - Top coordinate of the region
706/// * `x2` - Right coordinate of the region
707/// * `y2` - Bottom coordinate of the region
708/// * `fill_color` - The color to fill the masked region with (default: white [255, 255, 255])
709///
710/// # Returns
711///
712/// Returns `Ok(())` if masking succeeds, or an error if coordinates are invalid.
713///
714/// # Example
715///
716/// ```rust,no_run
717/// use oar_ocr_core::utils::mask_region;
718/// use image::RgbImage;
719///
720/// # fn main() -> Result<(), oar_ocr_core::core::errors::ImageProcessError> {
721/// let mut image = RgbImage::new(100, 100);
722/// // Mask a formula region from (10, 10) to (50, 30) with white
723/// mask_region(&mut image, 10, 10, 50, 30, [255, 255, 255])?;
724/// # Ok(())
725/// # }
726/// ```
727pub fn mask_region(
728    image: &mut RgbImage,
729    x1: u32,
730    y1: u32,
731    x2: u32,
732    y2: u32,
733    fill_color: [u8; 3],
734) -> Result<(), ImageProcessError> {
735    let (img_width, img_height) = image.dimensions();
736
737    // Clamp coordinates to image bounds
738    let x1 = x1.min(img_width);
739    let y1 = y1.min(img_height);
740    let x2 = x2.min(img_width);
741    let y2 = y2.min(img_height);
742
743    if x1 >= x2 || y1 >= y2 {
744        return Err(ImageProcessError::InvalidCropCoordinates);
745    }
746
747    let rgb = image::Rgb(fill_color);
748    for y in y1..y2 {
749        for x in x1..x2 {
750            image.put_pixel(x, y, rgb);
751        }
752    }
753
754    Ok(())
755}
756
757/// Masks multiple bounding box regions in an RGB image.
758///
759/// This function masks multiple regions by filling them with a solid color.
760/// It is useful for batch masking multiple formula or other regions before
761/// text detection (as done in PP-StructureV3).
762///
763/// # Arguments
764///
765/// * `image` - A mutable reference to the RGB image to mask
766/// * `bboxes` - A slice of bounding boxes to mask. Each bbox should provide
767///   `x_min()`, `y_min()`, `x_max()`, `y_max()` methods.
768/// * `fill_color` - The color to fill the masked regions with
769///
770/// # Example
771///
772/// ```rust,no_run
773/// use oar_ocr_core::utils::mask_regions;
774/// use oar_ocr_core::processors::BoundingBox;
775/// use image::RgbImage;
776///
777/// let mut image = RgbImage::new(100, 100);
778/// let bboxes = vec![
779///     BoundingBox::from_coords(10.0, 10.0, 30.0, 30.0),
780///     BoundingBox::from_coords(50.0, 50.0, 70.0, 70.0),
781/// ];
782/// mask_regions(&mut image, &bboxes, [255, 255, 255]);
783/// ```
784pub fn mask_regions(
785    image: &mut RgbImage,
786    bboxes: &[crate::processors::BoundingBox],
787    fill_color: [u8; 3],
788) {
789    for bbox in bboxes {
790        let x1 = bbox.x_min() as u32;
791        let y1 = bbox.y_min() as u32;
792        let x2 = bbox.x_max() as u32;
793        let y2 = bbox.y_max() as u32;
794
795        // Ignore errors for individual regions (they might be out of bounds)
796        let _ = mask_region(image, x1, y1, x2, y2, fill_color);
797    }
798}
799
800#[cfg(test)]
801mod tests {
802    use super::*;
803    use ::image::{GenericImageView, GrayImage, ImageBuffer, Rgb, RgbImage};
804
805    fn create_test_image(width: u32, height: u32, color: [u8; 3]) -> RgbImage {
806        ImageBuffer::from_pixel(width, height, Rgb(color))
807    }
808
809    #[test]
810    fn basic_size_checks() {
811        assert!(check_image_size(&[100, 100]).is_ok());
812        assert!(check_image_size(&[0, 50]).is_err());
813    }
814
815    #[test]
816    fn slice_rgb_image_region() -> Result<(), ImageProcessError> {
817        let img = RgbImage::from_pixel(10, 10, Rgb([255, 0, 0]));
818        let cropped = slice_image(&img, (2, 2, 6, 6))?;
819        assert_eq!(cropped.dimensions(), (4, 4));
820        assert!(slice_image(&img, (6, 6, 2, 2)).is_err());
821        Ok(())
822    }
823
824    #[test]
825    fn slice_gray_image_region() -> Result<(), ImageProcessError> {
826        let img = GrayImage::from_pixel(10, 10, image::Luma([128]));
827        let cropped = slice_gray_image(&img, (1, 1, 5, 5))?;
828        assert_eq!(cropped.dimensions(), (4, 4));
829        Ok(())
830    }
831
832    #[test]
833    fn center_crop_coordinates() -> Result<(), ImageProcessError> {
834        let coords = calculate_center_crop_coords(100, 60, 40, 20)?;
835        assert_eq!(coords, (30, 20));
836        assert!(calculate_center_crop_coords(20, 20, 40, 10).is_err());
837        Ok(())
838    }
839
840    #[test]
841    fn crop_bounds_validation() {
842        assert!(validate_crop_bounds(100, 80, 10, 10, 40, 40).is_ok());
843        assert!(validate_crop_bounds(100, 80, 70, 10, 40, 40).is_err());
844    }
845
846    #[test]
847    fn pad_image_to_target() -> Result<(), ImageProcessError> {
848        let img = RgbImage::from_pixel(20, 20, Rgb([10, 20, 30]));
849        let padded = pad_image(&img, 40, 40, [0, 0, 0])?;
850        assert_eq!(padded.dimensions(), (40, 40));
851        assert!(pad_image(&img, 10, 10, [0, 0, 0]).is_err());
852        Ok(())
853    }
854
855    #[test]
856    fn test_resize_and_pad_with_custom_padding() -> Result<(), ImageProcessError> {
857        let image = create_test_image(50, 100, [255, 0, 0]); // 1:2 aspect ratio (tall)
858        let config = ResizePadConfig::new((80, 80))
859            .with_padding_strategy(PaddingStrategy::SolidColor([0, 255, 0])); // Green padding
860
861        let result = resize_and_pad(&image, &config)?;
862
863        assert_eq!(result.dimensions(), (80, 80));
864
865        // The resized image should be 40x80 (maintaining 1:2 ratio), centered in 80x80
866        // So there should be 20 pixels of padding on left and right
867        let center_pixel = result.get_pixel(40, 40); // Center of image
868        assert_eq!(*center_pixel, Rgb([255, 0, 0])); // Should be red (original image)
869
870        let left_padding = result.get_pixel(10, 40); // Left padding area
871        assert_eq!(*left_padding, Rgb([0, 255, 0])); // Should be green (custom padding)
872        Ok(())
873    }
874
875    #[test]
876    fn test_resize_and_pad_left_align() -> Result<(), ImageProcessError> {
877        let image = create_test_image(50, 100, [0, 0, 255]); // 1:2 aspect ratio (tall)
878        let config = ResizePadConfig::new((80, 80))
879            .with_padding_strategy(PaddingStrategy::LeftAlign([255, 255, 0])); // Yellow padding, left-aligned
880
881        let result = resize_and_pad(&image, &config)?;
882
883        assert_eq!(result.dimensions(), (80, 80));
884
885        // The resized image should be 40x80, left-aligned in 80x80
886        let left_edge_pixel = result.get_pixel(20, 40); // Should be in the resized image
887        assert_eq!(*left_edge_pixel, Rgb([0, 0, 255])); // Should be blue (original image)
888
889        let right_padding = result.get_pixel(60, 40); // Right padding area
890        assert_eq!(*right_padding, Rgb([255, 255, 0])); // Should be yellow (padding)
891        Ok(())
892    }
893
894    #[test]
895    fn test_resize_images_batch() {
896        // Create test images with different sizes
897        let img1 = create_test_image(100, 50, [255, 0, 0]); // Red
898        let img2 = create_test_image(200, 100, [0, 255, 0]); // Green
899        let images = vec![img1, img2];
900
901        // Resize batch to 64x64
902        let resized = resize_images_batch(&images, 64, 64, None);
903
904        assert_eq!(resized.len(), 2);
905        assert_eq!(resized[0].dimensions(), (64, 64));
906        assert_eq!(resized[1].dimensions(), (64, 64));
907
908        // Check that the colors are preserved (approximately)
909        let pixel1 = resized[0].get_pixel(32, 32);
910        let pixel2 = resized[1].get_pixel(32, 32);
911
912        // Red image should still be predominantly red
913        assert!(pixel1[0] > pixel1[1] && pixel1[0] > pixel1[2]);
914        // Green image should still be predominantly green
915        assert!(pixel2[1] > pixel2[0] && pixel2[1] > pixel2[2]);
916    }
917
918    #[test]
919    fn test_resize_images_batch_to_dynamic() {
920        // Create test images
921        let img1 = create_test_image(100, 50, [255, 0, 0]);
922        let img2 = create_test_image(200, 100, [0, 255, 0]);
923        let images = vec![img1, img2];
924
925        // Resize batch to 32x32 and convert to DynamicImage
926        let resized = resize_images_batch_to_dynamic(&images, 32, 32, None);
927
928        assert_eq!(resized.len(), 2);
929
930        // Check that they are DynamicImage::ImageRgb8 variants
931        for dynamic_img in &resized {
932            assert_eq!(dynamic_img.dimensions(), (32, 32));
933            assert!(
934                matches!(dynamic_img, DynamicImage::ImageRgb8(_)),
935                "Expected ImageRgb8 variant"
936            );
937        }
938    }
939
940    #[test]
941    fn test_resize_images_batch_empty() {
942        let images: Vec<RgbImage> = vec![];
943        let resized = resize_images_batch(&images, 64, 64, None);
944        assert!(resized.is_empty());
945    }
946
947    #[test]
948    fn test_resize_images_batch_custom_filter() {
949        let img = create_test_image(100, 100, [128, 128, 128]);
950        let images = vec![img];
951
952        // Test with different filter types
953        let resized_lanczos =
954            resize_images_batch(&images, 50, 50, Some(image::imageops::FilterType::Lanczos3));
955        let resized_nearest =
956            resize_images_batch(&images, 50, 50, Some(image::imageops::FilterType::Nearest));
957
958        assert_eq!(resized_lanczos.len(), 1);
959        assert_eq!(resized_nearest.len(), 1);
960        assert_eq!(resized_lanczos[0].dimensions(), (50, 50));
961        assert_eq!(resized_nearest[0].dimensions(), (50, 50));
962    }
963
964    #[test]
965    fn test_ocr_resize_and_pad_with_max_width_constraint() -> Result<(), ImageProcessError> {
966        let image = create_test_image(400, 100, [200, 100, 50]); // 4:1 aspect ratio
967        let config = OCRResizePadConfig::new(32, 100); // Height 32, max width 100
968
969        let (result, actual_width) = ocr_resize_and_pad(&image, &config, None)?;
970
971        assert_eq!(result.height(), 32);
972        assert_eq!(actual_width, 100); // Should be constrained to max width
973        assert_eq!(result.width(), 100);
974
975        // Check that the image is left-aligned
976        let left_pixel = result.get_pixel(0, 16); // Left edge, middle height
977        assert_eq!(*left_pixel, Rgb([200, 100, 50])); // Should be original color
978        Ok(())
979    }
980
981    #[test]
982    fn test_ocr_resize_and_pad_with_target_ratio() -> Result<(), ImageProcessError> {
983        let image = create_test_image(100, 50, [255, 128, 64]); // 2:1 aspect ratio
984        let config = OCRResizePadConfig::new(32, 200); // Height 32, max width 200
985        let target_ratio = 3.0; // Force 3:1 ratio
986
987        let (result, actual_width) = ocr_resize_and_pad(&image, &config, Some(target_ratio))?;
988
989        assert_eq!(result.height(), 32);
990        assert_eq!(actual_width, 96); // 32 * 3.0 = 96
991        assert_eq!(result.width(), 96);
992        Ok(())
993    }
994
995    #[test]
996    fn test_resize_pad_config_builder() {
997        let config = ResizePadConfig::new((100, 50))
998            .with_padding_strategy(PaddingStrategy::SolidColor([255, 0, 0]))
999            .with_filter_type(image::imageops::FilterType::Lanczos3);
1000
1001        assert_eq!(config.target_dims, (100, 50));
1002        assert_eq!(
1003            config.padding_strategy,
1004            PaddingStrategy::SolidColor([255, 0, 0])
1005        );
1006        assert_eq!(config.filter_type, image::imageops::FilterType::Lanczos3);
1007    }
1008
1009    #[test]
1010    fn test_ocr_resize_pad_config_builder() {
1011        let config = OCRResizePadConfig::new(64, 320)
1012            .with_padding_strategy(PaddingStrategy::SolidColor([100, 100, 100]))
1013            .with_filter_type(image::imageops::FilterType::Nearest);
1014
1015        assert_eq!(config.target_height, 64);
1016        assert_eq!(config.max_width, 320);
1017        assert_eq!(
1018            config.padding_strategy,
1019            PaddingStrategy::SolidColor([100, 100, 100])
1020        );
1021        assert_eq!(config.filter_type, image::imageops::FilterType::Nearest);
1022    }
1023}
oar_ocr_core/utils/image.rs

oar_ocr_core/utils/
image.rs