use crate::core::OCRError;
use crate::core::errors::ImageProcessError;
use image::{DynamicImage, GrayImage, ImageBuffer, ImageError, ImageReader, RgbImage};
use std::fs::File;
use std::io::BufReader;
use std::path::Path;
/// Converts a [`DynamicImage`] into an 8-bit RGB image.
///
/// The argument is taken by value, so the consuming conversion is used:
/// when the image is already stored as RGB8 its buffer is handed back
/// directly instead of being copied, as the borrowing `to_rgb8` would do.
pub fn dynamic_to_rgb(img: DynamicImage) -> RgbImage {
    img.into_rgb8()
}
/// Converts a [`DynamicImage`] into an 8-bit grayscale (luma) image.
///
/// The argument is taken by value, so the consuming conversion is used:
/// when the image is already stored as Luma8 its buffer is handed back
/// directly instead of being copied, as the borrowing `to_luma8` would do.
pub fn dynamic_to_gray(img: DynamicImage) -> GrayImage {
    img.into_luma8()
}
pub fn load_image_from_memory(bytes: &[u8]) -> Result<RgbImage, OCRError> {
let img = image::load_from_memory(bytes).map_err(OCRError::ImageLoad)?;
Ok(dynamic_to_rgb(img))
}
/// Loads the image at `path` and converts it to RGB.
///
/// Decoding goes through `open_image_any_format`, which retries with
/// content-based format detection when the first attempt fails to decode.
///
/// # Errors
///
/// Returns [`OCRError::ImageLoad`] wrapping the underlying image error when
/// the file cannot be opened or decoded.
pub fn load_image<P: AsRef<Path>>(path: P) -> Result<RgbImage, OCRError> {
    match open_image_any_format(path.as_ref()) {
        Ok(decoded) => Ok(dynamic_to_rgb(decoded)),
        Err(source) => Err(OCRError::ImageLoad(source)),
    }
}
/// Opens `path` with `image::open`, falling back to content sniffing.
///
/// When the first attempt fails with an error that `should_retry` accepts,
/// a warning is logged and the file is decoded again through
/// `decode_with_guessed_format`. Any other error is returned unchanged.
fn open_image_any_format(path: &Path) -> Result<DynamicImage, ImageError> {
    image::open(path).or_else(|err| {
        if !should_retry(&err) {
            return Err(err);
        }
        tracing::warn!(
            "Standard decode failed for {} ({err}). Retrying with format sniffing.",
            path.display()
        );
        decode_with_guessed_format(path)
    })
}
/// Reports whether a failed decode is worth retrying with format sniffing.
///
/// Only decoding failures and unsupported-format errors qualify; every
/// other error kind yields `false`.
fn should_retry(err: &ImageError) -> bool {
    matches!(
        err,
        ImageError::Decoding(..) | ImageError::Unsupported(..)
    )
}
/// Decodes the file at `path`, letting the reader guess the format.
///
/// The file is opened through a buffered reader and the format is chosen by
/// `ImageReader::with_guessed_format`. I/O failures from opening the file or
/// guessing the format are converted into [`ImageError`] by `?`.
fn decode_with_guessed_format(path: &Path) -> Result<DynamicImage, ImageError> {
    let buffered = BufReader::new(File::open(path)?);
    ImageReader::new(buffered).with_guessed_format()?.decode()
}
/// Builds an RGB image from a raw byte buffer.
///
/// `data` must contain exactly `width * height * 3` bytes (three bytes per
/// pixel). Returns `None` when the length does not match, when the expected
/// length does not fit in `usize`, or when `ImageBuffer::from_raw` rejects
/// the buffer.
pub fn create_rgb_image(width: u32, height: u32, data: Vec<u8>) -> Option<RgbImage> {
    // Compute the expected length with checked arithmetic: the product can
    // exceed `u32::MAX`, which would panic in overflow-checked builds and
    // wrap silently otherwise. The `u32 -> usize` casts are widening.
    let expected_len = (width as usize)
        .checked_mul(height as usize)
        .and_then(|pixels| pixels.checked_mul(3));
    if expected_len != Some(data.len()) {
        return None;
    }
    ImageBuffer::from_raw(width, height, data)
}
/// Validates that neither entry of a two-element size is zero.
///
/// # Errors
///
/// Returns [`ImageProcessError::InvalidCropSize`] when either entry is `0`.
pub fn check_image_size(size: &[u32; 2]) -> Result<(), ImageProcessError> {
    if size.contains(&0) {
        Err(ImageProcessError::InvalidCropSize)
    } else {
        Ok(())
    }
}
/// Copies the rectangle `(x1, y1, x2, y2)` out of an RGB image.
///
/// `(x1, y1)` is the inclusive top-left corner and `(x2, y2)` the exclusive
/// bottom-right corner, so the result is `x2 - x1` wide and `y2 - y1` tall.
///
/// # Errors
///
/// * [`ImageProcessError::InvalidCropCoordinates`] when `x1 >= x2` or
///   `y1 >= y2`.
/// * [`ImageProcessError::CropOutOfBounds`] when `x2` or `y2` exceeds the
///   image dimensions.
pub fn slice_image(
    img: &RgbImage,
    coords: (u32, u32, u32, u32),
) -> Result<RgbImage, ImageProcessError> {
    let (left, top, right, bottom) = coords;
    if !(left < right && top < bottom) {
        return Err(ImageProcessError::InvalidCropCoordinates);
    }

    let (width, height) = img.dimensions();
    if right > width || bottom > height {
        return Err(ImageProcessError::CropOutOfBounds);
    }

    let region = image::imageops::crop_imm(img, left, top, right - left, bottom - top);
    Ok(region.to_image())
}
/// Copies the rectangle `(x1, y1, x2, y2)` out of a grayscale image.
///
/// `(x1, y1)` is the inclusive top-left corner and `(x2, y2)` the exclusive
/// bottom-right corner, so the result is `x2 - x1` wide and `y2 - y1` tall.
///
/// # Errors
///
/// * [`ImageProcessError::InvalidCropCoordinates`] when `x1 >= x2` or
///   `y1 >= y2`.
/// * [`ImageProcessError::CropOutOfBounds`] when `x2` or `y2` exceeds the
///   image dimensions.
pub fn slice_gray_image(
    img: &GrayImage,
    coords: (u32, u32, u32, u32),
) -> Result<GrayImage, ImageProcessError> {
    let (left, top, right, bottom) = coords;
    let degenerate = left >= right || top >= bottom;
    if degenerate {
        return Err(ImageProcessError::InvalidCropCoordinates);
    }

    let (width, height) = img.dimensions();
    let inside = right <= width && bottom <= height;
    if !inside {
        return Err(ImageProcessError::CropOutOfBounds);
    }

    let view = image::imageops::crop_imm(img, left, top, right - left, bottom - top);
    Ok(view.to_image())
}
/// Computes the top-left corner of a crop centered inside an image.
///
/// Returns `(x, y)` where each coordinate is half of the leftover space on
/// that axis (integer division, rounding down).
///
/// # Errors
///
/// Returns [`ImageProcessError::CropSizeTooLarge`] when the crop is wider or
/// taller than the image.
pub fn calculate_center_crop_coords(
    img_width: u32,
    img_height: u32,
    crop_width: u32,
    crop_height: u32,
) -> Result<(u32, u32), ImageProcessError> {
    // `checked_sub` yields `None` exactly when the crop exceeds the image
    // on that axis.
    let spare_w = img_width.checked_sub(crop_width);
    let spare_h = img_height.checked_sub(crop_height);
    match (spare_w, spare_h) {
        (Some(dx), Some(dy)) => Ok((dx / 2, dy / 2)),
        _ => Err(ImageProcessError::CropSizeTooLarge),
    }
}
/// Checks that a crop rectangle lies entirely inside an image.
///
/// The rectangle starts at `(x, y)` and spans `crop_width` by `crop_height`
/// pixels; it is accepted when `x + crop_width <= img_width` and
/// `y + crop_height <= img_height`.
///
/// # Errors
///
/// Returns [`ImageProcessError::CropOutOfBounds`] when the rectangle extends
/// past the image, including when either sum does not fit in `u32`.
pub fn validate_crop_bounds(
    img_width: u32,
    img_height: u32,
    x: u32,
    y: u32,
    crop_width: u32,
    crop_height: u32,
) -> Result<(), ImageProcessError> {
    // Checked addition: a plain `+` would panic in overflow-checked builds
    // and could wrap to a small value that wrongly passes otherwise.
    let right = x.checked_add(crop_width);
    let bottom = y.checked_add(crop_height);
    match (right, bottom) {
        (Some(r), Some(b)) if r <= img_width && b <= img_height => Ok(()),
        _ => Err(ImageProcessError::CropOutOfBounds),
    }
}
/// Resizes an RGB image to exactly `width` x `height` using Lanczos3.
///
/// The aspect ratio is not preserved; the output has the requested size.
///
/// # Errors
///
/// Returns [`ImageProcessError::InvalidCropSize`] when `width` or `height`
/// is zero.
pub fn resize_image(
    img: &RgbImage,
    width: u32,
    height: u32,
) -> Result<RgbImage, ImageProcessError> {
    match (width, height) {
        (0, _) | (_, 0) => Err(ImageProcessError::InvalidCropSize),
        (w, h) => {
            let filter = image::imageops::FilterType::Lanczos3;
            Ok(image::imageops::resize(img, w, h, filter))
        }
    }
}
/// Resizes a grayscale image to exactly `width` x `height` using Lanczos3.
///
/// The aspect ratio is not preserved; the output has the requested size.
///
/// # Errors
///
/// Returns [`ImageProcessError::InvalidCropSize`] when `width` or `height`
/// is zero.
pub fn resize_gray_image(
    img: &GrayImage,
    width: u32,
    height: u32,
) -> Result<GrayImage, ImageProcessError> {
    let has_area = width != 0 && height != 0;
    if has_area {
        let filter = image::imageops::FilterType::Lanczos3;
        Ok(image::imageops::resize(img, width, height, filter))
    } else {
        Err(ImageProcessError::InvalidCropSize)
    }
}
/// Produces a grayscale copy of an RGB image via `image::imageops::grayscale`.
pub fn rgb_to_grayscale(img: &RgbImage) -> GrayImage {
    let gray = image::imageops::grayscale(img);
    gray
}
/// Centers an RGB image on a larger canvas filled with `fill_color`.
///
/// The source is placed so that the leftover space on each axis is split
/// evenly (the left/top margin gets the rounded-down half). When the target
/// size equals the source size, a clone of the source is returned.
///
/// # Errors
///
/// Returns [`ImageProcessError::InvalidCropSize`] when the target is smaller
/// than the source on either axis.
pub fn pad_image(
    img: &RgbImage,
    target_width: u32,
    target_height: u32,
    fill_color: [u8; 3],
) -> Result<RgbImage, ImageProcessError> {
    let (src_width, src_height) = img.dimensions();

    // `checked_sub` is `None` exactly when the target is smaller than the
    // source on that axis.
    let (spare_w, spare_h) = match (
        target_width.checked_sub(src_width),
        target_height.checked_sub(src_height),
    ) {
        (Some(w), Some(h)) => (w, h),
        _ => return Err(ImageProcessError::InvalidCropSize),
    };

    if spare_w == 0 && spare_h == 0 {
        return Ok(img.clone());
    }

    let mut canvas = RgbImage::from_pixel(target_width, target_height, image::Rgb(fill_color));
    image::imageops::overlay(
        &mut canvas,
        img,
        i64::from(spare_w / 2),
        i64::from(spare_h / 2),
    );
    Ok(canvas)
}
/// Loads every image in `paths` and converts each to RGB.
///
/// Delegates to `load_images_batch_with_threshold` with no explicit
/// threshold, so that function's default parallel threshold applies.
///
/// # Errors
///
/// Returns an [`OCRError`] if any image fails to load.
pub fn load_images<P: AsRef<std::path::Path> + Send + Sync>(
paths: &[P],
) -> Result<Vec<RgbImage>, OCRError> {
load_images_batch_with_threshold(paths, None)
}
/// Loads a batch of images, going parallel above a size threshold.
///
/// When `paths.len()` is strictly greater than the threshold the images are
/// loaded with rayon's parallel iterator; otherwise they are loaded
/// sequentially. `parallel_threshold` of `None` selects
/// `DEFAULT_PARALLEL_THRESHOLD`.
///
/// # Errors
///
/// Returns an [`OCRError`] if any image fails to load.
pub fn load_images_batch_with_threshold<P: AsRef<std::path::Path> + Send + Sync>(
    paths: &[P],
    parallel_threshold: Option<usize>,
) -> Result<Vec<RgbImage>, OCRError> {
    use crate::core::constants::DEFAULT_PARALLEL_THRESHOLD;

    let cutoff = match parallel_threshold {
        Some(limit) => limit,
        None => DEFAULT_PARALLEL_THRESHOLD,
    };

    if paths.len() > cutoff {
        use rayon::prelude::*;
        return paths
            .par_iter()
            .map(|path| load_image(path.as_ref()))
            .collect();
    }

    paths.iter().map(|path| load_image(path.as_ref())).collect()
}
pub fn load_images_batch_with_policy<P: AsRef<std::path::Path> + Send + Sync>(
paths: &[P],
policy: &crate::core::config::ParallelPolicy,
) -> Result<Vec<RgbImage>, OCRError> {
if paths.len() > policy.utility_threshold {
use rayon::prelude::*;
paths.par_iter().map(|p| load_image(p.as_ref())).collect()
} else {
paths.iter().map(|p| load_image(p.as_ref())).collect()
}
}
/// How the area around a resized image is filled and where the image sits.
///
/// `resize_and_pad` uses both the color and the placement; `ocr_resize_and_pad`
/// uses only the color and always draws the image at the top-left corner.
#[derive(Debug, Clone, Copy, PartialEq, Default)]
pub enum PaddingStrategy {
/// Fill with the given RGB color; `resize_and_pad` centers the image.
SolidColor([u8; 3]),
/// Fill with black (`[0, 0, 0]`); `resize_and_pad` centers the image.
/// This is the default variant.
#[default]
Black,
/// Fill with the given RGB color; `resize_and_pad` places the image at
/// offset `(0, 0)` instead of centering it.
LeftAlign([u8; 3]),
}
/// Settings for `resize_and_pad`.
#[derive(Debug, Clone)]
pub struct ResizePadConfig {
/// Output size as `(width, height)`.
pub target_dims: (u32, u32),
/// Fill color and placement of the resized image on the output canvas.
pub padding_strategy: PaddingStrategy,
/// Resampling filter used when resizing.
pub filter_type: image::imageops::FilterType,
}
impl ResizePadConfig {
    /// Creates a configuration for the given `(width, height)` target with
    /// the default padding strategy and the `Triangle` filter.
    pub fn new(target_dims: (u32, u32)) -> Self {
        let padding_strategy = PaddingStrategy::default();
        let filter_type = image::imageops::FilterType::Triangle;
        Self {
            target_dims,
            padding_strategy,
            filter_type,
        }
    }

    /// Returns the configuration with its padding strategy replaced.
    pub fn with_padding_strategy(self, strategy: PaddingStrategy) -> Self {
        Self {
            padding_strategy: strategy,
            ..self
        }
    }

    /// Returns the configuration with its resampling filter replaced.
    pub fn with_filter_type(self, filter_type: image::imageops::FilterType) -> Self {
        Self {
            filter_type,
            ..self
        }
    }
}
pub fn resize_and_pad(
image: &RgbImage,
config: &ResizePadConfig,
) -> Result<RgbImage, ImageProcessError> {
let (target_width, target_height) = config.target_dims;
if target_width == 0 || target_height == 0 {
return Err(ImageProcessError::InvalidCropSize);
}
let (orig_width, orig_height) = image.dimensions();
let scale_w = target_width as f32 / orig_width as f32;
let scale_h = target_height as f32 / orig_height as f32;
let scale = scale_w.min(scale_h);
let new_width = (orig_width as f32 * scale) as u32;
let new_height = (orig_height as f32 * scale) as u32;
let resized = image::imageops::resize(image, new_width, new_height, config.filter_type);
let padding_color = match config.padding_strategy {
PaddingStrategy::SolidColor(color) => color,
PaddingStrategy::Black => [0, 0, 0],
PaddingStrategy::LeftAlign(color) => color,
};
let padding_rgb = image::Rgb(padding_color);
let mut padded = ImageBuffer::from_pixel(target_width, target_height, padding_rgb);
let (pad_x, pad_y) = match config.padding_strategy {
PaddingStrategy::LeftAlign(_) => (0, 0),
_ => {
let pad_x = (target_width - new_width) / 2;
let pad_y = (target_height - new_height) / 2;
(pad_x, pad_y)
}
};
image::imageops::overlay(&mut padded, &resized, pad_x as i64, pad_y as i64);
Ok(padded)
}
/// Settings for `ocr_resize_and_pad`.
#[derive(Debug, Clone)]
pub struct OCRResizePadConfig {
/// Height of the output image.
pub target_height: u32,
/// Upper bound on the output width.
pub max_width: u32,
/// Supplies the fill color for the padded area.
pub padding_strategy: PaddingStrategy,
/// Resampling filter used when resizing.
pub filter_type: image::imageops::FilterType,
}
impl OCRResizePadConfig {
    /// Creates a configuration with the given output height and width cap,
    /// the default padding strategy and the `Triangle` filter.
    pub fn new(target_height: u32, max_width: u32) -> Self {
        let padding_strategy = PaddingStrategy::default();
        let filter_type = image::imageops::FilterType::Triangle;
        Self {
            target_height,
            max_width,
            padding_strategy,
            filter_type,
        }
    }

    /// Returns the configuration with its padding strategy replaced.
    pub fn with_padding_strategy(self, strategy: PaddingStrategy) -> Self {
        Self {
            padding_strategy: strategy,
            ..self
        }
    }

    /// Returns the configuration with its resampling filter replaced.
    pub fn with_filter_type(self, filter_type: image::imageops::FilterType) -> Self {
        Self {
            filter_type,
            ..self
        }
    }
}
pub fn ocr_resize_and_pad(
image: &RgbImage,
config: &OCRResizePadConfig,
target_width_ratio: Option<f32>,
) -> Result<(RgbImage, u32), ImageProcessError> {
if config.target_height == 0 {
return Err(ImageProcessError::InvalidCropSize);
}
let (original_w, original_h) = image.dimensions();
let original_ratio = original_w as f32 / original_h as f32;
let mut target_w = if let Some(ratio) = target_width_ratio {
(config.target_height as f32 * ratio) as u32
} else {
(config.target_height as f32 * original_ratio).ceil() as u32
};
let resized_w = if target_w > config.max_width {
target_w = config.max_width;
config.max_width
} else {
let ratio = original_w as f32 / original_h as f32;
if (config.target_height as f32 * ratio).ceil() as u32 > target_w {
target_w
} else {
(config.target_height as f32 * ratio).ceil() as u32
}
};
let resized_image =
image::imageops::resize(image, resized_w, config.target_height, config.filter_type);
let padding_color = match config.padding_strategy {
PaddingStrategy::SolidColor(color) => color,
PaddingStrategy::Black => [0, 0, 0],
PaddingStrategy::LeftAlign(color) => color,
};
let padding_rgb = image::Rgb(padding_color);
let mut padded_image = ImageBuffer::from_pixel(target_w, config.target_height, padding_rgb);
image::imageops::overlay(&mut padded_image, &resized_image, 0, 0);
Ok((padded_image, target_w))
}
/// Resizes every image in `images` to `target_width` x `target_height`.
///
/// `filter_type` of `None` selects Lanczos3. The output preserves the order
/// of the input slice.
pub fn resize_images_batch(
    images: &[RgbImage],
    target_width: u32,
    target_height: u32,
    filter_type: Option<image::imageops::FilterType>,
) -> Vec<RgbImage> {
    let filter = match filter_type {
        Some(chosen) => chosen,
        None => image::imageops::FilterType::Lanczos3,
    };

    let mut resized = Vec::with_capacity(images.len());
    for source in images {
        resized.push(image::imageops::resize(
            source,
            target_width,
            target_height,
            filter,
        ));
    }
    resized
}
/// Resizes every image in `images` and wraps each as a [`DynamicImage`].
///
/// Each output is the `ImageRgb8` variant with size
/// `target_width` x `target_height`. `filter_type` of `None` selects
/// Lanczos3. The output preserves the order of the input slice.
pub fn resize_images_batch_to_dynamic(
    images: &[RgbImage],
    target_width: u32,
    target_height: u32,
    filter_type: Option<image::imageops::FilterType>,
) -> Vec<DynamicImage> {
    let filter = match filter_type {
        Some(chosen) => chosen,
        None => image::imageops::FilterType::Lanczos3,
    };

    let mut wrapped = Vec::with_capacity(images.len());
    for source in images {
        let scaled = image::imageops::resize(source, target_width, target_height, filter);
        wrapped.push(DynamicImage::ImageRgb8(scaled));
    }
    wrapped
}
/// Fills the rectangle `[x1, x2) x [y1, y2)` of an image with `fill_color`.
///
/// All four coordinates are first clamped to the image dimensions, so a
/// rectangle that partly overhangs the image is filled only where it
/// overlaps.
///
/// # Errors
///
/// Returns [`ImageProcessError::InvalidCropCoordinates`] when the clamped
/// rectangle is empty (`x1 >= x2` or `y1 >= y2`).
pub fn mask_region(
    image: &mut RgbImage,
    x1: u32,
    y1: u32,
    x2: u32,
    y2: u32,
    fill_color: [u8; 3],
) -> Result<(), ImageProcessError> {
    let (img_width, img_height) = image.dimensions();
    let (left, right) = (x1.min(img_width), x2.min(img_width));
    let (top, bottom) = (y1.min(img_height), y2.min(img_height));

    if left >= right || top >= bottom {
        return Err(ImageProcessError::InvalidCropCoordinates);
    }

    let fill = image::Rgb(fill_color);
    for row in top..bottom {
        for col in left..right {
            image.put_pixel(col, row, fill);
        }
    }
    Ok(())
}
/// Fills the area covered by each bounding box with `fill_color`.
///
/// Each box's min/max coordinates are converted to `u32` with `as` casts and
/// passed to `mask_region`. Masking is best-effort: a box that `mask_region`
/// rejects is skipped and the remaining boxes are still processed.
pub fn mask_regions(
    image: &mut RgbImage,
    bboxes: &[crate::processors::BoundingBox],
    fill_color: [u8; 3],
) {
    for bbox in bboxes {
        let (left, top) = (bbox.x_min() as u32, bbox.y_min() as u32);
        let (right, bottom) = (bbox.x_max() as u32, bbox.y_max() as u32);
        // Deliberately ignore the result so one bad box does not stop the rest.
        let _ = mask_region(image, left, top, right, bottom, fill_color);
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use ::image::{GenericImageView, GrayImage, ImageBuffer, Rgb, RgbImage};

    /// Builds a `width` x `height` RGB image filled with a single color.
    fn create_test_image(width: u32, height: u32, color: [u8; 3]) -> RgbImage {
        ImageBuffer::from_pixel(width, height, Rgb(color))
    }

    #[test]
    fn basic_size_checks() {
        let non_zero = check_image_size(&[100, 100]);
        let zero_entry = check_image_size(&[0, 50]);
        assert!(non_zero.is_ok());
        assert!(zero_entry.is_err());
    }

    #[test]
    fn slice_rgb_image_region() -> Result<(), ImageProcessError> {
        let source = RgbImage::from_pixel(10, 10, Rgb([255, 0, 0]));

        let region = slice_image(&source, (2, 2, 6, 6))?;
        assert_eq!(region.dimensions(), (4, 4));

        // Inverted corners must be rejected.
        assert!(slice_image(&source, (6, 6, 2, 2)).is_err());
        Ok(())
    }

    #[test]
    fn slice_gray_image_region() -> Result<(), ImageProcessError> {
        let source = GrayImage::from_pixel(10, 10, image::Luma([128]));

        let region = slice_gray_image(&source, (1, 1, 5, 5))?;
        assert_eq!(region.dimensions(), (4, 4));
        Ok(())
    }

    #[test]
    fn center_crop_coordinates() -> Result<(), ImageProcessError> {
        let origin = calculate_center_crop_coords(100, 60, 40, 20)?;
        assert_eq!(origin, (30, 20));

        // A crop wider than the image is an error.
        assert!(calculate_center_crop_coords(20, 20, 40, 10).is_err());
        Ok(())
    }

    #[test]
    fn crop_bounds_validation() {
        let inside = validate_crop_bounds(100, 80, 10, 10, 40, 40);
        let overhanging = validate_crop_bounds(100, 80, 70, 10, 40, 40);
        assert!(inside.is_ok());
        assert!(overhanging.is_err());
    }

    #[test]
    fn pad_image_to_target() -> Result<(), ImageProcessError> {
        let source = RgbImage::from_pixel(20, 20, Rgb([10, 20, 30]));

        let grown = pad_image(&source, 40, 40, [0, 0, 0])?;
        assert_eq!(grown.dimensions(), (40, 40));

        // A target smaller than the source is an error.
        assert!(pad_image(&source, 10, 10, [0, 0, 0]).is_err());
        Ok(())
    }

    #[test]
    fn test_resize_and_pad_with_custom_padding() -> Result<(), ImageProcessError> {
        let source = create_test_image(50, 100, [255, 0, 0]);
        let config = ResizePadConfig::new((80, 80))
            .with_padding_strategy(PaddingStrategy::SolidColor([0, 255, 0]));

        let output = resize_and_pad(&source, &config)?;
        assert_eq!(output.dimensions(), (80, 80));

        // The centered image covers the middle; the left margin is padding.
        let center_pixel = output.get_pixel(40, 40);
        assert_eq!(*center_pixel, Rgb([255, 0, 0]));
        let left_padding = output.get_pixel(10, 40);
        assert_eq!(*left_padding, Rgb([0, 255, 0]));
        Ok(())
    }

    #[test]
    fn test_resize_and_pad_left_align() -> Result<(), ImageProcessError> {
        let source = create_test_image(50, 100, [0, 0, 255]);
        let config = ResizePadConfig::new((80, 80))
            .with_padding_strategy(PaddingStrategy::LeftAlign([255, 255, 0]));

        let output = resize_and_pad(&source, &config)?;
        assert_eq!(output.dimensions(), (80, 80));

        // The image starts at the left edge; padding fills the right side.
        let left_edge_pixel = output.get_pixel(20, 40);
        assert_eq!(*left_edge_pixel, Rgb([0, 0, 255]));
        let right_padding = output.get_pixel(60, 40);
        assert_eq!(*right_padding, Rgb([255, 255, 0]));
        Ok(())
    }

    #[test]
    fn test_resize_images_batch() {
        let red = create_test_image(100, 50, [255, 0, 0]);
        let green = create_test_image(200, 100, [0, 255, 0]);
        let batch = vec![red, green];

        let outputs = resize_images_batch(&batch, 64, 64, None);
        assert_eq!(outputs.len(), 2);
        assert_eq!(outputs[0].dimensions(), (64, 64));
        assert_eq!(outputs[1].dimensions(), (64, 64));

        // Each output keeps its dominant channel.
        let first = outputs[0].get_pixel(32, 32);
        let second = outputs[1].get_pixel(32, 32);
        assert!(first[0] > first[1] && first[0] > first[2]);
        assert!(second[1] > second[0] && second[1] > second[2]);
    }

    #[test]
    fn test_resize_images_batch_to_dynamic() {
        let red = create_test_image(100, 50, [255, 0, 0]);
        let green = create_test_image(200, 100, [0, 255, 0]);
        let batch = vec![red, green];

        let outputs = resize_images_batch_to_dynamic(&batch, 32, 32, None);
        assert_eq!(outputs.len(), 2);

        for dynamic_img in &outputs {
            assert_eq!(dynamic_img.dimensions(), (32, 32));
            assert!(
                matches!(dynamic_img, DynamicImage::ImageRgb8(_)),
                "Expected ImageRgb8 variant"
            );
        }
    }

    #[test]
    fn test_resize_images_batch_empty() {
        let batch: Vec<RgbImage> = vec![];
        let outputs = resize_images_batch(&batch, 64, 64, None);
        assert!(outputs.is_empty());
    }

    #[test]
    fn test_resize_images_batch_custom_filter() {
        let gray = create_test_image(100, 100, [128, 128, 128]);
        let batch = vec![gray];

        let with_lanczos =
            resize_images_batch(&batch, 50, 50, Some(image::imageops::FilterType::Lanczos3));
        let with_nearest =
            resize_images_batch(&batch, 50, 50, Some(image::imageops::FilterType::Nearest));

        assert_eq!(with_lanczos.len(), 1);
        assert_eq!(with_nearest.len(), 1);
        assert_eq!(with_lanczos[0].dimensions(), (50, 50));
        assert_eq!(with_nearest[0].dimensions(), (50, 50));
    }

    #[test]
    fn test_ocr_resize_and_pad_with_max_width_constraint() -> Result<(), ImageProcessError> {
        let source = create_test_image(400, 100, [200, 100, 50]);
        let config = OCRResizePadConfig::new(32, 100);

        let (output, actual_width) = ocr_resize_and_pad(&source, &config, None)?;
        assert_eq!(output.height(), 32);
        // The aspect-preserving width exceeds the cap, so the cap applies.
        assert_eq!(actual_width, 100);
        assert_eq!(output.width(), 100);

        let left_pixel = output.get_pixel(0, 16);
        assert_eq!(*left_pixel, Rgb([200, 100, 50]));
        Ok(())
    }

    #[test]
    fn test_ocr_resize_and_pad_with_target_ratio() -> Result<(), ImageProcessError> {
        let source = create_test_image(100, 50, [255, 128, 64]);
        let config = OCRResizePadConfig::new(32, 200);
        let target_ratio = 3.0;

        let (output, actual_width) = ocr_resize_and_pad(&source, &config, Some(target_ratio))?;
        assert_eq!(output.height(), 32);
        // 32 * 3.0 = 96, which is below the width cap.
        assert_eq!(actual_width, 96);
        assert_eq!(output.width(), 96);
        Ok(())
    }

    #[test]
    fn test_resize_pad_config_builder() {
        let config = ResizePadConfig::new((100, 50))
            .with_padding_strategy(PaddingStrategy::SolidColor([255, 0, 0]))
            .with_filter_type(image::imageops::FilterType::Lanczos3);

        assert_eq!(config.target_dims, (100, 50));
        assert_eq!(
            config.padding_strategy,
            PaddingStrategy::SolidColor([255, 0, 0])
        );
        assert_eq!(config.filter_type, image::imageops::FilterType::Lanczos3);
    }

    #[test]
    fn test_ocr_resize_pad_config_builder() {
        let config = OCRResizePadConfig::new(64, 320)
            .with_padding_strategy(PaddingStrategy::SolidColor([100, 100, 100]))
            .with_filter_type(image::imageops::FilterType::Nearest);

        assert_eq!(config.target_height, 64);
        assert_eq!(config.max_width, 320);
        assert_eq!(
            config.padding_strategy,
            PaddingStrategy::SolidColor([100, 100, 100])
        );
        assert_eq!(config.filter_type, image::imageops::FilterType::Nearest);
    }
}