// The functions in this file return `Result` even though their bodies currently
// always produce `Ok`; that is exactly what `clippy::unnecessary_wraps` flags,
// so the lint is silenced for the whole file.
#![allow(clippy::unnecessary_wraps)]
use image::{DynamicImage, GenericImageView, ImageBuffer, Rgb};
use ndarray::Array4;
use crate::error::Result;
/// Per-channel (R, G, B) mean subtracted from pixel values after they have been
/// scaled to `[0, 1]`. Passed to `image_to_tensor` by both preprocessing entry points.
pub const IMAGENET_MEAN: [f32; 3] = [0.485, 0.456, 0.406];
/// Per-channel (R, G, B) standard deviation that the mean-centered pixel values
/// are divided by. Passed to `image_to_tensor` by both preprocessing entry points.
pub const IMAGENET_STD: [f32; 3] = [0.229, 0.224, 0.225];
/// Width and height, in pixels, of the square canvas that
/// `preprocess_for_layout` resizes and pads its input to.
pub const LAYOUT_MODEL_SIZE: u32 = 1025;
/// Converts `image` into the input tensor for the layout model.
///
/// The image is fitted (aspect ratio preserved) onto a
/// `LAYOUT_MODEL_SIZE` x `LAYOUT_MODEL_SIZE` canvas, converted to 8-bit RGB,
/// and normalized with `IMAGENET_MEAN` / `IMAGENET_STD`.
///
/// Returns a tensor of shape `(1, 3, LAYOUT_MODEL_SIZE, LAYOUT_MODEL_SIZE)`.
///
/// # Errors
///
/// Propagates any error returned by the resize or tensor-conversion steps.
pub fn preprocess_for_layout(image: &DynamicImage) -> Result<Array4<f32>> {
    let padded = resize_with_padding(image, LAYOUT_MODEL_SIZE, LAYOUT_MODEL_SIZE)?.to_rgb8();
    image_to_tensor(&padded, &IMAGENET_MEAN, &IMAGENET_STD)
}
/// Scales `image` to fit inside `target_width` x `target_height` while keeping
/// its aspect ratio, then centers it on a freshly created RGB8 canvas of
/// exactly the target size.
///
/// Degenerate inputs are handled without panicking:
/// * if the source or the target has a zero dimension, the untouched canvas
///   is returned, because there is nothing meaningful to scale;
/// * for extreme aspect ratios the shorter scaled side is kept at 1 pixel or
///   more instead of being truncated to 0.
///
/// # Errors
///
/// Currently never fails; the `Result` return type is kept so callers can use `?`.
fn resize_with_padding(
    image: &DynamicImage,
    target_width: u32,
    target_height: u32,
) -> Result<DynamicImage> {
    let mut canvas = DynamicImage::new_rgb8(target_width, target_height);

    let (width, height) = image.dimensions();
    if width == 0 || height == 0 || target_width == 0 || target_height == 0 {
        // A zero dimension would make the aspect ratio `inf`/`NaN` (or the
        // clamp range below empty), so return the padding-only canvas.
        return Ok(canvas);
    }

    let aspect_ratio = width as f32 / height as f32;
    let target_aspect_ratio = target_width as f32 / target_height as f32;

    // Fill the side that is the limiting one and derive the other from the
    // source aspect ratio (float-to-int `as` truncates toward zero).
    let (new_width, new_height) = if aspect_ratio > target_aspect_ratio {
        (target_width, (target_width as f32 / aspect_ratio) as u32)
    } else {
        ((target_height as f32 * aspect_ratio) as u32, target_height)
    };

    // Truncation can yield 0 for very thin images, and f32 rounding could in
    // principle overshoot the target; clamping keeps the resize valid and
    // guarantees the offset subtractions below cannot underflow.
    let new_width = new_width.clamp(1, target_width);
    let new_height = new_height.clamp(1, target_height);

    let resized = image.resize_exact(new_width, new_height, image::imageops::FilterType::Lanczos3);

    // Center the scaled image; the remaining canvas area is the padding.
    let x_offset = (target_width - new_width) / 2;
    let y_offset = (target_height - new_height) / 2;
    image::imageops::overlay(
        &mut canvas,
        &resized,
        i64::from(x_offset),
        i64::from(y_offset),
    );

    Ok(canvas)
}
/// Converts an 8-bit RGB image into a normalized tensor of shape
/// `(1, 3, height, width)`.
///
/// Each channel value is scaled to `[0, 1]` by dividing by 255, then
/// normalized as `(value - mean[c]) / std[c]` using the statistics for that
/// channel.
///
/// # Errors
///
/// Currently never fails; the `Result` return type is kept so callers can use `?`.
fn image_to_tensor(
    image: &ImageBuffer<Rgb<u8>, Vec<u8>>,
    mean: &[f32; 3],
    std: &[f32; 3],
) -> Result<Array4<f32>> {
    let (cols, rows) = image.dimensions();
    let mut output = Array4::<f32>::zeros((1, 3, rows as usize, cols as usize));

    for (col, row, pixel) in image.enumerate_pixels() {
        let (row, col) = (row as usize, col as usize);
        // `pixel.0` is the raw `[r, g, b]` array of the pixel.
        for (channel, &raw) in pixel.0.iter().enumerate() {
            let scaled = f32::from(raw) / 255.0;
            output[[0, channel, row, col]] = (scaled - mean[channel]) / std[channel];
        }
    }

    Ok(output)
}
/// Converts `image` into the input tensor for the table model.
///
/// Both dimensions are multiplied by `scale` (truncating toward zero), the
/// image is resized to exactly that size, converted to 8-bit RGB, and
/// normalized with `IMAGENET_MEAN` / `IMAGENET_STD`.
///
/// A non-empty side is never allowed to collapse to 0 pixels: if the scaled
/// value truncates to 0 (thin image, small scale) it is raised to 1. Because
/// float-to-int `as` casts saturate and map `NaN` to 0, a non-positive or
/// `NaN` `scale` therefore yields a 1-pixel side rather than an empty tensor.
///
/// Returns a tensor of shape `(1, 3, new_height, new_width)`.
///
/// # Errors
///
/// Propagates any error returned by the tensor-conversion step.
pub fn preprocess_for_table(image: &DynamicImage, scale: f32) -> Result<Array4<f32>> {
    let scale_dimension = |dimension: u32| -> u32 {
        let scaled = (dimension as f32 * scale) as u32;
        if dimension == 0 {
            // An empty side stays empty; there is nothing to scale up.
            0
        } else {
            scaled.max(1)
        }
    };

    let (width, height) = image.dimensions();
    let new_width = scale_dimension(width);
    let new_height = scale_dimension(height);

    let rgb_image = image
        .resize_exact(new_width, new_height, image::imageops::FilterType::Lanczos3)
        .to_rgb8();
    image_to_tensor(&rgb_image, &IMAGENET_MEAN, &IMAGENET_STD)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A non-square input must still come back at exactly the requested size.
    #[test]
    fn test_resize_with_padding() {
        let image = DynamicImage::new_rgb8(800, 600);
        let resized = resize_with_padding(&image, 1025, 1025).unwrap();
        assert_eq!(resized.dimensions(), (1025, 1025));
    }

    /// The layout tensor has one batch entry, three channels, and the model size.
    #[test]
    fn test_tensor_shape() {
        let image = DynamicImage::new_rgb8(1025, 1025);
        let tensor = preprocess_for_layout(&image).unwrap();
        assert_eq!(tensor.shape(), &[1, 3, 1025, 1025]);
    }

    /// The table tensor dimensions follow the requested scale.
    #[test]
    fn test_table_scaling_shape() {
        let image = DynamicImage::new_rgb8(10, 20);
        let tensor = preprocess_for_table(&image, 2.0).unwrap();
        assert_eq!(tensor.shape(), &[1, 3, 40, 20]);
    }

    /// Normalized values must span both signs and stay within a sane range.
    #[test]
    fn test_normalization_range() {
        // An all-black image normalizes to negative values only, so it can
        // never satisfy the `max > 0.0` check. Use a white, non-square image
        // instead: the black padding bands supply the negative values and the
        // white content supplies the positive ones.
        let white = ImageBuffer::from_pixel(100, 50, Rgb([255u8, 255, 255]));
        let image = DynamicImage::ImageRgb8(white);
        let tensor = preprocess_for_layout(&image).unwrap();

        let min = tensor.iter().copied().fold(f32::INFINITY, f32::min);
        let max = tensor.iter().copied().fold(f32::NEG_INFINITY, f32::max);
        assert!(min > -5.0 && min < 0.0);
        assert!(max > 0.0 && max < 5.0);
    }
}