car-inference 0.13.0

Local model inference for CAR — Candle backend with Qwen3 models
Documentation
//! Image loading helpers for video conditioning inputs (i2v).

use std::path::Path;

use image::imageops::FilterType;
use mlx_rs::Array;

use crate::InferenceError;

/// Load an image from disk and convert it to an MLX tensor of shape
/// `[1, height, width, 3]` with f32 values in `[0, 1]`.
///
/// Aspect ratio is preserved by center-cropping the source to match the
/// target aspect before a Lanczos3 resize — this avoids silently stretching
/// landscape references into portrait latents (or vice versa). Non-RGB inputs
/// (RGBA, grayscale) are converted to RGB; the alpha channel is discarded.
///
/// # Errors
///
/// Returns [`InferenceError::InferenceFailed`] if the file cannot be opened or
/// decoded, if the decoded image has a zero-sized dimension, or if `width` or
/// `height` does not fit in the `i32` used for the tensor shape.
///
/// # Panics
///
/// Panics if `width` or `height` is zero.
pub fn load_rgb_image(path: &Path, width: u32, height: u32) -> Result<Array, InferenceError> {
    assert!(
        width > 0 && height > 0,
        "image target dims must be non-zero"
    );

    // The tensor shape is expressed in i32; reject targets that would wrap
    // to a negative dimension instead of casting blindly.
    let to_shape_dim = |dim: u32| {
        i32::try_from(dim).map_err(|_| {
            InferenceError::InferenceFailed(format!("image target dim {dim} exceeds i32 range"))
        })
    };
    let shape_h = to_shape_dim(height)?;
    let shape_w = to_shape_dim(width)?;

    let img = image::open(path)
        .map_err(|e| InferenceError::InferenceFailed(format!("open {}: {e}", path.display())))?;

    let (src_w, src_h) = (img.width(), img.height());
    if src_w == 0 || src_h == 0 {
        // A zero dimension would make the aspect ratio NaN/infinite below.
        return Err(InferenceError::InferenceFailed(format!(
            "open {}: image has zero-sized dimensions ({src_w}x{src_h})",
            path.display()
        )));
    }

    let target_aspect = f64::from(width) / f64::from(height);
    let src_aspect = f64::from(src_w) / f64::from(src_h);

    // Center-crop to target aspect, then resize exact.
    let cropped = if (src_aspect - target_aspect).abs() < 1e-6 {
        img
    } else if src_aspect > target_aspect {
        // Source is wider: crop horizontally. Clamp so an extreme aspect
        // mismatch cannot round the crop width down to zero pixels.
        let crop_w = ((f64::from(src_h) * target_aspect).round() as u32).clamp(1, src_w);
        let x = (src_w - crop_w) / 2;
        img.crop_imm(x, 0, crop_w, src_h)
    } else {
        // Source is taller: crop vertically, with the same zero-size guard.
        let crop_h = ((f64::from(src_w) / target_aspect).round() as u32).clamp(1, src_h);
        let y = (src_h - crop_h) / 2;
        img.crop_imm(0, y, src_w, crop_h)
    };

    // Skip resampling entirely when the crop already has the target size.
    let resized = if cropped.width() == width && cropped.height() == height {
        cropped.to_rgb8()
    } else {
        cropped
            .resize_exact(width, height, FilterType::Lanczos3)
            .to_rgb8()
    };

    // The RGB8 buffer is already row-major interleaved R,G,B bytes, so map it
    // straight to normalized floats; this also avoids computing
    // `width * height * 3` in u32, which overflows for large targets.
    let pixels: Vec<f32> = resized
        .as_raw()
        .iter()
        .map(|&byte| f32::from(byte) / 255.0)
        .collect();

    Ok(Array::from_slice(&pixels, &[1, shape_h, shape_w, 3]))
}