wagahai_lut 0.1.0

CUBE LUT parser and image processing library with SIMD
Documentation
/*
 * SPDX-FileCopyrightText: © 2026 Jinwoo Park (pmnxis@gmail.com)
 *
 * SPDX-License-Identifier: MIT
 */

//! Common utilities for LUT processing

use wide::f32x4;

/// RGB color value (f32)
///
/// A fixed-size array of three `f32` components, accessed by index 0, 1 and 2
/// throughout this module (presumably red, green, blue in that order, as the
/// name suggests; confirm against the code that fills these values).
/// [`rgb_to_u8`] treats `[0.0, 1.0]` as the representable range when
/// converting to 8-bit channels.
pub type Rgb = [f32; 3];

/// Convert a floating-point RGB value to 8-bit channels (simple scalar conversion)
///
/// Each channel is clamped to `[0.0, 1.0]`, scaled by 255 and rounded to the
/// nearest integer. Rounding (instead of truncating) prevents values that sit
/// just below an integer because of ordinary floating-point error, such as
/// `99.99999`, from being quantized one step too low, so 8-bit inputs survive
/// a round trip through `f32`.
///
/// NaN channels map to `0`, because a float-to-integer `as` cast converts NaN
/// to zero.
///
/// Kept scalar on purpose: the original author's note states this is faster
/// than a SIMD version for such a simple operation.
#[inline]
pub fn rgb_to_u8(rgb: Rgb) -> [u8; 3] {
    // After clamping, the scaled value lies in [0, 255]. Adding 0.5 before the
    // truncating cast rounds half up; the maximum (255.5) still casts to 255.
    rgb.map(|channel| (channel.clamp(0.0, 1.0) * 255.0 + 0.5) as u8)
}

/// SIMD-optimized trilinear interpolation between 8 corner values
/// Processes all three channels simultaneously using SIMD vectors
#[inline]
#[allow(clippy::too_many_arguments)]
pub fn trilinear_interpolate_simd(
    c000: Rgb,
    c100: Rgb,
    c010: Rgb,
    c110: Rgb,
    c001: Rgb,
    c101: Rgb,
    c011: Rgb,
    c111: Rgb,
    x: f32,
    y: f32,
    z: f32,
) -> Rgb {
    // Load RGB channels into SIMD vectors (f32x4 gives us 4 lanes, we use 3)
    let v000 = f32x4::new([c000[0], c000[1], c000[2], 0.0]);
    let v100 = f32x4::new([c100[0], c100[1], c100[2], 0.0]);
    let v010 = f32x4::new([c010[0], c010[1], c010[2], 0.0]);
    let v110 = f32x4::new([c110[0], c110[1], c110[2], 0.0]);
    let v001 = f32x4::new([c001[0], c001[1], c001[2], 0.0]);
    let v101 = f32x4::new([c101[0], c101[1], c101[2], 0.0]);
    let v011 = f32x4::new([c011[0], c011[1], c011[2], 0.0]);
    let v111 = f32x4::new([c111[0], c111[1], c111[2], 0.0]);

    // Broadcast interpolation weights to all lanes
    let vx = f32x4::splat(x);
    let vy = f32x4::splat(y);
    let vz = f32x4::splat(z);
    let vx_comp = f32x4::splat(1.0 - x);
    let vy_comp = f32x4::splat(1.0 - y);
    let vz_comp = f32x4::splat(1.0 - z);

    // Interpolate along x - 4 vector multiplications + 4 vector additions
    let v00 = v000 * vx_comp + v100 * vx;
    let v01 = v001 * vx_comp + v101 * vx;
    let v10 = v010 * vx_comp + v110 * vx;
    let v11 = v011 * vx_comp + v111 * vx;

    // Interpolate along y - 2 vector multiplications + 2 vector additions
    let v0 = v00 * vy_comp + v10 * vy;
    let v1 = v01 * vy_comp + v11 * vy;

    // Interpolate along z - 1 vector multiplication + 1 vector addition
    let v_result = v0 * vz_comp + v1 * vz;

    // Extract RGB values from SIMD vector
    let result = v_result.to_array();
    [result[0], result[1], result[2]]
}

/// Scalar trilinear interpolation of a single channel (for SoA layout)
///
/// Takes the channel's value at each of the eight cube corners and blends
/// them with the weights `x`, `y`, `z`. Corner names read `c<x><y><z>`: a
/// weight of `0.0` selects the `0` side of that axis and `1.0` the `1` side.
/// Weights are used as given; nothing here clamps them to `[0, 1]`.
#[inline]
#[allow(clippy::too_many_arguments)]
pub fn trilinear_interpolate_scalar(
    c000: f32,
    c100: f32,
    c010: f32,
    c110: f32,
    c001: f32,
    c101: f32,
    c011: f32,
    c111: f32,
    x: f32,
    y: f32,
    z: f32,
) -> f32 {
    // One linear blend per axis, each capturing its own weight.
    let along_x = |lo: f32, hi: f32| lo * (1.0 - x) + hi * x;
    let along_y = |lo: f32, hi: f32| lo * (1.0 - y) + hi * y;
    let along_z = |lo: f32, hi: f32| lo * (1.0 - z) + hi * z;

    // Collapse x first (innermost), then y, then z (outermost).
    along_z(
        along_y(along_x(c000, c100), along_x(c010, c110)),
        along_y(along_x(c001, c101), along_x(c011, c111)),
    )
}

/// Test-only scalar trilinear interpolation of one channel from 8 corner values
///
/// Compiled only under `cfg(test)`. Corner names read `c<x><y><z>`; the
/// weights `x`, `y`, `z` blend from the `0` side (weight `0.0`) to the `1`
/// side (weight `1.0`) of their axis and are not clamped.
#[cfg(test)]
#[inline]
#[allow(clippy::too_many_arguments)]
pub fn __normal_test_trilinear_interpolate(
    c000: f32,
    c100: f32,
    c010: f32,
    c110: f32,
    c001: f32,
    c101: f32,
    c011: f32,
    c111: f32,
    x: f32,
    y: f32,
    z: f32,
) -> f32 {
    /// Linear blend from `a` (at `t == 0`) to `b` (at `t == 1`).
    fn lerp(a: f32, b: f32, t: f32) -> f32 {
        a * (1.0 - t) + b * t
    }

    // x axis: four edges, listed as (0 side, 1 side) pairs.
    let [c00, c01, c10, c11] = [(c000, c100), (c001, c101), (c010, c110), (c011, c111)]
        .map(|(lo, hi)| lerp(lo, hi, x));

    // y axis: two remaining values, one per z side.
    let [c0, c1] = [(c00, c10), (c01, c11)].map(|(lo, hi)| lerp(lo, hi, y));

    // z axis: final value.
    lerp(c0, c1, z)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the SIMD trilinear interpolation at two corners and the centre
    /// of a cube whose corner colours equal the corner coordinates.
    #[test]
    fn test_simd_trilinear_interpolation() {
        // Corner colours in argument order: c000, c100, c010, c110, c001, c101, c011, c111.
        let corners: [Rgb; 8] = [
            [0.0, 0.0, 0.0],
            [1.0, 0.0, 0.0],
            [0.0, 1.0, 0.0],
            [1.0, 1.0, 0.0],
            [0.0, 0.0, 1.0],
            [1.0, 0.0, 1.0],
            [0.0, 1.0, 1.0],
            [1.0, 1.0, 1.0],
        ];

        // Interpolate inside the cube above at the given weights.
        let sample = |x: f32, y: f32, z: f32| -> Rgb {
            let [c000, c100, c010, c110, c001, c101, c011, c111] = corners;
            trilinear_interpolate_simd(c000, c100, c010, c110, c001, c101, c011, c111, x, y, z)
        };

        // Weights (0, 0, 0) must reproduce c000 exactly.
        assert_eq!(sample(0.0, 0.0, 0.0), [0.0, 0.0, 0.0]);

        // Weights (1, 1, 1) must reproduce c111 exactly.
        assert_eq!(sample(1.0, 1.0, 1.0), [1.0, 1.0, 1.0]);

        // The cube centre must be 0.5 in every channel, within float epsilon.
        let centre = sample(0.5, 0.5, 0.5);
        for (c, value) in centre.iter().copied().enumerate() {
            assert!(
                (value - 0.5).abs() < f32::EPSILON,
                "Channel {}: {}",
                c,
                value
            );
        }
    }
}