use crate::math::{FusedMultiplyAdd, FusedMultiplyNegAdd};
use crate::mlaf::{mlaf, neg_mlaf};
use crate::safe_math::{SafeAdd, SafeMul};
use crate::{CmsError, MalformedSize, Vector3f, Vector4f};
use std::ops::{Add, Mul, Sub};
// Scalar f32 implementation of the crate's fused multiply-add abstraction.
// Delegates to `mlaf` (presumably `self * b + c`; whether a true hardware FMA
// is used is decided inside `crate::mlaf` — confirm there).
impl FusedMultiplyAdd<f32> for f32 {
#[inline(always)]
fn mla(&self, b: f32, c: f32) -> f32 {
mlaf(*self, b, c)
}
}
// Scalar f32 implementation of the negated multiply-add abstraction.
// Delegates to `neg_mlaf` (presumably `self - b * c`, matching its use in
// `lerp` in this file — see `crate::mlaf` for the exact definition).
impl FusedMultiplyNegAdd<f32> for f32 {
#[inline(always)]
fn neg_mla(&self, b: f32, c: f32) -> f32 {
neg_mlaf(*self, b, c)
}
}
/// Linear interpolation between `start` and `end` by `weight`, computed as
/// `(start - start * weight) + end * weight` through the crate's fused
/// multiply ops.
#[inline(always)]
pub(crate) fn lerp<
    V: Copy
        + Add<Output = V>
        + Sub<Output = V>
        + Mul<Output = V>
        + From<f32>
        + FusedMultiplyAdd<V>
        + FusedMultiplyNegAdd<V>,
>(
    start: V,
    end: V,
    weight: V,
) -> V {
    start.neg_mla(start, weight).mla(end, weight)
}
// Read-only view over a 4-D CLUT stored as a flat f32 slice.
pub struct Hypercube<'a> {
// Flattened lattice samples; a fixed number of floats per grid node.
array: &'a [f32],
// Node strides (counted in grid nodes, not floats) for the three slowest
// axes; the fourth (w) axis is contiguous with stride 1.
x_stride: u32,
y_stride: u32,
z_stride: u32,
// Number of grid points along each of the four input axes.
grid_size: [u8; 4],
}
// Abstracts reading one CLUT sample at integer 4-D lattice coordinates.
trait Fetcher4<T> {
fn fetch(&self, x: i32, y: i32, z: i32, w: i32) -> T;
}
impl Hypercube<'_> {
    /// Creates a hypercube view whose four axes all have `grid_size` points.
    ///
    /// `channels` is the number of f32 components stored per lattice node.
    /// An empty `array` or a zero `grid_size` yields a degenerate (all-zero)
    /// view; otherwise the slice length is validated against the start index
    /// of the last lattice node.
    ///
    /// NOTE(review): the length check admits slices that end mid-node (len in
    /// `last_index + 1 .. last_index + channels`); fetching the final node
    /// would then panic — confirm callers always pass whole-node lengths.
    pub fn new(
        array: &[f32],
        grid_size: usize,
        channels: usize,
    ) -> Result<Hypercube<'_>, CmsError> {
        if array.is_empty() || grid_size == 0 {
            return Ok(Hypercube {
                array,
                x_stride: 0,
                y_stride: 0,
                z_stride: 0,
                grid_size: [0, 0, 0, 0],
            });
        }
        // Layout is [x][y][z][w] with w contiguous, so with a uniform grid
        // the strides are successive powers of the grid size.
        let z_stride = grid_size as u32;
        let y_stride = z_stride * z_stride;
        let x_stride = z_stride * z_stride * z_stride;
        // Start index (in floats) of the last lattice node, overflow-checked.
        let last_index = (grid_size - 1)
            .safe_mul(x_stride as usize)?
            .safe_add((grid_size - 1).safe_mul(y_stride as usize)?)?
            .safe_add((grid_size - 1).safe_mul(z_stride as usize)?)?
            .safe_add(grid_size - 1)?
            .safe_mul(channels)?;
        if last_index >= array.len() {
            return Err(CmsError::MalformedClut(MalformedSize {
                size: array.len(),
                expected: last_index,
            }));
        }
        Ok(Hypercube {
            array,
            x_stride,
            y_stride,
            z_stride,
            grid_size: [
                grid_size as u8,
                grid_size as u8,
                grid_size as u8,
                grid_size as u8,
            ],
        })
    }

    /// Creates a hypercube view with per-axis lattice sizes `[x, y, z, w]`.
    ///
    /// `channels` is the number of f32 components per node. Any zero-sized
    /// axis (or an empty `array`) yields a degenerate view.
    pub fn new_hypercube(
        array: &[f32],
        grid_size: [u8; 4],
        channels: usize,
    ) -> Result<Hypercube<'_>, CmsError> {
        if array.is_empty()
            || grid_size[0] == 0
            || grid_size[1] == 0
            || grid_size[2] == 0
            || grid_size[3] == 0
        {
            return Ok(Hypercube {
                array,
                x_stride: 0,
                y_stride: 0,
                z_stride: 0,
                grid_size,
            });
        }
        // Layout is [x][y][z][w] with the w axis contiguous, so each stride
        // is the product of the sizes of all faster-varying axes (mirrors
        // `Cube::new_cube`). Previously these were built from
        // grid_size[2]/[1]/[0], which is only correct when all four axes
        // happen to have the same size.
        let z_stride = grid_size[3] as u32;
        let y_stride = z_stride * grid_size[2] as u32;
        let x_stride = y_stride * grid_size[1] as u32;
        // Start index (in floats) of the last lattice node, overflow-checked.
        let last_index = (grid_size[0] as usize - 1)
            .safe_mul(x_stride as usize)?
            .safe_add((grid_size[1] as usize - 1).safe_mul(y_stride as usize)?)?
            .safe_add((grid_size[2] as usize - 1).safe_mul(z_stride as usize)?)?
            .safe_add(grid_size[3] as usize - 1)?
            .safe_mul(channels)?;
        if last_index >= array.len() {
            return Err(CmsError::MalformedClut(MalformedSize {
                size: array.len(),
                expected: last_index,
            }));
        }
        Ok(Hypercube {
            array,
            x_stride,
            y_stride,
            z_stride,
            grid_size,
        })
    }
}
// Fetcher that reads three floats per 4-D lattice node from a flat slice.
struct Fetch4Vec3<'a> {
array: &'a [f32],
x_stride: u32,
y_stride: u32,
z_stride: u32,
}
// Fetcher that reads four floats per 4-D lattice node from a flat slice.
struct Fetch4Vec4<'a> {
array: &'a [f32],
x_stride: u32,
y_stride: u32,
z_stride: u32,
}
impl Fetcher4<Vector3f> for Fetch4Vec3<'_> {
    /// Reads the three-channel sample stored at lattice node (x, y, z, w).
    #[inline(always)]
    fn fetch(&self, x: i32, y: i32, z: i32, w: i32) -> Vector3f {
        let node = x as u32 * self.x_stride
            + y as u32 * self.y_stride
            + z as u32 * self.z_stride
            + w as u32;
        let offset = node as usize * 3;
        let s = &self.array[offset..offset + 3];
        Vector3f {
            v: [s[0], s[1], s[2]],
        }
    }
}
impl Fetcher4<Vector4f> for Fetch4Vec4<'_> {
    /// Reads the four-channel sample stored at lattice node (x, y, z, w).
    #[inline(always)]
    fn fetch(&self, x: i32, y: i32, z: i32, w: i32) -> Vector4f {
        let node = x as u32 * self.x_stride
            + y as u32 * self.y_stride
            + z as u32 * self.z_stride
            + w as u32;
        let offset = node as usize * 4;
        let s = &self.array[offset..offset + 4];
        Vector4f {
            v: [s[0], s[1], s[2], s[3]],
        }
    }
}
impl Hypercube<'_> {
// 4-D multilinear ("quadrilinear") interpolation at normalized per-axis
// coordinates. Negative inputs are clamped to 0; inputs above 1.0 are
// assumed not to occur (indices are only clamped to the last grid line) —
// TODO confirm callers pre-normalize.
#[inline(always)]
fn quadlinear<
T: From<f32>
+ Add<T, Output = T>
+ Mul<T, Output = T>
+ FusedMultiplyAdd<T>
+ Sub<T, Output = T>
+ Copy
+ FusedMultiplyNegAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
lin_w: f32,
r: impl Fetcher4<T>,
) -> T {
let lin_x = lin_x.max(0.0);
let lin_y = lin_y.max(0.0);
let lin_z = lin_z.max(0.0);
let lin_w = lin_w.max(0.0);
// Index of the last grid line on each axis.
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let scale_w = (self.grid_size[3] as i32 - 1) as f32;
// Lower (floor) and upper (ceil) corners of the enclosing lattice cell.
let x = (lin_x * scale_x).floor().min(scale_x) as i32;
let y = (lin_y * scale_y).floor().min(scale_y) as i32;
let z = (lin_z * scale_z).floor().min(scale_z) as i32;
let w = (lin_w * scale_w).floor().min(scale_w) as i32;
let x_n = (lin_x * scale_x).ceil().min(scale_x) as i32;
let y_n = (lin_y * scale_y).ceil().min(scale_y) as i32;
let z_n = (lin_z * scale_z).ceil().min(scale_z) as i32;
let w_n = (lin_w * scale_w).ceil().min(scale_w) as i32;
// Fractional position inside the cell along each axis.
let x_d = T::from(lin_x * scale_x - x as f32);
let y_d = T::from(lin_y * scale_y - y as f32);
let z_d = T::from(lin_z * scale_z - z as f32);
let w_d = T::from(lin_w * scale_w - w as f32);
// Trilinear interpolation in the w (floor) slice...
let r_x1 = lerp(r.fetch(x, y, z, w), r.fetch(x_n, y, z, w), x_d);
let r_x2 = lerp(r.fetch(x, y_n, z, w), r.fetch(x_n, y_n, z, w), x_d);
let r_y1 = lerp(r_x1, r_x2, y_d);
let r_x3 = lerp(r.fetch(x, y, z_n, w), r.fetch(x_n, y, z_n, w), x_d);
let r_x4 = lerp(r.fetch(x, y_n, z_n, w), r.fetch(x_n, y_n, z_n, w), x_d);
let r_y2 = lerp(r_x3, r_x4, y_d);
let r_z1 = lerp(r_y1, r_y2, z_d);
// ...and again in the w_n (ceil) slice.
let r_x1 = lerp(r.fetch(x, y, z, w_n), r.fetch(x_n, y, z, w_n), x_d);
let r_x2 = lerp(r.fetch(x, y_n, z, w_n), r.fetch(x_n, y_n, z, w_n), x_d);
let r_y1 = lerp(r_x1, r_x2, y_d);
let r_x3 = lerp(r.fetch(x, y, z_n, w_n), r.fetch(x_n, y, z_n, w_n), x_d);
let r_x4 = lerp(r.fetch(x, y_n, z_n, w_n), r.fetch(x_n, y_n, z_n, w_n), x_d);
let r_y2 = lerp(r_x3, r_x4, y_d);
let r_z2 = lerp(r_y1, r_y2, z_d);
// Final linear blend along w.
lerp(r_z1, r_z2, w_d)
}
/// 4-D multilinear interpolation producing a three-channel sample.
#[inline]
pub fn quadlinear_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32, lin_w: f32) -> Vector3f {
    let fetcher = Fetch4Vec3 {
        array: self.array,
        x_stride: self.x_stride,
        y_stride: self.y_stride,
        z_stride: self.z_stride,
    };
    self.quadlinear(lin_x, lin_y, lin_z, lin_w, fetcher)
}
/// 4-D multilinear interpolation producing a four-channel sample.
#[inline]
pub fn quadlinear_vec4(&self, lin_x: f32, lin_y: f32, lin_z: f32, lin_w: f32) -> Vector4f {
    let fetcher = Fetch4Vec4 {
        array: self.array,
        x_stride: self.x_stride,
        y_stride: self.y_stride,
        z_stride: self.z_stride,
    };
    self.quadlinear(lin_x, lin_y, lin_z, lin_w, fetcher)
}
// Pyramidal interpolation: each 3-D sub-cell is split into three pyramids
// selected by comparing the fractional parts (dr, dg, db); the 3-D result is
// computed in both w-slices and blended linearly along w. Coefficient order
// inside each branch is significant — do not reorder.
#[cfg(feature = "options")]
#[cfg_attr(docsrs, doc(cfg(feature = "options")))]
#[inline(always)]
fn pyramid<
T: From<f32>
+ Add<T, Output = T>
+ Mul<T, Output = T>
+ FusedMultiplyAdd<T>
+ Sub<T, Output = T>
+ Copy
+ FusedMultiplyNegAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
lin_w: f32,
r: impl Fetcher4<T>,
) -> T {
// Clamp below; inputs above 1.0 are assumed not to occur — TODO confirm.
let lin_x = lin_x.max(0.0);
let lin_y = lin_y.max(0.0);
let lin_z = lin_z.max(0.0);
let lin_w = lin_w.max(0.0);
// Index of the last grid line on each axis.
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let scale_w = (self.grid_size[3] as i32 - 1) as f32;
// Bounding lattice corners of the enclosing cell.
let x = (lin_x * scale_x).floor().min(scale_x) as i32;
let y = (lin_y * scale_y).floor().min(scale_y) as i32;
let z = (lin_z * scale_z).floor().min(scale_z) as i32;
let w = (lin_w * scale_w).floor().min(scale_w) as i32;
let x_n = (lin_x * scale_x).ceil().min(scale_x) as i32;
let y_n = (lin_y * scale_y).ceil().min(scale_y) as i32;
let z_n = (lin_z * scale_z).ceil().min(scale_z) as i32;
let w_n = (lin_w * scale_w).ceil().min(scale_w) as i32;
// Fractional position inside the cell along each axis.
let dr = lin_x * scale_x - x as f32;
let dg = lin_y * scale_y - y as f32;
let db = lin_z * scale_z - z as f32;
let dw = lin_w * scale_w - w as f32;
// 3-D pyramid interpolation in the w (floor) slice, anchored at c0.
let c0 = r.fetch(x, y, z, w);
let w0 = if dr > db && dg > db {
// Pyramid where the z-fraction is smallest.
let x0 = r.fetch(x_n, y_n, z_n, w);
let x1 = r.fetch(x_n, y_n, z, w);
let x2 = r.fetch(x_n, y, z, w);
let x3 = r.fetch(x, y_n, z, w);
let c1 = x0 - x1;
let c2 = x2 - c0;
let c3 = x3 - c0;
let c4 = c0 - x3 - x2 + x1;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dr * dg))
} else if db > dr && dg > dr {
// Pyramid where the x-fraction is smallest.
let x0 = r.fetch(x, y, z_n, w);
let x1 = r.fetch(x_n, y_n, z_n, w);
let x2 = r.fetch(x, y_n, z_n, w);
let x3 = r.fetch(x, y_n, z, w);
let c1 = x0 - c0;
let c2 = x1 - x2;
let c3 = x3 - c0;
let c4 = c0 - x3 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dg * db))
} else {
// Remaining pyramid (y-fraction smallest).
let x0 = r.fetch(x, y, z_n, w);
let x1 = r.fetch(x_n, y, z, w);
let x2 = r.fetch(x_n, y, z_n, w);
let x3 = r.fetch(x_n, y_n, z_n, w);
let c1 = x0 - c0;
let c2 = x1 - c0;
let c3 = x3 - x2;
let c4 = c0 - x1 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(db * dr))
};
// Same pyramid interpolation repeated in the w_n (ceil) slice.
let c0 = r.fetch(x, y, z, w_n);
let w1 = if dr > db && dg > db {
let x0 = r.fetch(x_n, y_n, z_n, w_n);
let x1 = r.fetch(x_n, y_n, z, w_n);
let x2 = r.fetch(x_n, y, z, w_n);
let x3 = r.fetch(x, y_n, z, w_n);
let c1 = x0 - x1;
let c2 = x2 - c0;
let c3 = x3 - c0;
let c4 = c0 - x3 - x2 + x1;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dr * dg))
} else if db > dr && dg > dr {
let x0 = r.fetch(x, y, z_n, w_n);
let x1 = r.fetch(x_n, y_n, z_n, w_n);
let x2 = r.fetch(x, y_n, z_n, w_n);
let x3 = r.fetch(x, y_n, z, w_n);
let c1 = x0 - c0;
let c2 = x1 - x2;
let c3 = x3 - c0;
let c4 = c0 - x3 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dg * db))
} else {
let x0 = r.fetch(x, y, z_n, w_n);
let x1 = r.fetch(x_n, y, z, w_n);
let x2 = r.fetch(x_n, y, z_n, w_n);
let x3 = r.fetch(x_n, y_n, z_n, w_n);
let c1 = x0 - c0;
let c2 = x1 - c0;
let c3 = x3 - x2;
let c4 = c0 - x1 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(db * dr))
};
// lerp(w0, w1, dw) spelled with fused ops.
w0.neg_mla(w0, T::from(dw)).mla(w1, T::from(dw))
}
/// Pyramidal interpolation producing a three-channel sample.
#[cfg(feature = "options")]
#[cfg_attr(docsrs, doc(cfg(feature = "options")))]
#[inline]
pub fn pyramid_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32, lin_w: f32) -> Vector3f {
    let fetcher = Fetch4Vec3 {
        array: self.array,
        x_stride: self.x_stride,
        y_stride: self.y_stride,
        z_stride: self.z_stride,
    };
    self.pyramid(lin_x, lin_y, lin_z, lin_w, fetcher)
}
/// Pyramidal interpolation producing a four-channel sample.
#[cfg(feature = "options")]
#[cfg_attr(docsrs, doc(cfg(feature = "options")))]
#[inline]
pub fn pyramid_vec4(&self, lin_x: f32, lin_y: f32, lin_z: f32, lin_w: f32) -> Vector4f {
    let fetcher = Fetch4Vec4 {
        array: self.array,
        x_stride: self.x_stride,
        y_stride: self.y_stride,
        z_stride: self.z_stride,
    };
    self.pyramid(lin_x, lin_y, lin_z, lin_w, fetcher)
}
// Prism interpolation: each 3-D sub-cell is split into two triangular prisms
// along the db >= dr diagonal; the 3-D result is computed in both w-slices
// and blended linearly along w. Coefficient order inside each branch is
// significant — do not reorder.
#[cfg(feature = "options")]
#[cfg_attr(docsrs, doc(cfg(feature = "options")))]
#[inline(always)]
fn prism<
T: From<f32>
+ Add<T, Output = T>
+ Mul<T, Output = T>
+ FusedMultiplyAdd<T>
+ Sub<T, Output = T>
+ Copy
+ FusedMultiplyNegAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
lin_w: f32,
r: impl Fetcher4<T>,
) -> T {
// Clamp below; inputs above 1.0 are assumed not to occur — TODO confirm.
let lin_x = lin_x.max(0.0);
let lin_y = lin_y.max(0.0);
let lin_z = lin_z.max(0.0);
let lin_w = lin_w.max(0.0);
// Index of the last grid line on each axis.
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let scale_w = (self.grid_size[3] as i32 - 1) as f32;
// Bounding lattice corners of the enclosing cell.
let x = (lin_x * scale_x).floor().min(scale_x) as i32;
let y = (lin_y * scale_y).floor().min(scale_y) as i32;
let z = (lin_z * scale_z).floor().min(scale_z) as i32;
let w = (lin_w * scale_w).floor().min(scale_w) as i32;
let x_n = (lin_x * scale_x).ceil().min(scale_x) as i32;
let y_n = (lin_y * scale_y).ceil().min(scale_y) as i32;
let z_n = (lin_z * scale_z).ceil().min(scale_z) as i32;
let w_n = (lin_w * scale_w).ceil().min(scale_w) as i32;
// Fractional position inside the cell along each axis.
let dr = lin_x * scale_x - x as f32;
let dg = lin_y * scale_y - y as f32;
let db = lin_z * scale_z - z as f32;
let dw = lin_w * scale_w - w as f32;
// 3-D prism interpolation in the w (floor) slice, anchored at c0.
let c0 = r.fetch(x, y, z, w);
let w0 = if db >= dr {
let x0 = r.fetch(x, y, z_n, w);
let x1 = r.fetch(x_n, y, z_n, w);
let x2 = r.fetch(x, y_n, z, w);
let x3 = r.fetch(x, y_n, z_n, w);
let x4 = r.fetch(x_n, y_n, z_n, w);
let c1 = x0 - c0;
let c2 = x1 - x0;
let c3 = x2 - c0;
let c4 = c0 - x2 - x0 + x3;
let c5 = x0 - x3 - x1 + x4;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
} else {
let x0 = r.fetch(x_n, y, z, w);
let x1 = r.fetch(x_n, y, z_n, w);
let x2 = r.fetch(x, y_n, z, w);
let x3 = r.fetch(x_n, y_n, z, w);
let x4 = r.fetch(x_n, y_n, z_n, w);
let c1 = x1 - x0;
let c2 = x0 - c0;
let c3 = x2 - c0;
let c4 = x0 - x3 - x1 + x4;
let c5 = c0 - x2 - x0 + x3;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
};
// Same prism interpolation repeated in the w_n (ceil) slice.
let c0 = r.fetch(x, y, z, w_n);
let w1 = if db >= dr {
let x0 = r.fetch(x, y, z_n, w_n);
let x1 = r.fetch(x_n, y, z_n, w_n);
let x2 = r.fetch(x, y_n, z, w_n);
let x3 = r.fetch(x, y_n, z_n, w_n);
let x4 = r.fetch(x_n, y_n, z_n, w_n);
let c1 = x0 - c0;
let c2 = x1 - x0;
let c3 = x2 - c0;
let c4 = c0 - x2 - x0 + x3;
let c5 = x0 - x3 - x1 + x4;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
} else {
let x0 = r.fetch(x_n, y, z, w_n);
let x1 = r.fetch(x_n, y, z_n, w_n);
let x2 = r.fetch(x, y_n, z, w_n);
let x3 = r.fetch(x_n, y_n, z, w_n);
let x4 = r.fetch(x_n, y_n, z_n, w_n);
let c1 = x1 - x0;
let c2 = x0 - c0;
let c3 = x2 - c0;
let c4 = x0 - x3 - x1 + x4;
let c5 = c0 - x2 - x0 + x3;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
};
// lerp(w0, w1, dw) spelled with fused ops.
w0.neg_mla(w0, T::from(dw)).mla(w1, T::from(dw))
}
/// Prism interpolation producing a three-channel sample.
#[cfg(feature = "options")]
#[cfg_attr(docsrs, doc(cfg(feature = "options")))]
#[inline]
pub fn prism_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32, lin_w: f32) -> Vector3f {
    let fetcher = Fetch4Vec3 {
        array: self.array,
        x_stride: self.x_stride,
        y_stride: self.y_stride,
        z_stride: self.z_stride,
    };
    self.prism(lin_x, lin_y, lin_z, lin_w, fetcher)
}
/// Prism interpolation producing a four-channel sample.
#[cfg(feature = "options")]
#[cfg_attr(docsrs, doc(cfg(feature = "options")))]
#[inline]
pub fn prism_vec4(&self, lin_x: f32, lin_y: f32, lin_z: f32, lin_w: f32) -> Vector4f {
    let fetcher = Fetch4Vec4 {
        array: self.array,
        x_stride: self.x_stride,
        y_stride: self.y_stride,
        z_stride: self.z_stride,
    };
    self.prism(lin_x, lin_y, lin_z, lin_w, fetcher)
}
// Tetrahedral interpolation: the ordering of the fractional parts
// (rx, ry, rz) selects one of six tetrahedra in the 3-D sub-cell; c1..c3 are
// the directional differences along the chosen tetrahedron's edges. The 3-D
// result is computed in both w-slices and blended linearly along w.
#[cfg(feature = "options")]
#[cfg_attr(docsrs, doc(cfg(feature = "options")))]
#[inline(always)]
fn tetra<
T: From<f32>
+ Add<T, Output = T>
+ Mul<T, Output = T>
+ FusedMultiplyAdd<T>
+ Sub<T, Output = T>
+ Copy
+ FusedMultiplyNegAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
lin_w: f32,
r: impl Fetcher4<T>,
) -> T {
// Clamp below; inputs above 1.0 are assumed not to occur — TODO confirm.
let lin_x = lin_x.max(0.0);
let lin_y = lin_y.max(0.0);
let lin_z = lin_z.max(0.0);
let lin_w = lin_w.max(0.0);
// Index of the last grid line on each axis.
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let scale_w = (self.grid_size[3] as i32 - 1) as f32;
// Bounding lattice corners of the enclosing cell.
let x = (lin_x * scale_x).floor().min(scale_x) as i32;
let y = (lin_y * scale_y).floor().min(scale_y) as i32;
let z = (lin_z * scale_z).floor().min(scale_z) as i32;
let w = (lin_w * scale_w).floor().min(scale_w) as i32;
let x_n = (lin_x * scale_x).ceil().min(scale_x) as i32;
let y_n = (lin_y * scale_y).ceil().min(scale_y) as i32;
let z_n = (lin_z * scale_z).ceil().min(scale_z) as i32;
let w_n = (lin_w * scale_w).ceil().min(scale_w) as i32;
// Fractional position inside the cell along each axis.
let rx = lin_x * scale_x - x as f32;
let ry = lin_y * scale_y - y as f32;
let rz = lin_z * scale_z - z as f32;
let rw = lin_w * scale_w - w as f32;
// Tetrahedral interpolation in the w (floor) slice, anchored at c0.
let c0 = r.fetch(x, y, z, w);
let c2;
let c1;
let c3;
if rx >= ry {
if ry >= rz {
// rx >= ry >= rz
c1 = r.fetch(x_n, y, z, w) - c0;
c2 = r.fetch(x_n, y_n, z, w) - r.fetch(x_n, y, z, w);
c3 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x_n, y_n, z, w);
} else if rx >= rz {
// rx >= rz > ry
c1 = r.fetch(x_n, y, z, w) - c0;
c2 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x_n, y, z_n, w);
c3 = r.fetch(x_n, y, z_n, w) - r.fetch(x_n, y, z, w);
} else {
// rz > rx >= ry
c1 = r.fetch(x_n, y, z_n, w) - r.fetch(x, y, z_n, w);
c2 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x_n, y, z_n, w);
c3 = r.fetch(x, y, z_n, w) - c0;
}
} else if rx >= rz {
// ry > rx >= rz
c1 = r.fetch(x_n, y_n, z, w) - r.fetch(x, y_n, z, w);
c2 = r.fetch(x, y_n, z, w) - c0;
c3 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x_n, y_n, z, w);
} else if ry >= rz {
// ry >= rz > rx
c1 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x, y_n, z_n, w);
c2 = r.fetch(x, y_n, z, w) - c0;
c3 = r.fetch(x, y_n, z_n, w) - r.fetch(x, y_n, z, w);
} else {
// rz > ry > rx
c1 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x, y_n, z_n, w);
c2 = r.fetch(x, y_n, z_n, w) - r.fetch(x, y, z_n, w);
c3 = r.fetch(x, y, z_n, w) - c0;
}
let s0 = c0.mla(c1, T::from(rx));
let s1 = s0.mla(c2, T::from(ry));
let w0 = s1.mla(c3, T::from(rz));
// Same tetrahedral interpolation repeated in the w_n (ceil) slice.
let c0 = r.fetch(x, y, z, w_n);
let c2;
let c1;
let c3;
if rx >= ry {
if ry >= rz {
c1 = r.fetch(x_n, y, z, w_n) - c0;
c2 = r.fetch(x_n, y_n, z, w_n) - r.fetch(x_n, y, z, w_n);
c3 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x_n, y_n, z, w_n);
} else if rx >= rz {
c1 = r.fetch(x_n, y, z, w_n) - c0;
c2 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x_n, y, z_n, w_n);
c3 = r.fetch(x_n, y, z_n, w_n) - r.fetch(x_n, y, z, w_n);
} else {
c1 = r.fetch(x_n, y, z_n, w_n) - r.fetch(x, y, z_n, w_n);
c2 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x_n, y, z_n, w_n);
c3 = r.fetch(x, y, z_n, w_n) - c0;
}
} else if rx >= rz {
c1 = r.fetch(x_n, y_n, z, w_n) - r.fetch(x, y_n, z, w_n);
c2 = r.fetch(x, y_n, z, w_n) - c0;
c3 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x_n, y_n, z, w_n);
} else if ry >= rz {
c1 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x, y_n, z_n, w_n);
c2 = r.fetch(x, y_n, z, w_n) - c0;
c3 = r.fetch(x, y_n, z_n, w_n) - r.fetch(x, y_n, z, w_n);
} else {
c1 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x, y_n, z_n, w_n);
c2 = r.fetch(x, y_n, z_n, w_n) - r.fetch(x, y, z_n, w_n);
c3 = r.fetch(x, y, z_n, w_n) - c0;
}
let s0 = c0.mla(c1, T::from(rx));
let s1 = s0.mla(c2, T::from(ry));
let w1 = s1.mla(c3, T::from(rz));
// lerp(w0, w1, rw) spelled with fused ops.
w0.neg_mla(w0, T::from(rw)).mla(w1, T::from(rw))
}
/// Tetrahedral interpolation producing a three-channel sample.
#[cfg(feature = "options")]
#[cfg_attr(docsrs, doc(cfg(feature = "options")))]
#[inline]
pub fn tetra_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32, lin_w: f32) -> Vector3f {
    let fetcher = Fetch4Vec3 {
        array: self.array,
        x_stride: self.x_stride,
        y_stride: self.y_stride,
        z_stride: self.z_stride,
    };
    self.tetra(lin_x, lin_y, lin_z, lin_w, fetcher)
}
/// Tetrahedral interpolation producing a four-channel sample.
#[cfg(feature = "options")]
#[cfg_attr(docsrs, doc(cfg(feature = "options")))]
#[inline]
pub fn tetra_vec4(&self, lin_x: f32, lin_y: f32, lin_z: f32, lin_w: f32) -> Vector4f {
    let fetcher = Fetch4Vec4 {
        array: self.array,
        x_stride: self.x_stride,
        y_stride: self.y_stride,
        z_stride: self.z_stride,
    };
    self.tetra(lin_x, lin_y, lin_z, lin_w, fetcher)
}
}
// Read-only view over a 3-D CLUT stored as a flat f32 slice.
pub struct Cube<'a> {
// Flattened lattice samples; a fixed number of floats per grid node.
array: &'a [f32],
// Node strides (in grid nodes, not floats) for the two slowest axes; the
// third (z) axis is contiguous with stride 1.
x_stride: u32,
y_stride: u32,
// Number of grid points along each of the three input axes.
grid_size: [u8; 3],
}
// Abstracts reading one CLUT sample at integer 3-D lattice coordinates.
pub(crate) trait ArrayFetch<T> {
fn fetch(&self, x: i32, y: i32, z: i32) -> T;
}
// Fetcher that reads three floats per 3-D lattice node from a flat slice.
struct ArrayFetchVector3f<'a> {
array: &'a [f32],
x_stride: u32,
y_stride: u32,
}
impl ArrayFetch<Vector3f> for ArrayFetchVector3f<'_> {
    /// Reads the three-channel sample stored at lattice node (x, y, z).
    #[inline(always)]
    fn fetch(&self, x: i32, y: i32, z: i32) -> Vector3f {
        let node = x as u32 * self.x_stride + y as u32 * self.y_stride + z as u32;
        let offset = node as usize * 3;
        let s = &self.array[offset..offset + 3];
        Vector3f {
            v: [s[0], s[1], s[2]],
        }
    }
}
// Fetcher that reads four floats per 3-D lattice node from a flat slice.
struct ArrayFetchVector4f<'a> {
array: &'a [f32],
x_stride: u32,
y_stride: u32,
}
impl ArrayFetch<Vector4f> for ArrayFetchVector4f<'_> {
    /// Reads the four-channel sample stored at lattice node (x, y, z).
    #[inline(always)]
    fn fetch(&self, x: i32, y: i32, z: i32) -> Vector4f {
        let node = x as u32 * self.x_stride + y as u32 * self.y_stride + z as u32;
        let offset = node as usize * 4;
        let s = &self.array[offset..offset + 4];
        Vector4f {
            v: [s[0], s[1], s[2], s[3]],
        }
    }
}
impl Cube<'_> {
// Builds a cube view whose three axes all have `grid_size` points;
// `channels` is the number of f32 components per lattice node. An empty
// array or zero grid size yields a degenerate view.
// NOTE(review): the length check admits slices that end mid-node (len in
// last_index+1..last_index+channels); fetching the final node would then
// panic — confirm callers always pass whole-node lengths.
pub fn new(array: &[f32], grid_size: usize, channels: usize) -> Result<Cube<'_>, CmsError> {
if array.is_empty() || grid_size == 0 {
return Ok(Cube {
array,
x_stride: 0,
y_stride: 0,
grid_size: [0, 0, 0],
});
}
// Layout is [x][y][z] with z contiguous, so strides are powers of the
// (uniform) grid size.
let y_stride = grid_size;
let x_stride = y_stride * y_stride;
// Start index (in floats) of the last lattice node, overflow-checked.
let last_index = (grid_size - 1)
.safe_mul(x_stride)?
.safe_add((grid_size - 1).safe_mul(y_stride)?)?
.safe_add(grid_size - 1)?
.safe_mul(channels)?;
if last_index >= array.len() {
return Err(CmsError::MalformedClut(MalformedSize {
size: array.len(),
expected: last_index,
}));
}
Ok(Cube {
array,
x_stride: x_stride as u32,
y_stride: y_stride as u32,
grid_size: [grid_size as u8, grid_size as u8, grid_size as u8],
})
}
// Builds a cube view with per-axis grid sizes `[x, y, z]` (z varies
// fastest); `channels` is the number of f32 components per node. Any
// zero-sized axis (or an empty array) yields a degenerate view.
pub fn new_cube(
array: &[f32],
grid_size: [u8; 3],
channels: usize,
) -> Result<Cube<'_>, CmsError> {
if array.is_empty() || grid_size[0] == 0 || grid_size[1] == 0 || grid_size[2] == 0 {
return Ok(Cube {
array,
x_stride: 0,
y_stride: 0,
grid_size,
});
}
// Layout is [x][y][z] with z contiguous: each stride is the product of
// the sizes of all faster-varying axes.
let y_stride = grid_size[2] as u32;
let x_stride = y_stride * grid_size[1] as u32;
// Start index (in floats) of the last lattice node, overflow-checked.
let last_index = (grid_size[0] as usize - 1)
.safe_mul(x_stride as usize)?
.safe_add((grid_size[1] as usize - 1).safe_mul(y_stride as usize)?)?
.safe_add(grid_size[2] as usize - 1)?
.safe_mul(channels)?;
if last_index >= array.len() {
return Err(CmsError::MalformedClut(MalformedSize {
size: array.len(),
expected: last_index,
}));
}
Ok(Cube {
array,
x_stride,
y_stride,
grid_size,
})
}
// Trilinear interpolation over the lattice cell containing the normalized
// point: eight corner fetches, then nested lerps along x, y, z. Negative
// inputs are clamped to 0; inputs above 1.0 are assumed not to occur — TODO
// confirm callers pre-normalize.
#[inline(always)]
fn trilinear<
T: Copy
+ From<f32>
+ Sub<T, Output = T>
+ Mul<T, Output = T>
+ Add<T, Output = T>
+ FusedMultiplyNegAdd<T>
+ FusedMultiplyAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
fetch: impl ArrayFetch<T>,
) -> T {
let lin_x = lin_x.max(0.0);
let lin_y = lin_y.max(0.0);
let lin_z = lin_z.max(0.0);
// Index of the last grid line on each axis.
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
// Lower (floor) and upper (ceil) corners of the enclosing cell.
let x = (lin_x * scale_x).floor().min(scale_x) as i32;
let y = (lin_y * scale_y).floor().min(scale_y) as i32;
let z = (lin_z * scale_z).floor().min(scale_z) as i32;
let x_n = (lin_x * scale_x).ceil().min(scale_x) as i32;
let y_n = (lin_y * scale_y).ceil().min(scale_y) as i32;
let z_n = (lin_z * scale_z).ceil().min(scale_z) as i32;
// Fractional position inside the cell along each axis.
let x_d = T::from(lin_x * scale_x - x as f32);
let y_d = T::from(lin_y * scale_y - y as f32);
let z_d = T::from(lin_z * scale_z - z as f32);
let c000 = fetch.fetch(x, y, z);
let c100 = fetch.fetch(x_n, y, z);
let c010 = fetch.fetch(x, y_n, z);
let c110 = fetch.fetch(x_n, y_n, z);
let c001 = fetch.fetch(x, y, z_n);
let c101 = fetch.fetch(x_n, y, z_n);
let c011 = fetch.fetch(x, y_n, z_n);
let c111 = fetch.fetch(x_n, y_n, z_n);
// Each `a.neg_mla(a, d).mla(b, d)` is a lerp: a * (1 - d) + b * d.
let c00 = c000.neg_mla(c000, x_d).mla(c100, x_d);
let c10 = c010.neg_mla(c010, x_d).mla(c110, x_d);
let c01 = c001.neg_mla(c001, x_d).mla(c101, x_d);
let c11 = c011.neg_mla(c011, x_d).mla(c111, x_d);
let c0 = c00.neg_mla(c00, y_d).mla(c10, y_d);
let c1 = c01.neg_mla(c01, y_d).mla(c11, y_d);
c0.neg_mla(c0, z_d).mla(c1, z_d)
}
// Pyramidal interpolation: the cell is split into three pyramids selected by
// comparing the fractional parts (dr, dg, db); one branch per pyramid.
// Coefficient order inside each branch is significant — do not reorder.
#[cfg(feature = "options")]
#[inline]
fn pyramid<
T: Copy
+ From<f32>
+ Sub<T, Output = T>
+ Mul<T, Output = T>
+ Add<T, Output = T>
+ FusedMultiplyAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
fetch: impl ArrayFetch<T>,
) -> T {
// Clamp below; inputs above 1.0 are assumed not to occur — TODO confirm.
let lin_x = lin_x.max(0.0);
let lin_y = lin_y.max(0.0);
let lin_z = lin_z.max(0.0);
// Index of the last grid line on each axis.
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
// Bounding lattice corners of the enclosing cell.
let x = (lin_x * scale_x).floor().min(scale_x) as i32;
let y = (lin_y * scale_y).floor().min(scale_y) as i32;
let z = (lin_z * scale_z).floor().min(scale_z) as i32;
let x_n = (lin_x * scale_x).ceil().min(scale_x) as i32;
let y_n = (lin_y * scale_y).ceil().min(scale_y) as i32;
let z_n = (lin_z * scale_z).ceil().min(scale_z) as i32;
// Fractional position inside the cell along each axis.
let dr = lin_x * scale_x - x as f32;
let dg = lin_y * scale_y - y as f32;
let db = lin_z * scale_z - z as f32;
let c0 = fetch.fetch(x, y, z);
if dr > db && dg > db {
// Pyramid where the z-fraction is smallest.
let x0 = fetch.fetch(x_n, y_n, z_n);
let x1 = fetch.fetch(x_n, y_n, z);
let x2 = fetch.fetch(x_n, y, z);
let x3 = fetch.fetch(x, y_n, z);
let c1 = x0 - x1;
let c2 = x2 - c0;
let c3 = x3 - c0;
let c4 = c0 - x3 - x2 + x1;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dr * dg))
} else if db > dr && dg > dr {
// Pyramid where the x-fraction is smallest.
let x0 = fetch.fetch(x, y, z_n);
let x1 = fetch.fetch(x_n, y_n, z_n);
let x2 = fetch.fetch(x, y_n, z_n);
let x3 = fetch.fetch(x, y_n, z);
let c1 = x0 - c0;
let c2 = x1 - x2;
let c3 = x3 - c0;
let c4 = c0 - x3 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dg * db))
} else {
// Remaining pyramid (y-fraction smallest).
let x0 = fetch.fetch(x, y, z_n);
let x1 = fetch.fetch(x_n, y, z);
let x2 = fetch.fetch(x_n, y, z_n);
let x3 = fetch.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - c0;
let c3 = x3 - x2;
let c4 = c0 - x1 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(db * dr))
}
}
// Tetrahedral interpolation: the ordering of the fractional parts
// (rx, ry, rz) selects one of six tetrahedra in the cell; c1..c3 are the
// directional differences along the chosen tetrahedron's edges.
#[cfg(feature = "options")]
#[inline]
fn tetra<
T: Copy
+ From<f32>
+ Sub<T, Output = T>
+ Mul<T, Output = T>
+ Add<T, Output = T>
+ FusedMultiplyAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
fetch: impl ArrayFetch<T>,
) -> T {
// Clamp below; inputs above 1.0 are assumed not to occur — TODO confirm.
let lin_x = lin_x.max(0.0);
let lin_y = lin_y.max(0.0);
let lin_z = lin_z.max(0.0);
// Index of the last grid line on each axis.
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
// Bounding lattice corners of the enclosing cell.
let x = (lin_x * scale_x).floor().min(scale_x) as i32;
let y = (lin_y * scale_y).floor().min(scale_y) as i32;
let z = (lin_z * scale_z).floor().min(scale_z) as i32;
let x_n = (lin_x * scale_x).ceil().min(scale_x) as i32;
let y_n = (lin_y * scale_y).ceil().min(scale_y) as i32;
let z_n = (lin_z * scale_z).ceil().min(scale_z) as i32;
// Fractional position inside the cell along each axis.
let rx = lin_x * scale_x - x as f32;
let ry = lin_y * scale_y - y as f32;
let rz = lin_z * scale_z - z as f32;
let c0 = fetch.fetch(x, y, z);
let c2;
let c1;
let c3;
if rx >= ry {
if ry >= rz {
// rx >= ry >= rz
c1 = fetch.fetch(x_n, y, z) - c0;
c2 = fetch.fetch(x_n, y_n, z) - fetch.fetch(x_n, y, z);
c3 = fetch.fetch(x_n, y_n, z_n) - fetch.fetch(x_n, y_n, z);
} else if rx >= rz {
// rx >= rz > ry
c1 = fetch.fetch(x_n, y, z) - c0;
c2 = fetch.fetch(x_n, y_n, z_n) - fetch.fetch(x_n, y, z_n);
c3 = fetch.fetch(x_n, y, z_n) - fetch.fetch(x_n, y, z);
} else {
// rz > rx >= ry
c1 = fetch.fetch(x_n, y, z_n) - fetch.fetch(x, y, z_n);
c2 = fetch.fetch(x_n, y_n, z_n) - fetch.fetch(x_n, y, z_n);
c3 = fetch.fetch(x, y, z_n) - c0;
}
} else if rx >= rz {
// ry > rx >= rz
c1 = fetch.fetch(x_n, y_n, z) - fetch.fetch(x, y_n, z);
c2 = fetch.fetch(x, y_n, z) - c0;
c3 = fetch.fetch(x_n, y_n, z_n) - fetch.fetch(x_n, y_n, z);
} else if ry >= rz {
// ry >= rz > rx
c1 = fetch.fetch(x_n, y_n, z_n) - fetch.fetch(x, y_n, z_n);
c2 = fetch.fetch(x, y_n, z) - c0;
c3 = fetch.fetch(x, y_n, z_n) - fetch.fetch(x, y_n, z);
} else {
// rz > ry > rx
c1 = fetch.fetch(x_n, y_n, z_n) - fetch.fetch(x, y_n, z_n);
c2 = fetch.fetch(x, y_n, z_n) - fetch.fetch(x, y, z_n);
c3 = fetch.fetch(x, y, z_n) - c0;
}
let s0 = c0.mla(c1, T::from(rx));
let s1 = s0.mla(c2, T::from(ry));
s1.mla(c3, T::from(rz))
}
// Prism interpolation: the cell is split into two triangular prisms along
// the db >= dr diagonal. Coefficient order inside each branch is
// significant — do not reorder.
#[cfg(feature = "options")]
#[inline]
fn prism<
T: Copy
+ From<f32>
+ Sub<T, Output = T>
+ Mul<T, Output = T>
+ Add<T, Output = T>
+ FusedMultiplyAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
fetch: impl ArrayFetch<T>,
) -> T {
// Clamp below; inputs above 1.0 are assumed not to occur — TODO confirm.
let lin_x = lin_x.max(0.0);
let lin_y = lin_y.max(0.0);
let lin_z = lin_z.max(0.0);
// Index of the last grid line on each axis.
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
// Bounding lattice corners of the enclosing cell.
let x = (lin_x * scale_x).floor().min(scale_x) as i32;
let y = (lin_y * scale_y).floor().min(scale_y) as i32;
let z = (lin_z * scale_z).floor().min(scale_z) as i32;
let x_n = (lin_x * scale_x).ceil().min(scale_x) as i32;
let y_n = (lin_y * scale_y).ceil().min(scale_y) as i32;
let z_n = (lin_z * scale_z).ceil().min(scale_z) as i32;
// Fractional position inside the cell along each axis.
let dr = lin_x * scale_x - x as f32;
let dg = lin_y * scale_y - y as f32;
let db = lin_z * scale_z - z as f32;
let c0 = fetch.fetch(x, y, z);
if db >= dr {
let x0 = fetch.fetch(x, y, z_n);
let x1 = fetch.fetch(x_n, y, z_n);
let x2 = fetch.fetch(x, y_n, z);
let x3 = fetch.fetch(x, y_n, z_n);
let x4 = fetch.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - x0;
let c3 = x2 - c0;
let c4 = c0 - x2 - x0 + x3;
let c5 = x0 - x3 - x1 + x4;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
} else {
let x0 = fetch.fetch(x_n, y, z);
let x1 = fetch.fetch(x_n, y, z_n);
let x2 = fetch.fetch(x, y_n, z);
let x3 = fetch.fetch(x_n, y_n, z);
let x4 = fetch.fetch(x_n, y_n, z_n);
let c1 = x1 - x0;
let c2 = x0 - c0;
let c3 = x2 - c0;
let c4 = x0 - x3 - x1 + x4;
let c5 = c0 - x2 - x0 + x3;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
}
}
/// Trilinear interpolation producing a three-channel sample.
pub fn trilinear_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32) -> Vector3f {
    let fetcher = ArrayFetchVector3f {
        array: self.array,
        x_stride: self.x_stride,
        y_stride: self.y_stride,
    };
    self.trilinear(lin_x, lin_y, lin_z, fetcher)
}
/// Prism interpolation producing a three-channel sample.
#[cfg(feature = "options")]
#[cfg_attr(docsrs, doc(cfg(feature = "options")))]
pub fn prism_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32) -> Vector3f {
    let fetcher = ArrayFetchVector3f {
        array: self.array,
        x_stride: self.x_stride,
        y_stride: self.y_stride,
    };
    self.prism(lin_x, lin_y, lin_z, fetcher)
}
/// Pyramidal interpolation producing a three-channel sample.
#[cfg(feature = "options")]
#[cfg_attr(docsrs, doc(cfg(feature = "options")))]
pub fn pyramid_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32) -> Vector3f {
    let fetcher = ArrayFetchVector3f {
        array: self.array,
        x_stride: self.x_stride,
        y_stride: self.y_stride,
    };
    self.pyramid(lin_x, lin_y, lin_z, fetcher)
}
/// Tetrahedral interpolation producing a three-channel sample.
#[cfg(feature = "options")]
#[cfg_attr(docsrs, doc(cfg(feature = "options")))]
pub fn tetra_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32) -> Vector3f {
    let fetcher = ArrayFetchVector3f {
        array: self.array,
        x_stride: self.x_stride,
        y_stride: self.y_stride,
    };
    self.tetra(lin_x, lin_y, lin_z, fetcher)
}
/// Trilinear interpolation producing a four-channel sample.
pub fn trilinear_vec4(&self, lin_x: f32, lin_y: f32, lin_z: f32) -> Vector4f {
    let fetcher = ArrayFetchVector4f {
        array: self.array,
        x_stride: self.x_stride,
        y_stride: self.y_stride,
    };
    self.trilinear(lin_x, lin_y, lin_z, fetcher)
}
/// Tetrahedral interpolation producing a four-channel sample.
#[cfg(feature = "options")]
// Consistency fix: every other feature-gated public wrapper in this impl
// carries the docsrs cfg annotation; this one was missing it.
#[cfg_attr(docsrs, doc(cfg(feature = "options")))]
pub fn tetra_vec4(&self, lin_x: f32, lin_y: f32, lin_z: f32) -> Vector4f {
    self.tetra(
        lin_x,
        lin_y,
        lin_z,
        ArrayFetchVector4f {
            array: self.array,
            x_stride: self.x_stride,
            y_stride: self.y_stride,
        },
    )
}
/// Pyramidal interpolation producing a four-channel sample.
#[cfg(feature = "options")]
#[cfg_attr(docsrs, doc(cfg(feature = "options")))]
pub fn pyramid_vec4(&self, lin_x: f32, lin_y: f32, lin_z: f32) -> Vector4f {
    let fetcher = ArrayFetchVector4f {
        array: self.array,
        x_stride: self.x_stride,
        y_stride: self.y_stride,
    };
    self.pyramid(lin_x, lin_y, lin_z, fetcher)
}
/// Prism interpolation producing a four-channel sample.
#[cfg(feature = "options")]
#[cfg_attr(docsrs, doc(cfg(feature = "options")))]
pub fn prism_vec4(&self, lin_x: f32, lin_y: f32, lin_z: f32) -> Vector4f {
    let fetcher = ArrayFetchVector4f {
        array: self.array,
        x_stride: self.x_stride,
        y_stride: self.y_stride,
    };
    self.prism(lin_x, lin_y, lin_z, fetcher)
}
}