use magetypes::simd::backends::F32x4Convert;
use magetypes::simd::generic::f32x4;
/// Applies a transfer function to every colour sample of `row`, in place.
///
/// Three layouts are handled, selected by `has_alpha` and `channels`:
///
/// * `has_alpha` with exactly 4 channels: each group of four floats is run
///   through `tf_x4`, then the fourth lane is put back to its prior value.
///   Trailing floats that do not fill a group of four are left unchanged.
/// * `has_alpha` with any other channel count of at least 2: `tf_scalar` is
///   applied to every sample of each complete pixel except the last one.
///   Trailing floats that do not form a complete pixel are left unchanged.
/// * Everything else (no alpha, or fewer than 2 channels): every float is
///   converted, `tf_x4` on groups of four and `tf_scalar` on the remainder.
///
/// NOTE(review): presumably `row` holds interleaved pixels with alpha as the
/// last channel, and `tf_x4` / `tf_scalar` compute the same curve — confirm
/// against callers.
#[inline(always)]
pub(super) fn tf_row_inplace<T: F32x4Convert>(
    t: T,
    row: &mut [f32],
    channels: usize,
    has_alpha: bool,
    tf_x4: impl Fn(T, f32x4<T>) -> f32x4<T>,
    tf_scalar: fn(f32) -> f32,
) {
    match (has_alpha, channels) {
        // Four-channel alpha layout: one vector per group of four floats.
        (true, 4) => {
            let (quads, _partial) = f32x4::<T>::partition_slice_mut(t, row);
            for quad in quads.iter_mut() {
                // The vector op also touches lane 3, so remember it first.
                let saved_alpha = quad[3];
                tf_x4(t, f32x4::load(t, quad)).store(quad);
                quad[3] = saved_alpha;
            }
        }
        // Other alpha layouts: scalar path over all but the last channel.
        (true, stride) if stride >= 2 => {
            for pixel in row.chunks_exact_mut(stride) {
                // Each pixel has exactly `stride >= 2` samples, so the split
                // always succeeds and `colour` holds `stride - 1` samples.
                if let Some((_alpha, colour)) = pixel.split_last_mut() {
                    for sample in colour {
                        *sample = tf_scalar(*sample);
                    }
                }
            }
        }
        // No alpha to preserve: convert the whole row.
        _ => {
            let (quads, leftover) = f32x4::<T>::partition_slice_mut(t, row);
            for quad in quads.iter_mut() {
                tf_x4(t, f32x4::load(t, quad)).store(quad);
            }
            for sample in leftover.iter_mut() {
                *sample = tf_scalar(*sample);
            }
        }
    }
}