use core::mem::MaybeUninit;
#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::{
vcvtq_f32_u32, vget_low_u8, vget_low_u16, vld1q_u8, vmovl_high_u8, vmovl_high_u16, vmovl_u8,
vmovl_u16, vst1q_f32,
};
/// Portable byte → `f32` widening: writes `f32::from(src[i])` into `out[i]`
/// for every index.
///
/// On return every slot of `out` has been written, so the caller may treat
/// the whole slice as initialized.
///
/// # Panics
///
/// Panics if `out.len() != src.len()`; exactly one `f32` is produced per
/// input byte.
#[inline]
#[doc(hidden)]
pub fn rgb_widen_scalar(out: &mut [MaybeUninit<f32>], src: &[u8]) {
    let (dst_len, src_len) = (out.len(), src.len());
    assert_eq!(
        dst_len, src_len,
        "rgb_widen_scalar: out.len() ({}) must equal src.len() ({}) (one output f32 per input byte)",
        dst_len, src_len,
    );
    // The lengths are equal, so the zip visits every output slot exactly once.
    out.iter_mut().zip(src).for_each(|(slot, &byte)| {
        slot.write(f32::from(byte));
    });
}
/// NEON byte → `f32` widening: writes the `f32` value of `src[i]` into
/// `out[i]` for every index.
///
/// The input is processed in tiles of 16 bytes. Each tile is loaded as one
/// `u8x16` vector, zero-extended `u8 → u16 → u32`, converted to four `f32x4`
/// vectors and stored. Any trailing bytes that do not fill a whole tile are
/// handed to [`rgb_widen_scalar`].
///
/// # Safety
///
/// The executing CPU must support the `neon` target feature.
///
/// # Panics
///
/// Panics if `out.len() != src.len()`.
#[cfg(target_arch = "aarch64")]
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn rgb_widen_neon(out: &mut [MaybeUninit<f32>], src: &[u8]) {
    // Bytes consumed (and floats produced) per vector iteration.
    const TILE: usize = 16;

    let (dst_len, src_len) = (out.len(), src.len());
    assert_eq!(
        dst_len, src_len,
        "rgb_widen_neon: out.len() ({}) must equal src.len() ({}) (one output f32 per input byte)",
        dst_len, src_len,
    );

    // Largest prefix whose length is a multiple of TILE; the rest is the tail.
    let vector_len = src_len - src_len % TILE;
    let (src_tiles, src_tail) = src.split_at(vector_len);
    let (dst_tiles, dst_tail) = out.split_at_mut(vector_len);

    for (bytes, floats) in src_tiles
        .chunks_exact(TILE)
        .zip(dst_tiles.chunks_exact_mut(TILE))
    {
        let load_ptr = bytes.as_ptr();
        let store_ptr = floats.as_mut_ptr().cast::<f32>();
        // SAFETY: `bytes` and `floats` are each exactly TILE (16) elements
        // long, so the 16-byte load and the four 4-lane stores at offsets
        // 0, 4, 8 and 12 stay inside their chunks. `MaybeUninit<f32>` has the
        // same layout as `f32`, so writing through the cast pointer is valid.
        unsafe {
            let packed = vld1q_u8(load_ptr);
            let lo_u16 = vmovl_u8(vget_low_u8(packed));
            let hi_u16 = vmovl_high_u8(packed);
            let quad0 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(lo_u16)));
            let quad1 = vcvtq_f32_u32(vmovl_high_u16(lo_u16));
            let quad2 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(hi_u16)));
            let quad3 = vcvtq_f32_u32(vmovl_high_u16(hi_u16));
            vst1q_f32(store_ptr, quad0);
            vst1q_f32(store_ptr.add(4), quad1);
            vst1q_f32(store_ptr.add(8), quad2);
            vst1q_f32(store_ptr.add(12), quad3);
        }
    }

    // Fewer than TILE bytes remain; finish them with the portable kernel.
    if !src_tail.is_empty() {
        rgb_widen_scalar(dst_tail, src_tail);
    }
}
/// Widens every byte of `src` to an `f32` in the matching slot of `out`,
/// choosing the implementation at runtime.
///
/// On `aarch64`, the NEON kernel is used when
/// `crate::simd::is_neon_available()` returns `true`. In every other case
/// the call goes to [`rgb_widen_scalar`].
///
/// # Panics
///
/// Panics if `out.len() != src.len()`.
#[inline]
#[doc(hidden)]
pub fn rgb_widen(out: &mut [MaybeUninit<f32>], src: &[u8]) {
    let (dst_len, src_len) = (out.len(), src.len());
    assert_eq!(
        dst_len, src_len,
        "simd::vlm::rgb_widen: out.len() ({}) must equal src.len() ({})",
        dst_len, src_len,
    );

    #[cfg(target_arch = "aarch64")]
    {
        if crate::simd::is_neon_available() {
            // SAFETY: NEON support was confirmed by the runtime check on the
            // line above, which is the only requirement of `rgb_widen_neon`.
            return unsafe { rgb_widen_neon(out, src) };
        }
    }

    rgb_widen_scalar(out, src)
}
#[cfg(test)]
mod tests {
    use core::mem::MaybeUninit;

    use super::{rgb_widen, rgb_widen_scalar};
    use crate::simd::diff::{assert_eq_over_lane_sweep, lane_sweep_lengths};

    /// Allocates a `Vec<f32>` with room for `src.len()` elements, lets
    /// `kernel` fill its spare capacity from `src`, and returns the vector
    /// with its length set to `src.len()`.
    fn widen_with<K>(src: &[u8], kernel: K) -> Vec<f32>
    where
        K: FnOnce(&mut [MaybeUninit<f32>], &[u8]),
    {
        let len = src.len();
        let mut dst: Vec<f32> = Vec::with_capacity(len);
        kernel(&mut dst.spare_capacity_mut()[..len], src);
        // SAFETY: capacity is at least `len`, and every kernel used in this
        // module writes all `len` slots before returning (or panics first).
        unsafe { dst.set_len(len) };
        dst
    }

    /// Invokes `kernel` with a 5-slot output and a 6-byte input so that its
    /// length assertion fires.
    fn call_with_mismatched_lengths<K>(kernel: K)
    where
        K: FnOnce(&mut [MaybeUninit<f32>], &[u8]),
    {
        let src = [0u8; 6];
        let mut dst: Vec<f32> = Vec::with_capacity(5);
        kernel(&mut dst.spare_capacity_mut()[..5], &src);
    }

    /// Output of the scalar kernel for `src`.
    fn rgb_widen_scalar_init(src: &[u8]) -> Vec<f32> {
        widen_with(src, rgb_widen_scalar)
    }

    /// Output of the runtime dispatcher for `src`.
    fn rgb_widen_dispatch_init(src: &[u8]) -> Vec<f32> {
        widen_with(src, rgb_widen)
    }

    /// Output of the NEON kernel for `src`; callers must have checked that
    /// NEON is available.
    #[cfg(target_arch = "aarch64")]
    fn rgb_widen_neon_init(src: &[u8]) -> Vec<f32> {
        widen_with(src, |out, bytes| unsafe {
            super::rgb_widen_neon(out, bytes)
        })
    }

    /// Deterministic test input: byte `i` is `(i * 7) % 256`.
    fn gen_rgb_bytes(n: usize) -> Vec<u8> {
        let mut bytes = Vec::with_capacity(n);
        for i in 0..n {
            bytes.push(((i * 7) % 256) as u8);
        }
        bytes
    }

    /// Asserts that dispatcher and scalar agree on `n` generated bytes and
    /// that exactly `n` floats are produced.
    fn assert_dispatch_matches_scalar(n: usize) {
        let src = gen_rgb_bytes(n);
        let buf = rgb_widen_dispatch_init(&src);
        let scalar = rgb_widen_scalar_init(&src);
        assert_eq!(buf, scalar);
        assert_eq!(buf.len(), n);
    }

    #[test]
    fn rgb_widen_scalar_matches_dispatcher_exact() {
        assert_eq_over_lane_sweep(
            16,
            rgb_widen_scalar_init,
            rgb_widen_dispatch_init,
            gen_rgb_bytes,
        );
    }

    #[cfg(target_arch = "aarch64")]
    #[test]
    fn rgb_widen_neon_matches_scalar_bit_identical() {
        if !crate::simd::is_neon_available() {
            return;
        }
        // Lengths around multiples of the 16-byte tile, plus a few large ones.
        let lengths: [usize; 14] = [0, 1, 15, 16, 17, 31, 32, 33, 48, 49, 64, 100, 1024, 4096];
        for n in lengths {
            let src = gen_rgb_bytes(n);
            let scalar = rgb_widen_scalar_init(&src);
            let neon = rgb_widen_neon_init(&src);
            assert_eq!(neon, scalar, "rgb_widen_neon vs scalar differ at n={n}");
        }
    }

    #[test]
    fn rgb_widen_lane_sweep_covers_tile_boundaries() {
        let sweep = lane_sweep_lengths(16);
        assert_eq!(sweep, [0, 1, 15, 16, 17, 31, 32, 48, 49]);
    }

    #[test]
    fn rgb_widen_empty_is_noop() {
        let from_dispatch = rgb_widen_dispatch_init(&[]);
        assert!(from_dispatch.is_empty());
        let from_scalar = rgb_widen_scalar_init(&[]);
        assert!(from_scalar.is_empty());
    }

    #[test]
    fn rgb_widen_one_pixel_no_swap() {
        // Channel order must be preserved: no reordering of the three bytes.
        let buf = rgb_widen_dispatch_init(&[10, 20, 30]);
        assert_eq!(buf, vec![10.0_f32, 20.0, 30.0]);
    }

    #[test]
    fn rgb_widen_sixteen_bytes_one_full_tile() {
        assert_dispatch_matches_scalar(16);
    }

    #[test]
    fn rgb_widen_seventeen_bytes_tile_plus_one() {
        assert_dispatch_matches_scalar(17);
    }

    #[test]
    fn image_to_array_rgb_matches_old_extend() {
        let n = 512usize * 512 * 3;
        // Each pattern is described by the byte it places at index `i`.
        let patterns: [(&str, fn(usize) -> u8); 4] = [
            ("all_zero", |_| 0u8),
            ("all_255", |_| 255u8),
            ("asymmetric", |i| ((i * 13) % 256) as u8),
            ("gradient", |i| (i % 256) as u8),
        ];
        for (name, byte_at) in patterns {
            let raw: Vec<u8> = (0..n).map(byte_at).collect();
            assert_eq!(raw.len(), n, "pattern {name} length mismatch");

            // Reference path: reserve, then extend with per-byte conversions.
            let mut old: Vec<f32> = Vec::with_capacity(n);
            old.extend(raw.iter().map(|&b| f32::from(b)));
            assert_eq!(old.len(), n, "OLD extend length mismatch ({name})");

            let new = rgb_widen_dispatch_init(&raw);
            assert_eq!(
                new, old,
                "dispatcher must produce byte-identical output to the reference extend (pattern={name})"
            );
        }
    }

    #[test]
    #[should_panic(expected = "rgb_widen_scalar: out.len() (5) must equal src.len() (6)")]
    fn rgb_widen_scalar_panics_on_size_mismatch_in_release() {
        call_with_mismatched_lengths(rgb_widen_scalar);
    }

    #[test]
    #[should_panic(expected = "simd::vlm::rgb_widen: out.len() (5) must equal src.len() (6)")]
    fn rgb_widen_dispatch_panics_on_size_mismatch_in_release() {
        call_with_mismatched_lengths(rgb_widen);
    }

    #[cfg(target_arch = "aarch64")]
    #[test]
    #[should_panic(expected = "rgb_widen_neon: out.len() (5) must equal src.len() (6)")]
    fn rgb_widen_neon_panics_on_size_mismatch_in_release() {
        if !crate::simd::is_neon_available() {
            // Without NEON the kernel cannot be called; emit the expected
            // message so the `should_panic` expectation is still satisfied.
            panic!("rgb_widen_neon: out.len() (5) must equal src.len() (6) (skipped — NEON unavailable)");
        }
        call_with_mismatched_lengths(|out, bytes| unsafe {
            super::rgb_widen_neon(out, bytes)
        });
    }
}