#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::*;
/// Converts a row of interleaved RGBA bytes into 8-bit grayscale using NEON.
///
/// One gray byte is written per element of `output`; pixel `p` is read from
/// `input[p * 4..p * 4 + 3]` (the alpha byte is ignored). Full groups of eight
/// pixels are computed as `(77 * r + 150 * g + 29 * b) >> 8`; the remaining
/// `output.len() % 8` pixels are delegated to `super::color::rgba_to_gray`.
///
/// # Panics
///
/// Panics if `input` holds fewer than `output.len() * 4` bytes. The check runs
/// before any raw-pointer load so a short input can never be read out of bounds.
///
/// # Safety
///
/// The caller must ensure the executing CPU supports the `neon` target feature.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
pub unsafe fn row_rgba_to_gray_neon(input: &[u8], output: &mut [u8]) {
    const LANES: usize = 8;

    let pixel_count = output.len();
    // `input.len() / 4 >= pixel_count` is equivalent to
    // `input.len() >= pixel_count * 4` but cannot overflow.
    assert!(
        input.len() / 4 >= pixel_count,
        "input holds {} bytes but {} RGBA pixels were requested",
        input.len(),
        pixel_count
    );

    // The weights sum to 256, so the widened sum is at most 255 * 256 = 65280
    // and always fits in a u16 lane.
    let coeff_r = vdup_n_u8(77);
    let coeff_g = vdup_n_u8(150);
    let coeff_b = vdup_n_u8(29);

    let vector_pixels = pixel_count - pixel_count % LANES;
    let src = input.as_ptr();
    let dst = output.as_mut_ptr();

    for base in (0..vector_pixels).step_by(LANES) {
        // SAFETY: `base + 8 <= vector_pixels <= output.len()`, and the assert
        // above guarantees `input.len() >= output.len() * 4`, so the 32-byte
        // load and the 8-byte store both stay inside their slices.
        let rgba = vld4_u8(src.add(base * 4));
        let r_wide = vmull_u8(rgba.0, coeff_r);
        let g_wide = vmull_u8(rgba.1, coeff_g);
        let b_wide = vmull_u8(rgba.2, coeff_b);
        let sum = vaddq_u16(vaddq_u16(r_wide, g_wide), b_wide);
        // Narrowing shift by 8 divides by 256 and packs back to u8 lanes.
        let gray = vshrn_n_u16(sum, 8);
        vst1_u8(dst.add(base), gray);
    }

    // Scalar tail for the pixels that do not fill a whole vector.
    for px in vector_pixels..pixel_count {
        let idx = px * 4;
        output[px] = super::color::rgba_to_gray(input[idx], input[idx + 1], input[idx + 2]);
    }
}
/// Scales the R, G and B bytes of every RGBA pixel in `row` by `factor`, in place.
///
/// Each color byte becomes `byte as f32 * factor`, clamped to `[0.0, 255.0]`
/// and truncated to `u8`. The alpha byte of every pixel is left unchanged, as
/// are any trailing bytes that do not form a complete 4-byte pixel.
///
/// Eight pixels (32 bytes) are handled per NEON iteration; whatever is left
/// goes through the equivalent scalar arithmetic, so a row shorter than 32
/// bytes never triggers a vector load.
///
/// # Safety
///
/// The caller must ensure the executing CPU supports the `neon` target feature.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
pub unsafe fn row_brightness_neon(row: &mut [u8], factor: f32) {
    const BYTES_PER_PIXEL: usize = 4;
    // `vld4_u8` / `vst4_u8` move eight interleaved 4-byte pixels at a time.
    const BYTES_PER_BLOCK: usize = 8 * BYTES_PER_PIXEL;

    let vfactor = vdupq_n_f32(factor);
    let vzero = vdupq_n_f32(0.0);
    let vmax = vdupq_n_f32(255.0);

    let mut blocks = row.chunks_exact_mut(BYTES_PER_BLOCK);
    for block in &mut blocks {
        let ptr = block.as_mut_ptr();
        // SAFETY: `chunks_exact_mut` yields slices of exactly 32 bytes, which
        // is precisely what the de-interleaving load and the interleaving
        // store below access.
        let rgba = vld4_u8(ptr);
        let r = process_channel_neon(rgba.0, vfactor, vzero, vmax);
        let g = process_channel_neon(rgba.1, vfactor, vzero, vmax);
        let b = process_channel_neon(rgba.2, vfactor, vzero, vmax);
        // Alpha lanes are written back exactly as they were loaded.
        vst4_u8(ptr, uint8x8x4_t(r, g, b, rgba.3));
    }

    // Scalar tail: fewer than eight whole pixels remain.
    for px in blocks.into_remainder().chunks_exact_mut(BYTES_PER_PIXEL) {
        for channel in &mut px[..3] {
            *channel = (*channel as f32 * factor).clamp(0.0, 255.0) as u8;
        }
    }
}

/// Multiplies eight `u8` lanes by `factor`, clamps to `[zero, max]` and
/// converts back to `u8`.
///
/// The lanes are widened to `f32` in two groups of four, scaled, clamped with
/// `vmaxq_f32` / `vminq_f32`, converted to `u32` with `vcvtq_u32_f32` and
/// narrowed back down. With `zero = 0.0` and `max = 255.0` every value fits in
/// a byte, so the narrowing steps discard nothing.
///
/// # Safety
///
/// The caller must ensure the executing CPU supports the `neon` target feature.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn process_channel_neon(
    ch: uint8x8_t,
    factor: float32x4_t,
    zero: float32x4_t,
    max: float32x4_t,
) -> uint8x8_t {
    // u8x8 -> u16x8 -> two u32x4 halves -> two f32x4 halves.
    let wide = vmovl_u8(ch);
    let lo = vcvtq_f32_u32(vmovl_u16(vget_low_u16(wide)));
    let hi = vcvtq_f32_u32(vmovl_u16(vget_high_u16(wide)));

    let lo = vminq_f32(vmaxq_f32(vmulq_f32(lo, factor), zero), max);
    let hi = vminq_f32(vmaxq_f32(vmulq_f32(hi, factor), zero), max);

    // Back down: f32x4 -> u32x4 -> u16x4, recombine, then u16x8 -> u8x8.
    let lo = vmovn_u32(vcvtq_u32_f32(lo));
    let hi = vmovn_u32(vcvtq_u32_f32(hi));
    vmovn_u16(vcombine_u16(lo, hi))
}
#[cfg(test)]
#[cfg(target_arch = "aarch64")]
mod tests {
    use super::*;

    /// Builds a 32-byte row consisting of eight copies of `pixel`.
    fn solid_row(pixel: [u8; 4]) -> [u8; 32] {
        let mut row = [0u8; 32];
        for slot in row.chunks_exact_mut(4) {
            slot.copy_from_slice(&pixel);
        }
        row
    }

    /// Runs the NEON grayscale kernel over an eight-pixel row.
    fn gray_of(row: &[u8; 32]) -> [u8; 8] {
        let mut gray = [0u8; 8];
        unsafe {
            row_rgba_to_gray_neon(row, &mut gray);
        }
        gray
    }

    /// The NEON kernel must agree with the scalar row conversion to within
    /// one gray level on a 16-pixel gradient.
    #[test]
    fn test_neon_rgba_to_gray() {
        const PIXELS: usize = 16;

        let input: Vec<u8> = (0..PIXELS)
            .flat_map(|p| [(p * 16) as u8, (255 - p * 16) as u8, (p * 8) as u8, 255])
            .collect();
        let mut neon_out = vec![0u8; PIXELS];
        let mut scalar_out = vec![0u8; PIXELS];

        unsafe {
            row_rgba_to_gray_neon(&input, &mut neon_out);
        }
        super::super::color::row_rgba_to_gray(&input, &mut scalar_out);

        for (index, (&neon, &scalar)) in neon_out.iter().zip(scalar_out.iter()).enumerate() {
            let delta = (neon as i32 - scalar as i32).abs();
            assert!(
                delta <= 1,
                "Pixel {}: NEON={} Scalar={}",
                index,
                neon,
                scalar
            );
        }
    }

    /// Fully saturated red and green rows must land within one level of
    /// their expected gray values.
    #[test]
    fn test_neon_gray_pure_colors() {
        let red = gray_of(&solid_row([255, 0, 0, 255]));
        for &level in red.iter() {
            assert!(
                (level as i32 - 76).abs() <= 1,
                "Red gray should be ~76, got {}",
                level
            );
        }

        let green = gray_of(&solid_row([0, 255, 0, 255]));
        for &level in green.iter() {
            assert!(
                (level as i32 - 150).abs() <= 1,
                "Green gray should be ~150, got {}",
                level
            );
        }
    }
}