use crate::alpha_handle_f16::premultiply_pixel_f16_row;
use core::f16;
use std::arch::aarch64::*;
/// Premultiplies interleaved RGBA `f16` pixels from `src` by their alpha channel and
/// stores the result in `dst`.
///
/// The first three channels of every pixel are multiplied by the fourth (alpha); the
/// alpha channel itself is copied through unchanged. Work is done eight pixels
/// (32 `f16` values) at a time using native half-precision NEON arithmetic. A trailing
/// partial block is staged through a zero-padded stack buffer so that the very same
/// vector kernel can be used for it.
///
/// Only as many full 32-element blocks as both slices have in common are processed.
///
/// # Panics
///
/// Panics if the leftover (sub-block) parts of `dst` and `src` differ in length.
#[target_feature(enable = "fp16")]
fn neon_premultiply_alpha_rgba_row_f16_full(dst: &mut [f16], src: &[f16]) {
    // Eight RGBA pixels: 8 lanes x 4 channels.
    const BLOCK: usize = 8 * 4;

    // Multiplies the three colour planes of eight de-interleaved pixels by the alpha
    // plane. The alpha plane is returned bit-for-bit as it was loaded.
    #[inline]
    #[target_feature(enable = "fp16")]
    fn premultiply_block(pixel: uint16x8x4_t) -> uint16x8x4_t {
        let alpha = vreinterpretq_f16_u16(pixel.3);
        let r_values = vmulq_f16(vreinterpretq_f16_u16(pixel.0), alpha);
        let g_values = vmulq_f16(vreinterpretq_f16_u16(pixel.1), alpha);
        let b_values = vmulq_f16(vreinterpretq_f16_u16(pixel.2), alpha);
        uint16x8x4_t(
            vreinterpretq_u16_f16(r_values),
            vreinterpretq_u16_f16(g_values),
            vreinterpretq_u16_f16(b_values),
            pixel.3,
        )
    }

    let mut dst_blocks = dst.chunks_exact_mut(BLOCK);
    let mut src_blocks = src.chunks_exact(BLOCK);

    for (dst_block, src_block) in dst_blocks.by_ref().zip(src_blocks.by_ref()) {
        // SAFETY: `chunks_exact` yields exactly `BLOCK` (32) `f16` values, which is the
        // 4 x 8 16-bit lanes read by `vld4q_u16`; `f16` and `u16` have the same size.
        let pixel = unsafe { vld4q_u16(src_block.as_ptr().cast()) };
        let store_pixel = premultiply_block(pixel);
        // SAFETY: `dst_block` is exactly `BLOCK` writable 16-bit elements, matching the
        // 4 x 8 lanes written by `vst4q_u16`.
        unsafe {
            vst4q_u16(dst_block.as_mut_ptr().cast(), store_pixel);
        }
    }

    let rem = dst_blocks.into_remainder();
    let src_rem = src_blocks.remainder();

    // The tail is handled by the vector kernel below, which rewrites every element of
    // `rem`; no separate scalar pass over the tail is needed.
    if !rem.is_empty() {
        assert_eq!(rem.len(), src_rem.len());

        // A remainder of `chunks_exact` is always shorter than `BLOCK`, so it fits in
        // the scratch buffer. Padding lanes stay zero and are never copied back.
        let mut transient: [f16; BLOCK] = [0.; BLOCK];
        transient[..src_rem.len()].copy_from_slice(src_rem);

        // SAFETY: `transient` holds exactly `BLOCK` initialised 16-bit elements.
        let pixel = unsafe { vld4q_u16(transient.as_ptr().cast()) };
        let store_pixel = premultiply_block(pixel);
        // SAFETY: `transient` is a local array of exactly `BLOCK` writable elements.
        unsafe {
            vst4q_u16(transient.as_mut_ptr().cast(), store_pixel);
        }

        rem.copy_from_slice(&transient[..rem.len()]);
    }
}
/// Crate-internal entry point for the fp16 NEON alpha premultiplication.
///
/// Forwards `dst` and `src` unchanged to `neon_premultiply_alpha_rgba_row_f16_full`.
pub(crate) fn neon_premultiply_alpha_rgba_f16_full(dst: &mut [f16], src: &[f16]) {
    // SAFETY: the callee is compiled with the `fp16` target feature enabled and this
    // wrapper performs no detection of its own.
    // NOTE(review): presumably callers only select this path after confirming fp16
    // support at runtime — confirm at the dispatch site.
    unsafe { neon_premultiply_alpha_rgba_row_f16_full(dst, src) }
}
/// Reverses alpha premultiplication of interleaved RGBA `f16` pixels, in place.
///
/// For every pixel the first three channels are divided by the fourth (alpha). Pixels
/// whose alpha compares equal to zero keep their colour channels untouched, and the
/// alpha channel is always written back exactly as it was read. Eight pixels
/// (32 `f16` values) are handled per step with half-precision NEON arithmetic; a
/// trailing partial block goes through a zero-padded stack buffer and the same kernel.
#[target_feature(enable = "fp16")]
fn neon_unpremultiply_alpha_rgba_f16_row_full(in_place: &mut [f16]) {
    // Eight RGBA pixels: 8 lanes x 4 channels.
    const BLOCK: usize = 8 * 4;

    // Divides the colour planes of eight de-interleaved pixels by the alpha plane,
    // selecting the original colour wherever alpha is zero.
    #[inline]
    #[target_feature(enable = "fp16")]
    fn unpremultiply_block(px: uint16x8x4_t) -> uint16x8x4_t {
        let alpha = vreinterpretq_f16_u16(px.3);
        // Lanes are all-ones where alpha == 0; `vbslq` then picks its first operand.
        let alpha_is_zero = vceqzq_f16(alpha);

        let red = vreinterpretq_f16_u16(px.0);
        let green = vreinterpretq_f16_u16(px.1);
        let blue = vreinterpretq_f16_u16(px.2);

        let red = vbslq_f16(alpha_is_zero, red, vdivq_f16(red, alpha));
        let green = vbslq_f16(alpha_is_zero, green, vdivq_f16(green, alpha));
        let blue = vbslq_f16(alpha_is_zero, blue, vdivq_f16(blue, alpha));

        uint16x8x4_t(
            vreinterpretq_u16_f16(red),
            vreinterpretq_u16_f16(green),
            vreinterpretq_u16_f16(blue),
            px.3,
        )
    }

    let mut blocks = in_place.chunks_exact_mut(BLOCK);
    for block in blocks.by_ref() {
        // SAFETY: `chunks_exact_mut` yields exactly `BLOCK` (32) `f16` values, i.e. the
        // 4 x 8 16-bit lanes read by `vld4q_u16`; `f16` and `u16` have the same size.
        let loaded = unsafe { vld4q_u16(block.as_ptr().cast()) };
        let restored = unpremultiply_block(loaded);
        // SAFETY: `block` is exactly `BLOCK` writable 16-bit elements.
        unsafe { vst4q_u16(block.as_mut_ptr().cast(), restored) };
    }

    let tail = blocks.into_remainder();
    if tail.is_empty() {
        return;
    }

    // Stage the partial block in a zeroed scratch array: padding lanes carry alpha 0,
    // so the kernel leaves them alone, and they are never copied back.
    let mut scratch: [f16; BLOCK] = [0.; BLOCK];
    assert!(tail.len() <= 4 * 8);
    scratch[..tail.len()].copy_from_slice(tail);

    // SAFETY: `scratch` holds exactly `BLOCK` initialised 16-bit elements.
    let loaded = unsafe { vld4q_u16(scratch.as_ptr().cast()) };
    let restored = unpremultiply_block(loaded);
    // SAFETY: `scratch` is a local array of exactly `BLOCK` writable elements.
    unsafe { vst4q_u16(scratch.as_mut_ptr().cast(), restored) };

    tail.copy_from_slice(&scratch[..tail.len()]);
}
/// Crate-internal entry point for the fp16 NEON alpha un-premultiplication.
///
/// Forwards `in_place` unchanged to `neon_unpremultiply_alpha_rgba_f16_row_full`,
/// which rewrites the slice in place.
pub(crate) fn neon_unpremultiply_alpha_rgba_f16_full(in_place: &mut [f16]) {
    // SAFETY: the callee is compiled with the `fp16` target feature enabled and this
    // wrapper performs no detection of its own.
    // NOTE(review): presumably callers only select this path after confirming fp16
    // support at runtime — confirm at the dispatch site.
    unsafe { neon_unpremultiply_alpha_rgba_f16_row_full(in_place) }
}