use core::arch::aarch64::*;
use crate::row::scalar;
/// Computes `(c << 3) | (c >> 2)` in every 16-bit lane.
///
/// For lanes holding a 5-bit value (`0..=31`) this replicates the top bits into
/// the low bits, mapping `0 -> 0` and `31 -> 255`.
///
/// # Safety
///
/// NEON must be available on the executing CPU.
#[inline(always)]
unsafe fn expand5(c: uint16x8_t) -> uint16x8_t {
    // SAFETY: only register-to-register NEON arithmetic; the caller guarantees NEON.
    unsafe {
        let shifted_up = vshlq_n_u16::<3>(c);
        let top_bits = vshrq_n_u16::<2>(c);
        vorrq_u16(shifted_up, top_bits)
    }
}
/// Computes `(c << 2) | (c >> 4)` in every 16-bit lane.
///
/// For lanes holding a 6-bit value (`0..=63`) this replicates the top bits into
/// the low bits, mapping `0 -> 0` and `63 -> 255`.
///
/// # Safety
///
/// NEON must be available on the executing CPU.
#[inline(always)]
unsafe fn expand6(c: uint16x8_t) -> uint16x8_t {
    // SAFETY: only register-to-register NEON arithmetic; the caller guarantees NEON.
    unsafe {
        let shifted_up = vshlq_n_u16::<2>(c);
        let top_bits = vshrq_n_u16::<4>(c);
        vorrq_u16(shifted_up, top_bits)
    }
}
/// Computes `(c << 4) | c` in every 16-bit lane.
///
/// For lanes holding a 4-bit value (`0..=15`) this duplicates the nibble,
/// mapping `0 -> 0` and `15 -> 255`.
///
/// # Safety
///
/// NEON must be available on the executing CPU.
#[inline(always)]
unsafe fn expand4(c: uint16x8_t) -> uint16x8_t {
    // SAFETY: only register-to-register NEON arithmetic; the caller guarantees NEON.
    unsafe {
        let high_nibble = vshlq_n_u16::<4>(c);
        vorrq_u16(high_nibble, c)
    }
}
/// Loads 16 bytes from `ptr` as eight 16-bit lanes, interpreting each byte pair
/// as a little-endian value regardless of the target's endianness.
///
/// On big-endian targets the two bytes of every lane are swapped after the load.
///
/// # Safety
///
/// NEON must be available and `ptr` must be valid for reading 16 bytes.
/// NOTE(review): callers in this file pass byte-slice pointers with no `u16`
/// alignment guarantee; this relies on `vld1q_u16` tolerating that — confirm.
#[inline(always)]
unsafe fn load_u16x8_le(ptr: *const u8) -> uint16x8_t {
    // SAFETY: the caller guarantees 16 readable bytes at `ptr` and NEON support.
    unsafe {
        let native = vld1q_u16(ptr.cast::<u16>());
        if !cfg!(target_endian = "big") {
            return native;
        }
        // Swap the bytes inside each 16-bit lane.
        let as_bytes = vreinterpretq_u8_u16(native);
        vreinterpretq_u16_u8(vrev16q_u8(as_bytes))
    }
}
/// Converts a row of packed RGB565 pixels (little-endian 16-bit words with R in
/// bits 15..11, G in bits 10..5, B in bits 4..0) into interleaved 8-bit `R, G, B`.
///
/// Whole groups of eight pixels are processed with NEON; any remaining pixels
/// are handed to `scalar::legacy_rgb::rgb565_to_rgb_row`.
///
/// # Safety
///
/// NEON must be available. `src` must hold at least `width * 2` bytes and
/// `rgb_out` at least `width * 3` bytes: the vector loop uses raw pointer
/// accesses and the lengths are only checked by `debug_assert!`.
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn rgb565_to_rgb_row(src: &[u8], rgb_out: &mut [u8], width: usize) {
    debug_assert!(src.len() >= width * 2, "src row too short");
    debug_assert!(rgb_out.len() >= width * 3, "rgb_out too short");
    // Pixels covered by complete 8-pixel vector iterations.
    let vector_px = width - width % 8;
    // SAFETY: each iteration touches pixels `offset..offset + 8` with
    // `offset + 8 <= width`, which is in bounds given the length requirements.
    unsafe {
        let field5 = vdupq_n_u16(0x1F);
        let field6 = vdupq_n_u16(0x3F);
        let src_ptr = src.as_ptr();
        let dst_ptr = rgb_out.as_mut_ptr();
        for offset in (0..vector_px).step_by(8) {
            let packed = load_u16x8_le(src_ptr.add(offset * 2));
            let red = expand5(vandq_u16(vshrq_n_u16::<11>(packed), field5));
            let green = expand6(vandq_u16(vshrq_n_u16::<5>(packed), field6));
            let blue = expand5(vandq_u16(packed, field5));
            let planes = uint8x8x3_t(vqmovn_u16(red), vqmovn_u16(green), vqmovn_u16(blue));
            vst3_u8(dst_ptr.add(offset * 3), planes);
        }
        if vector_px != width {
            scalar::legacy_rgb::rgb565_to_rgb_row(
                &src[vector_px * 2..],
                &mut rgb_out[vector_px * 3..],
                width - vector_px,
            );
        }
    }
}
/// Converts a row of packed RGB565 pixels (little-endian 16-bit words with R in
/// bits 15..11, G in bits 10..5, B in bits 4..0) into interleaved 8-bit
/// `R, G, B, A` with alpha fixed at `0xFF`.
///
/// Whole groups of eight pixels are processed with NEON; any remaining pixels
/// are handed to `scalar::legacy_rgb::rgb565_to_rgba_row`.
///
/// # Safety
///
/// NEON must be available. `src` must hold at least `width * 2` bytes and
/// `rgba_out` at least `width * 4` bytes: the vector loop uses raw pointer
/// accesses and the lengths are only checked by `debug_assert!`.
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn rgb565_to_rgba_row(src: &[u8], rgba_out: &mut [u8], width: usize) {
    debug_assert!(src.len() >= width * 2, "src row too short");
    debug_assert!(rgba_out.len() >= width * 4, "rgba_out too short");
    // Pixels covered by complete 8-pixel vector iterations.
    let vector_px = width - width % 8;
    // SAFETY: each iteration touches pixels `offset..offset + 8` with
    // `offset + 8 <= width`, which is in bounds given the length requirements.
    unsafe {
        let field5 = vdupq_n_u16(0x1F);
        let field6 = vdupq_n_u16(0x3F);
        let opaque = vdup_n_u8(0xFF);
        let src_ptr = src.as_ptr();
        let dst_ptr = rgba_out.as_mut_ptr();
        for offset in (0..vector_px).step_by(8) {
            let packed = load_u16x8_le(src_ptr.add(offset * 2));
            let red = expand5(vandq_u16(vshrq_n_u16::<11>(packed), field5));
            let green = expand6(vandq_u16(vshrq_n_u16::<5>(packed), field6));
            let blue = expand5(vandq_u16(packed, field5));
            let planes =
                uint8x8x4_t(vqmovn_u16(red), vqmovn_u16(green), vqmovn_u16(blue), opaque);
            vst4_u8(dst_ptr.add(offset * 4), planes);
        }
        if vector_px != width {
            scalar::legacy_rgb::rgb565_to_rgba_row(
                &src[vector_px * 2..],
                &mut rgba_out[vector_px * 4..],
                width - vector_px,
            );
        }
    }
}
/// Unpacks a row of RGB565 pixels (little-endian 16-bit words with R in bits
/// 15..11, G in bits 10..5, B in bits 4..0) into interleaved `u16` `R, G, B`.
///
/// The vector path stores each field exactly as extracted (5- or 6-bit value,
/// no scaling). Remaining pixels after the last full group of eight are handed
/// to `scalar::legacy_rgb::rgb565_to_rgb_u16_row`.
/// NOTE(review): values are not scaled to the full `u16` range — confirm the
/// scalar tail routine produces the same representation.
///
/// # Safety
///
/// NEON must be available. `src` must hold at least `width * 2` bytes and
/// `rgb_u16_out` at least `width * 3` elements: the vector loop uses raw
/// pointer accesses and the lengths are only checked by `debug_assert!`.
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn rgb565_to_rgb_u16_row(src: &[u8], rgb_u16_out: &mut [u16], width: usize) {
    debug_assert!(src.len() >= width * 2, "src row too short");
    debug_assert!(rgb_u16_out.len() >= width * 3, "rgb_u16_out too short");
    // Pixels covered by complete 8-pixel vector iterations.
    let vector_px = width - width % 8;
    // SAFETY: each iteration touches pixels `offset..offset + 8` with
    // `offset + 8 <= width`, which is in bounds given the length requirements.
    unsafe {
        let field5 = vdupq_n_u16(0x1F);
        let field6 = vdupq_n_u16(0x3F);
        let src_ptr = src.as_ptr();
        let dst_ptr = rgb_u16_out.as_mut_ptr();
        for offset in (0..vector_px).step_by(8) {
            let packed = load_u16x8_le(src_ptr.add(offset * 2));
            let red = vandq_u16(vshrq_n_u16::<11>(packed), field5);
            let green = vandq_u16(vshrq_n_u16::<5>(packed), field6);
            let blue = vandq_u16(packed, field5);
            vst3q_u16(dst_ptr.add(offset * 3), uint16x8x3_t(red, green, blue));
        }
        if vector_px != width {
            scalar::legacy_rgb::rgb565_to_rgb_u16_row(
                &src[vector_px * 2..],
                &mut rgb_u16_out[vector_px * 3..],
                width - vector_px,
            );
        }
    }
}
/// Unpacks a row of RGB565 pixels (little-endian 16-bit words with R in bits
/// 15..11, G in bits 10..5, B in bits 4..0) into interleaved `u16`
/// `R, G, B, A` with alpha fixed at `0xFFFF`.
///
/// The vector path stores each colour field exactly as extracted (5- or 6-bit
/// value, no scaling). Remaining pixels after the last full group of eight are
/// handed to `scalar::legacy_rgb::rgb565_to_rgba_u16_row`.
/// NOTE(review): colour values are unscaled while alpha is `0xFFFF` — confirm
/// this matches the scalar tail routine.
///
/// # Safety
///
/// NEON must be available. `src` must hold at least `width * 2` bytes and
/// `rgba_u16_out` at least `width * 4` elements: the vector loop uses raw
/// pointer accesses and the lengths are only checked by `debug_assert!`.
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn rgb565_to_rgba_u16_row(src: &[u8], rgba_u16_out: &mut [u16], width: usize) {
    debug_assert!(src.len() >= width * 2, "src row too short");
    debug_assert!(rgba_u16_out.len() >= width * 4, "rgba_u16_out too short");
    // Pixels covered by complete 8-pixel vector iterations.
    let vector_px = width - width % 8;
    // SAFETY: each iteration touches pixels `offset..offset + 8` with
    // `offset + 8 <= width`, which is in bounds given the length requirements.
    unsafe {
        let field5 = vdupq_n_u16(0x1F);
        let field6 = vdupq_n_u16(0x3F);
        let opaque = vdupq_n_u16(0xFFFF);
        let src_ptr = src.as_ptr();
        let dst_ptr = rgba_u16_out.as_mut_ptr();
        for offset in (0..vector_px).step_by(8) {
            let packed = load_u16x8_le(src_ptr.add(offset * 2));
            let red = vandq_u16(vshrq_n_u16::<11>(packed), field5);
            let green = vandq_u16(vshrq_n_u16::<5>(packed), field6);
            let blue = vandq_u16(packed, field5);
            vst4q_u16(dst_ptr.add(offset * 4), uint16x8x4_t(red, green, blue, opaque));
        }
        if vector_px != width {
            scalar::legacy_rgb::rgb565_to_rgba_u16_row(
                &src[vector_px * 2..],
                &mut rgba_u16_out[vector_px * 4..],
                width - vector_px,
            );
        }
    }
}
/// Converts a row of packed BGR565 pixels (little-endian 16-bit words with B in
/// bits 15..11, G in bits 10..5, R in bits 4..0) into interleaved 8-bit `R, G, B`.
///
/// Whole groups of eight pixels are processed with NEON; any remaining pixels
/// are handed to `scalar::legacy_rgb::bgr565_to_rgb_row`.
///
/// # Safety
///
/// NEON must be available. `src` must hold at least `width * 2` bytes and
/// `rgb_out` at least `width * 3` bytes: the vector loop uses raw pointer
/// accesses and the lengths are only checked by `debug_assert!`.
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn bgr565_to_rgb_row(src: &[u8], rgb_out: &mut [u8], width: usize) {
    debug_assert!(src.len() >= width * 2, "src row too short");
    debug_assert!(rgb_out.len() >= width * 3, "rgb_out too short");
    // Pixels covered by complete 8-pixel vector iterations.
    let vector_px = width - width % 8;
    // SAFETY: each iteration touches pixels `offset..offset + 8` with
    // `offset + 8 <= width`, which is in bounds given the length requirements.
    unsafe {
        let field5 = vdupq_n_u16(0x1F);
        let field6 = vdupq_n_u16(0x3F);
        let src_ptr = src.as_ptr();
        let dst_ptr = rgb_out.as_mut_ptr();
        for offset in (0..vector_px).step_by(8) {
            let packed = load_u16x8_le(src_ptr.add(offset * 2));
            let blue = expand5(vandq_u16(vshrq_n_u16::<11>(packed), field5));
            let green = expand6(vandq_u16(vshrq_n_u16::<5>(packed), field6));
            let red = expand5(vandq_u16(packed, field5));
            let planes = uint8x8x3_t(vqmovn_u16(red), vqmovn_u16(green), vqmovn_u16(blue));
            vst3_u8(dst_ptr.add(offset * 3), planes);
        }
        if vector_px != width {
            scalar::legacy_rgb::bgr565_to_rgb_row(
                &src[vector_px * 2..],
                &mut rgb_out[vector_px * 3..],
                width - vector_px,
            );
        }
    }
}
/// Converts a row of packed BGR565 pixels (little-endian 16-bit words with B in
/// bits 15..11, G in bits 10..5, R in bits 4..0) into interleaved 8-bit
/// `R, G, B, A` with alpha fixed at `0xFF`.
///
/// Whole groups of eight pixels are processed with NEON; any remaining pixels
/// are handed to `scalar::legacy_rgb::bgr565_to_rgba_row`.
///
/// # Safety
///
/// NEON must be available. `src` must hold at least `width * 2` bytes and
/// `rgba_out` at least `width * 4` bytes: the vector loop uses raw pointer
/// accesses and the lengths are only checked by `debug_assert!`.
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn bgr565_to_rgba_row(src: &[u8], rgba_out: &mut [u8], width: usize) {
    debug_assert!(src.len() >= width * 2, "src row too short");
    debug_assert!(rgba_out.len() >= width * 4, "rgba_out too short");
    // Pixels covered by complete 8-pixel vector iterations.
    let vector_px = width - width % 8;
    // SAFETY: each iteration touches pixels `offset..offset + 8` with
    // `offset + 8 <= width`, which is in bounds given the length requirements.
    unsafe {
        let field5 = vdupq_n_u16(0x1F);
        let field6 = vdupq_n_u16(0x3F);
        let opaque = vdup_n_u8(0xFF);
        let src_ptr = src.as_ptr();
        let dst_ptr = rgba_out.as_mut_ptr();
        for offset in (0..vector_px).step_by(8) {
            let packed = load_u16x8_le(src_ptr.add(offset * 2));
            let blue = expand5(vandq_u16(vshrq_n_u16::<11>(packed), field5));
            let green = expand6(vandq_u16(vshrq_n_u16::<5>(packed), field6));
            let red = expand5(vandq_u16(packed, field5));
            let planes =
                uint8x8x4_t(vqmovn_u16(red), vqmovn_u16(green), vqmovn_u16(blue), opaque);
            vst4_u8(dst_ptr.add(offset * 4), planes);
        }
        if vector_px != width {
            scalar::legacy_rgb::bgr565_to_rgba_row(
                &src[vector_px * 2..],
                &mut rgba_out[vector_px * 4..],
                width - vector_px,
            );
        }
    }
}
/// Unpacks a row of BGR565 pixels (little-endian 16-bit words with B in bits
/// 15..11, G in bits 10..5, R in bits 4..0) into interleaved `u16` `R, G, B`.
///
/// The vector path stores each field exactly as extracted (5- or 6-bit value,
/// no scaling). Remaining pixels after the last full group of eight are handed
/// to `scalar::legacy_rgb::bgr565_to_rgb_u16_row`.
/// NOTE(review): values are not scaled to the full `u16` range — confirm the
/// scalar tail routine produces the same representation.
///
/// # Safety
///
/// NEON must be available. `src` must hold at least `width * 2` bytes and
/// `rgb_u16_out` at least `width * 3` elements: the vector loop uses raw
/// pointer accesses and the lengths are only checked by `debug_assert!`.
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn bgr565_to_rgb_u16_row(src: &[u8], rgb_u16_out: &mut [u16], width: usize) {
    debug_assert!(src.len() >= width * 2, "src row too short");
    debug_assert!(rgb_u16_out.len() >= width * 3, "rgb_u16_out too short");
    // Pixels covered by complete 8-pixel vector iterations.
    let vector_px = width - width % 8;
    // SAFETY: each iteration touches pixels `offset..offset + 8` with
    // `offset + 8 <= width`, which is in bounds given the length requirements.
    unsafe {
        let field5 = vdupq_n_u16(0x1F);
        let field6 = vdupq_n_u16(0x3F);
        let src_ptr = src.as_ptr();
        let dst_ptr = rgb_u16_out.as_mut_ptr();
        for offset in (0..vector_px).step_by(8) {
            let packed = load_u16x8_le(src_ptr.add(offset * 2));
            let blue = vandq_u16(vshrq_n_u16::<11>(packed), field5);
            let green = vandq_u16(vshrq_n_u16::<5>(packed), field6);
            let red = vandq_u16(packed, field5);
            vst3q_u16(dst_ptr.add(offset * 3), uint16x8x3_t(red, green, blue));
        }
        if vector_px != width {
            scalar::legacy_rgb::bgr565_to_rgb_u16_row(
                &src[vector_px * 2..],
                &mut rgb_u16_out[vector_px * 3..],
                width - vector_px,
            );
        }
    }
}
/// Unpacks a row of BGR565 pixels (little-endian 16-bit words with B in bits
/// 15..11, G in bits 10..5, R in bits 4..0) into interleaved `u16`
/// `R, G, B, A` with alpha fixed at `0xFFFF`.
///
/// The vector path stores each colour field exactly as extracted (5- or 6-bit
/// value, no scaling). Remaining pixels after the last full group of eight are
/// handed to `scalar::legacy_rgb::bgr565_to_rgba_u16_row`.
/// NOTE(review): colour values are unscaled while alpha is `0xFFFF` — confirm
/// this matches the scalar tail routine.
///
/// # Safety
///
/// NEON must be available. `src` must hold at least `width * 2` bytes and
/// `rgba_u16_out` at least `width * 4` elements: the vector loop uses raw
/// pointer accesses and the lengths are only checked by `debug_assert!`.
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn bgr565_to_rgba_u16_row(src: &[u8], rgba_u16_out: &mut [u16], width: usize) {
    debug_assert!(src.len() >= width * 2, "src row too short");
    debug_assert!(rgba_u16_out.len() >= width * 4, "rgba_u16_out too short");
    // Pixels covered by complete 8-pixel vector iterations.
    let vector_px = width - width % 8;
    // SAFETY: each iteration touches pixels `offset..offset + 8` with
    // `offset + 8 <= width`, which is in bounds given the length requirements.
    unsafe {
        let field5 = vdupq_n_u16(0x1F);
        let field6 = vdupq_n_u16(0x3F);
        let opaque = vdupq_n_u16(0xFFFF);
        let src_ptr = src.as_ptr();
        let dst_ptr = rgba_u16_out.as_mut_ptr();
        for offset in (0..vector_px).step_by(8) {
            let packed = load_u16x8_le(src_ptr.add(offset * 2));
            let blue = vandq_u16(vshrq_n_u16::<11>(packed), field5);
            let green = vandq_u16(vshrq_n_u16::<5>(packed), field6);
            let red = vandq_u16(packed, field5);
            vst4q_u16(dst_ptr.add(offset * 4), uint16x8x4_t(red, green, blue, opaque));
        }
        if vector_px != width {
            scalar::legacy_rgb::bgr565_to_rgba_u16_row(
                &src[vector_px * 2..],
                &mut rgba_u16_out[vector_px * 4..],
                width - vector_px,
            );
        }
    }
}
/// Converts a row of packed RGB555 pixels (little-endian 16-bit words with R in
/// bits 14..10, G in bits 9..5, B in bits 4..0; bit 15 is ignored) into
/// interleaved 8-bit `R, G, B`.
///
/// Whole groups of eight pixels are processed with NEON; any remaining pixels
/// are handed to `scalar::legacy_rgb::rgb555_to_rgb_row`.
///
/// # Safety
///
/// NEON must be available. `src` must hold at least `width * 2` bytes and
/// `rgb_out` at least `width * 3` bytes: the vector loop uses raw pointer
/// accesses and the lengths are only checked by `debug_assert!`.
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn rgb555_to_rgb_row(src: &[u8], rgb_out: &mut [u8], width: usize) {
    debug_assert!(src.len() >= width * 2, "src row too short");
    debug_assert!(rgb_out.len() >= width * 3, "rgb_out too short");
    // Pixels covered by complete 8-pixel vector iterations.
    let vector_px = width - width % 8;
    // SAFETY: each iteration touches pixels `offset..offset + 8` with
    // `offset + 8 <= width`, which is in bounds given the length requirements.
    unsafe {
        let field5 = vdupq_n_u16(0x1F);
        let src_ptr = src.as_ptr();
        let dst_ptr = rgb_out.as_mut_ptr();
        for offset in (0..vector_px).step_by(8) {
            let packed = load_u16x8_le(src_ptr.add(offset * 2));
            let red = expand5(vandq_u16(vshrq_n_u16::<10>(packed), field5));
            let green = expand5(vandq_u16(vshrq_n_u16::<5>(packed), field5));
            let blue = expand5(vandq_u16(packed, field5));
            let planes = uint8x8x3_t(vqmovn_u16(red), vqmovn_u16(green), vqmovn_u16(blue));
            vst3_u8(dst_ptr.add(offset * 3), planes);
        }
        if vector_px != width {
            scalar::legacy_rgb::rgb555_to_rgb_row(
                &src[vector_px * 2..],
                &mut rgb_out[vector_px * 3..],
                width - vector_px,
            );
        }
    }
}
/// Converts a row of packed RGB555 pixels (little-endian 16-bit words with R in
/// bits 14..10, G in bits 9..5, B in bits 4..0; bit 15 is ignored) into
/// interleaved 8-bit `R, G, B, A` with alpha fixed at `0xFF`.
///
/// Whole groups of eight pixels are processed with NEON; any remaining pixels
/// are handed to `scalar::legacy_rgb::rgb555_to_rgba_row`.
///
/// # Safety
///
/// NEON must be available. `src` must hold at least `width * 2` bytes and
/// `rgba_out` at least `width * 4` bytes: the vector loop uses raw pointer
/// accesses and the lengths are only checked by `debug_assert!`.
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn rgb555_to_rgba_row(src: &[u8], rgba_out: &mut [u8], width: usize) {
    debug_assert!(src.len() >= width * 2, "src row too short");
    debug_assert!(rgba_out.len() >= width * 4, "rgba_out too short");
    // Pixels covered by complete 8-pixel vector iterations.
    let vector_px = width - width % 8;
    // SAFETY: each iteration touches pixels `offset..offset + 8` with
    // `offset + 8 <= width`, which is in bounds given the length requirements.
    unsafe {
        let field5 = vdupq_n_u16(0x1F);
        let opaque = vdup_n_u8(0xFF);
        let src_ptr = src.as_ptr();
        let dst_ptr = rgba_out.as_mut_ptr();
        for offset in (0..vector_px).step_by(8) {
            let packed = load_u16x8_le(src_ptr.add(offset * 2));
            let red = expand5(vandq_u16(vshrq_n_u16::<10>(packed), field5));
            let green = expand5(vandq_u16(vshrq_n_u16::<5>(packed), field5));
            let blue = expand5(vandq_u16(packed, field5));
            let planes =
                uint8x8x4_t(vqmovn_u16(red), vqmovn_u16(green), vqmovn_u16(blue), opaque);
            vst4_u8(dst_ptr.add(offset * 4), planes);
        }
        if vector_px != width {
            scalar::legacy_rgb::rgb555_to_rgba_row(
                &src[vector_px * 2..],
                &mut rgba_out[vector_px * 4..],
                width - vector_px,
            );
        }
    }
}
/// Unpacks a row of RGB555 pixels (little-endian 16-bit words with R in bits
/// 14..10, G in bits 9..5, B in bits 4..0; bit 15 is ignored) into interleaved
/// `u16` `R, G, B`.
///
/// The vector path stores each field exactly as extracted (5-bit value, no
/// scaling). Remaining pixels after the last full group of eight are handed to
/// `scalar::legacy_rgb::rgb555_to_rgb_u16_row`.
/// NOTE(review): values are not scaled to the full `u16` range — confirm the
/// scalar tail routine produces the same representation.
///
/// # Safety
///
/// NEON must be available. `src` must hold at least `width * 2` bytes and
/// `rgb_u16_out` at least `width * 3` elements: the vector loop uses raw
/// pointer accesses and the lengths are only checked by `debug_assert!`.
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn rgb555_to_rgb_u16_row(src: &[u8], rgb_u16_out: &mut [u16], width: usize) {
    debug_assert!(src.len() >= width * 2, "src row too short");
    debug_assert!(rgb_u16_out.len() >= width * 3, "rgb_u16_out too short");
    // Pixels covered by complete 8-pixel vector iterations.
    let vector_px = width - width % 8;
    // SAFETY: each iteration touches pixels `offset..offset + 8` with
    // `offset + 8 <= width`, which is in bounds given the length requirements.
    unsafe {
        let field5 = vdupq_n_u16(0x1F);
        let src_ptr = src.as_ptr();
        let dst_ptr = rgb_u16_out.as_mut_ptr();
        for offset in (0..vector_px).step_by(8) {
            let packed = load_u16x8_le(src_ptr.add(offset * 2));
            let red = vandq_u16(vshrq_n_u16::<10>(packed), field5);
            let green = vandq_u16(vshrq_n_u16::<5>(packed), field5);
            let blue = vandq_u16(packed, field5);
            vst3q_u16(dst_ptr.add(offset * 3), uint16x8x3_t(red, green, blue));
        }
        if vector_px != width {
            scalar::legacy_rgb::rgb555_to_rgb_u16_row(
                &src[vector_px * 2..],
                &mut rgb_u16_out[vector_px * 3..],
                width - vector_px,
            );
        }
    }
}
/// Unpacks a row of RGB555 pixels (little-endian 16-bit words with R in bits
/// 14..10, G in bits 9..5, B in bits 4..0; bit 15 is ignored) into interleaved
/// `u16` `R, G, B, A` with alpha fixed at `0xFFFF`.
///
/// The vector path stores each colour field exactly as extracted (5-bit value,
/// no scaling). Remaining pixels after the last full group of eight are handed
/// to `scalar::legacy_rgb::rgb555_to_rgba_u16_row`.
/// NOTE(review): colour values are unscaled while alpha is `0xFFFF` — confirm
/// this matches the scalar tail routine.
///
/// # Safety
///
/// NEON must be available. `src` must hold at least `width * 2` bytes and
/// `rgba_u16_out` at least `width * 4` elements: the vector loop uses raw
/// pointer accesses and the lengths are only checked by `debug_assert!`.
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn rgb555_to_rgba_u16_row(src: &[u8], rgba_u16_out: &mut [u16], width: usize) {
    debug_assert!(src.len() >= width * 2, "src row too short");
    debug_assert!(rgba_u16_out.len() >= width * 4, "rgba_u16_out too short");
    // Pixels covered by complete 8-pixel vector iterations.
    let vector_px = width - width % 8;
    // SAFETY: each iteration touches pixels `offset..offset + 8` with
    // `offset + 8 <= width`, which is in bounds given the length requirements.
    unsafe {
        let field5 = vdupq_n_u16(0x1F);
        let opaque = vdupq_n_u16(0xFFFF);
        let src_ptr = src.as_ptr();
        let dst_ptr = rgba_u16_out.as_mut_ptr();
        for offset in (0..vector_px).step_by(8) {
            let packed = load_u16x8_le(src_ptr.add(offset * 2));
            let red = vandq_u16(vshrq_n_u16::<10>(packed), field5);
            let green = vandq_u16(vshrq_n_u16::<5>(packed), field5);
            let blue = vandq_u16(packed, field5);
            vst4q_u16(dst_ptr.add(offset * 4), uint16x8x4_t(red, green, blue, opaque));
        }
        if vector_px != width {
            scalar::legacy_rgb::rgb555_to_rgba_u16_row(
                &src[vector_px * 2..],
                &mut rgba_u16_out[vector_px * 4..],
                width - vector_px,
            );
        }
    }
}
/// Converts a row of packed BGR555 pixels (little-endian 16-bit words with B in
/// bits 14..10, G in bits 9..5, R in bits 4..0; bit 15 is ignored) into
/// interleaved 8-bit `R, G, B`.
///
/// Whole groups of eight pixels are processed with NEON; any remaining pixels
/// are handed to `scalar::legacy_rgb::bgr555_to_rgb_row`.
///
/// # Safety
///
/// NEON must be available. `src` must hold at least `width * 2` bytes and
/// `rgb_out` at least `width * 3` bytes: the vector loop uses raw pointer
/// accesses and the lengths are only checked by `debug_assert!`.
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn bgr555_to_rgb_row(src: &[u8], rgb_out: &mut [u8], width: usize) {
    debug_assert!(src.len() >= width * 2, "src row too short");
    debug_assert!(rgb_out.len() >= width * 3, "rgb_out too short");
    // Pixels covered by complete 8-pixel vector iterations.
    let vector_px = width - width % 8;
    // SAFETY: each iteration touches pixels `offset..offset + 8` with
    // `offset + 8 <= width`, which is in bounds given the length requirements.
    unsafe {
        let field5 = vdupq_n_u16(0x1F);
        let src_ptr = src.as_ptr();
        let dst_ptr = rgb_out.as_mut_ptr();
        for offset in (0..vector_px).step_by(8) {
            let packed = load_u16x8_le(src_ptr.add(offset * 2));
            let blue = expand5(vandq_u16(vshrq_n_u16::<10>(packed), field5));
            let green = expand5(vandq_u16(vshrq_n_u16::<5>(packed), field5));
            let red = expand5(vandq_u16(packed, field5));
            let planes = uint8x8x3_t(vqmovn_u16(red), vqmovn_u16(green), vqmovn_u16(blue));
            vst3_u8(dst_ptr.add(offset * 3), planes);
        }
        if vector_px != width {
            scalar::legacy_rgb::bgr555_to_rgb_row(
                &src[vector_px * 2..],
                &mut rgb_out[vector_px * 3..],
                width - vector_px,
            );
        }
    }
}
/// Converts a row of packed BGR555 pixels (little-endian 16-bit words with B in
/// bits 14..10, G in bits 9..5, R in bits 4..0; bit 15 is ignored) into
/// interleaved 8-bit `R, G, B, A` with alpha fixed at `0xFF`.
///
/// Whole groups of eight pixels are processed with NEON; any remaining pixels
/// are handed to `scalar::legacy_rgb::bgr555_to_rgba_row`.
///
/// # Safety
///
/// NEON must be available. `src` must hold at least `width * 2` bytes and
/// `rgba_out` at least `width * 4` bytes: the vector loop uses raw pointer
/// accesses and the lengths are only checked by `debug_assert!`.
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn bgr555_to_rgba_row(src: &[u8], rgba_out: &mut [u8], width: usize) {
    debug_assert!(src.len() >= width * 2, "src row too short");
    debug_assert!(rgba_out.len() >= width * 4, "rgba_out too short");
    // Pixels covered by complete 8-pixel vector iterations.
    let vector_px = width - width % 8;
    // SAFETY: each iteration touches pixels `offset..offset + 8` with
    // `offset + 8 <= width`, which is in bounds given the length requirements.
    unsafe {
        let field5 = vdupq_n_u16(0x1F);
        let opaque = vdup_n_u8(0xFF);
        let src_ptr = src.as_ptr();
        let dst_ptr = rgba_out.as_mut_ptr();
        for offset in (0..vector_px).step_by(8) {
            let packed = load_u16x8_le(src_ptr.add(offset * 2));
            let blue = expand5(vandq_u16(vshrq_n_u16::<10>(packed), field5));
            let green = expand5(vandq_u16(vshrq_n_u16::<5>(packed), field5));
            let red = expand5(vandq_u16(packed, field5));
            let planes =
                uint8x8x4_t(vqmovn_u16(red), vqmovn_u16(green), vqmovn_u16(blue), opaque);
            vst4_u8(dst_ptr.add(offset * 4), planes);
        }
        if vector_px != width {
            scalar::legacy_rgb::bgr555_to_rgba_row(
                &src[vector_px * 2..],
                &mut rgba_out[vector_px * 4..],
                width - vector_px,
            );
        }
    }
}
/// Unpacks a row of BGR555 pixels (little-endian 16-bit words with B in bits
/// 14..10, G in bits 9..5, R in bits 4..0; bit 15 is ignored) into interleaved
/// `u16` `R, G, B`.
///
/// The vector path stores each field exactly as extracted (5-bit value, no
/// scaling). Remaining pixels after the last full group of eight are handed to
/// `scalar::legacy_rgb::bgr555_to_rgb_u16_row`.
/// NOTE(review): values are not scaled to the full `u16` range — confirm the
/// scalar tail routine produces the same representation.
///
/// # Safety
///
/// NEON must be available. `src` must hold at least `width * 2` bytes and
/// `rgb_u16_out` at least `width * 3` elements: the vector loop uses raw
/// pointer accesses and the lengths are only checked by `debug_assert!`.
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn bgr555_to_rgb_u16_row(src: &[u8], rgb_u16_out: &mut [u16], width: usize) {
    debug_assert!(src.len() >= width * 2, "src row too short");
    debug_assert!(rgb_u16_out.len() >= width * 3, "rgb_u16_out too short");
    // Pixels covered by complete 8-pixel vector iterations.
    let vector_px = width - width % 8;
    // SAFETY: each iteration touches pixels `offset..offset + 8` with
    // `offset + 8 <= width`, which is in bounds given the length requirements.
    unsafe {
        let field5 = vdupq_n_u16(0x1F);
        let src_ptr = src.as_ptr();
        let dst_ptr = rgb_u16_out.as_mut_ptr();
        for offset in (0..vector_px).step_by(8) {
            let packed = load_u16x8_le(src_ptr.add(offset * 2));
            let blue = vandq_u16(vshrq_n_u16::<10>(packed), field5);
            let green = vandq_u16(vshrq_n_u16::<5>(packed), field5);
            let red = vandq_u16(packed, field5);
            vst3q_u16(dst_ptr.add(offset * 3), uint16x8x3_t(red, green, blue));
        }
        if vector_px != width {
            scalar::legacy_rgb::bgr555_to_rgb_u16_row(
                &src[vector_px * 2..],
                &mut rgb_u16_out[vector_px * 3..],
                width - vector_px,
            );
        }
    }
}
/// Unpacks a row of BGR555 pixels (little-endian 16-bit words with B in bits
/// 14..10, G in bits 9..5, R in bits 4..0; bit 15 is ignored) into interleaved
/// `u16` `R, G, B, A` with alpha fixed at `0xFFFF`.
///
/// The vector path stores each colour field exactly as extracted (5-bit value,
/// no scaling). Remaining pixels after the last full group of eight are handed
/// to `scalar::legacy_rgb::bgr555_to_rgba_u16_row`.
/// NOTE(review): colour values are unscaled while alpha is `0xFFFF` — confirm
/// this matches the scalar tail routine.
///
/// # Safety
///
/// NEON must be available. `src` must hold at least `width * 2` bytes and
/// `rgba_u16_out` at least `width * 4` elements: the vector loop uses raw
/// pointer accesses and the lengths are only checked by `debug_assert!`.
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn bgr555_to_rgba_u16_row(src: &[u8], rgba_u16_out: &mut [u16], width: usize) {
    debug_assert!(src.len() >= width * 2, "src row too short");
    debug_assert!(rgba_u16_out.len() >= width * 4, "rgba_u16_out too short");
    // Pixels covered by complete 8-pixel vector iterations.
    let vector_px = width - width % 8;
    // SAFETY: each iteration touches pixels `offset..offset + 8` with
    // `offset + 8 <= width`, which is in bounds given the length requirements.
    unsafe {
        let field5 = vdupq_n_u16(0x1F);
        let opaque = vdupq_n_u16(0xFFFF);
        let src_ptr = src.as_ptr();
        let dst_ptr = rgba_u16_out.as_mut_ptr();
        for offset in (0..vector_px).step_by(8) {
            let packed = load_u16x8_le(src_ptr.add(offset * 2));
            let blue = vandq_u16(vshrq_n_u16::<10>(packed), field5);
            let green = vandq_u16(vshrq_n_u16::<5>(packed), field5);
            let red = vandq_u16(packed, field5);
            vst4q_u16(dst_ptr.add(offset * 4), uint16x8x4_t(red, green, blue, opaque));
        }
        if vector_px != width {
            scalar::legacy_rgb::bgr555_to_rgba_u16_row(
                &src[vector_px * 2..],
                &mut rgba_u16_out[vector_px * 4..],
                width - vector_px,
            );
        }
    }
}
/// Converts a row of packed RGB444 pixels (little-endian 16-bit words with R in
/// bits 11..8, G in bits 7..4, B in bits 3..0; bits 15..12 are ignored) into
/// interleaved 8-bit `R, G, B`.
///
/// Whole groups of eight pixels are processed with NEON; any remaining pixels
/// are handed to `scalar::legacy_rgb::rgb444_to_rgb_row`.
///
/// # Safety
///
/// NEON must be available. `src` must hold at least `width * 2` bytes and
/// `rgb_out` at least `width * 3` bytes: the vector loop uses raw pointer
/// accesses and the lengths are only checked by `debug_assert!`.
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn rgb444_to_rgb_row(src: &[u8], rgb_out: &mut [u8], width: usize) {
    debug_assert!(src.len() >= width * 2, "src row too short");
    debug_assert!(rgb_out.len() >= width * 3, "rgb_out too short");
    // Pixels covered by complete 8-pixel vector iterations.
    let vector_px = width - width % 8;
    // SAFETY: each iteration touches pixels `offset..offset + 8` with
    // `offset + 8 <= width`, which is in bounds given the length requirements.
    unsafe {
        let nibble = vdupq_n_u16(0x0F);
        let src_ptr = src.as_ptr();
        let dst_ptr = rgb_out.as_mut_ptr();
        for offset in (0..vector_px).step_by(8) {
            let packed = load_u16x8_le(src_ptr.add(offset * 2));
            let red = expand4(vandq_u16(vshrq_n_u16::<8>(packed), nibble));
            let green = expand4(vandq_u16(vshrq_n_u16::<4>(packed), nibble));
            let blue = expand4(vandq_u16(packed, nibble));
            let planes = uint8x8x3_t(vqmovn_u16(red), vqmovn_u16(green), vqmovn_u16(blue));
            vst3_u8(dst_ptr.add(offset * 3), planes);
        }
        if vector_px != width {
            scalar::legacy_rgb::rgb444_to_rgb_row(
                &src[vector_px * 2..],
                &mut rgb_out[vector_px * 3..],
                width - vector_px,
            );
        }
    }
}
/// Converts a row of packed RGB444 pixels (little-endian 16-bit words with R in
/// bits 11..8, G in bits 7..4, B in bits 3..0; bits 15..12 are ignored) into
/// interleaved 8-bit `R, G, B, A` with alpha fixed at `0xFF`.
///
/// Whole groups of eight pixels are processed with NEON; any remaining pixels
/// are handed to `scalar::legacy_rgb::rgb444_to_rgba_row`.
///
/// # Safety
///
/// NEON must be available. `src` must hold at least `width * 2` bytes and
/// `rgba_out` at least `width * 4` bytes: the vector loop uses raw pointer
/// accesses and the lengths are only checked by `debug_assert!`.
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn rgb444_to_rgba_row(src: &[u8], rgba_out: &mut [u8], width: usize) {
    debug_assert!(src.len() >= width * 2, "src row too short");
    debug_assert!(rgba_out.len() >= width * 4, "rgba_out too short");
    // Pixels covered by complete 8-pixel vector iterations.
    let vector_px = width - width % 8;
    // SAFETY: each iteration touches pixels `offset..offset + 8` with
    // `offset + 8 <= width`, which is in bounds given the length requirements.
    unsafe {
        let nibble = vdupq_n_u16(0x0F);
        let opaque = vdup_n_u8(0xFF);
        let src_ptr = src.as_ptr();
        let dst_ptr = rgba_out.as_mut_ptr();
        for offset in (0..vector_px).step_by(8) {
            let packed = load_u16x8_le(src_ptr.add(offset * 2));
            let red = expand4(vandq_u16(vshrq_n_u16::<8>(packed), nibble));
            let green = expand4(vandq_u16(vshrq_n_u16::<4>(packed), nibble));
            let blue = expand4(vandq_u16(packed, nibble));
            let planes =
                uint8x8x4_t(vqmovn_u16(red), vqmovn_u16(green), vqmovn_u16(blue), opaque);
            vst4_u8(dst_ptr.add(offset * 4), planes);
        }
        if vector_px != width {
            scalar::legacy_rgb::rgb444_to_rgba_row(
                &src[vector_px * 2..],
                &mut rgba_out[vector_px * 4..],
                width - vector_px,
            );
        }
    }
}
/// Unpacks a row of RGB444 pixels (little-endian 16-bit words with R in bits
/// 11..8, G in bits 7..4, B in bits 3..0; bits 15..12 are ignored) into
/// interleaved `u16` `R, G, B`.
///
/// The vector path stores each field exactly as extracted (4-bit value, no
/// scaling). Remaining pixels after the last full group of eight are handed to
/// `scalar::legacy_rgb::rgb444_to_rgb_u16_row`.
/// NOTE(review): values are not scaled to the full `u16` range — confirm the
/// scalar tail routine produces the same representation.
///
/// # Safety
///
/// NEON must be available. `src` must hold at least `width * 2` bytes and
/// `rgb_u16_out` at least `width * 3` elements: the vector loop uses raw
/// pointer accesses and the lengths are only checked by `debug_assert!`.
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn rgb444_to_rgb_u16_row(src: &[u8], rgb_u16_out: &mut [u16], width: usize) {
    debug_assert!(src.len() >= width * 2, "src row too short");
    debug_assert!(rgb_u16_out.len() >= width * 3, "rgb_u16_out too short");
    // Pixels covered by complete 8-pixel vector iterations.
    let vector_px = width - width % 8;
    // SAFETY: each iteration touches pixels `offset..offset + 8` with
    // `offset + 8 <= width`, which is in bounds given the length requirements.
    unsafe {
        let nibble = vdupq_n_u16(0x0F);
        let src_ptr = src.as_ptr();
        let dst_ptr = rgb_u16_out.as_mut_ptr();
        for offset in (0..vector_px).step_by(8) {
            let packed = load_u16x8_le(src_ptr.add(offset * 2));
            let red = vandq_u16(vshrq_n_u16::<8>(packed), nibble);
            let green = vandq_u16(vshrq_n_u16::<4>(packed), nibble);
            let blue = vandq_u16(packed, nibble);
            vst3q_u16(dst_ptr.add(offset * 3), uint16x8x3_t(red, green, blue));
        }
        if vector_px != width {
            scalar::legacy_rgb::rgb444_to_rgb_u16_row(
                &src[vector_px * 2..],
                &mut rgb_u16_out[vector_px * 3..],
                width - vector_px,
            );
        }
    }
}
/// Unpacks a row of RGB444 pixels (little-endian 16-bit words with R in bits
/// 11..8, G in bits 7..4, B in bits 3..0; bits 15..12 are ignored) into
/// interleaved `u16` `R, G, B, A` with alpha fixed at `0xFFFF`.
///
/// The vector path stores each colour field exactly as extracted (4-bit value,
/// no scaling). Remaining pixels after the last full group of eight are handed
/// to `scalar::legacy_rgb::rgb444_to_rgba_u16_row`.
/// NOTE(review): colour values are unscaled while alpha is `0xFFFF` — confirm
/// this matches the scalar tail routine.
///
/// # Safety
///
/// NEON must be available. `src` must hold at least `width * 2` bytes and
/// `rgba_u16_out` at least `width * 4` elements: the vector loop uses raw
/// pointer accesses and the lengths are only checked by `debug_assert!`.
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn rgb444_to_rgba_u16_row(src: &[u8], rgba_u16_out: &mut [u16], width: usize) {
    debug_assert!(src.len() >= width * 2, "src row too short");
    debug_assert!(rgba_u16_out.len() >= width * 4, "rgba_u16_out too short");
    // Pixels covered by complete 8-pixel vector iterations.
    let vector_px = width - width % 8;
    // SAFETY: each iteration touches pixels `offset..offset + 8` with
    // `offset + 8 <= width`, which is in bounds given the length requirements.
    unsafe {
        let nibble = vdupq_n_u16(0x0F);
        let opaque = vdupq_n_u16(0xFFFF);
        let src_ptr = src.as_ptr();
        let dst_ptr = rgba_u16_out.as_mut_ptr();
        for offset in (0..vector_px).step_by(8) {
            let packed = load_u16x8_le(src_ptr.add(offset * 2));
            let red = vandq_u16(vshrq_n_u16::<8>(packed), nibble);
            let green = vandq_u16(vshrq_n_u16::<4>(packed), nibble);
            let blue = vandq_u16(packed, nibble);
            vst4q_u16(dst_ptr.add(offset * 4), uint16x8x4_t(red, green, blue, opaque));
        }
        if vector_px != width {
            scalar::legacy_rgb::rgb444_to_rgba_u16_row(
                &src[vector_px * 2..],
                &mut rgba_u16_out[vector_px * 4..],
                width - vector_px,
            );
        }
    }
}
/// Converts a row of packed BGR444 pixels (little-endian 16-bit words with B in
/// bits 11..8, G in bits 7..4, R in bits 3..0; bits 15..12 are ignored) into
/// interleaved 8-bit `R, G, B`.
///
/// Whole groups of eight pixels are processed with NEON; any remaining pixels
/// are handed to `scalar::legacy_rgb::bgr444_to_rgb_row`.
///
/// # Safety
///
/// NEON must be available. `src` must hold at least `width * 2` bytes and
/// `rgb_out` at least `width * 3` bytes: the vector loop uses raw pointer
/// accesses and the lengths are only checked by `debug_assert!`.
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn bgr444_to_rgb_row(src: &[u8], rgb_out: &mut [u8], width: usize) {
    debug_assert!(src.len() >= width * 2, "src row too short");
    debug_assert!(rgb_out.len() >= width * 3, "rgb_out too short");
    // Pixels covered by complete 8-pixel vector iterations.
    let vector_px = width - width % 8;
    // SAFETY: each iteration touches pixels `offset..offset + 8` with
    // `offset + 8 <= width`, which is in bounds given the length requirements.
    unsafe {
        let nibble = vdupq_n_u16(0x0F);
        let src_ptr = src.as_ptr();
        let dst_ptr = rgb_out.as_mut_ptr();
        for offset in (0..vector_px).step_by(8) {
            let packed = load_u16x8_le(src_ptr.add(offset * 2));
            let blue = expand4(vandq_u16(vshrq_n_u16::<8>(packed), nibble));
            let green = expand4(vandq_u16(vshrq_n_u16::<4>(packed), nibble));
            let red = expand4(vandq_u16(packed, nibble));
            let planes = uint8x8x3_t(vqmovn_u16(red), vqmovn_u16(green), vqmovn_u16(blue));
            vst3_u8(dst_ptr.add(offset * 3), planes);
        }
        if vector_px != width {
            scalar::legacy_rgb::bgr444_to_rgb_row(
                &src[vector_px * 2..],
                &mut rgb_out[vector_px * 3..],
                width - vector_px,
            );
        }
    }
}
/// Converts a row of packed BGR444 pixels (little-endian 16-bit words with B in
/// bits 11..8, G in bits 7..4, R in bits 3..0; bits 15..12 are ignored) into
/// interleaved 8-bit `R, G, B, A` with alpha fixed at `0xFF`.
///
/// Whole groups of eight pixels are processed with NEON; any remaining pixels
/// are handed to `scalar::legacy_rgb::bgr444_to_rgba_row`.
///
/// # Safety
///
/// NEON must be available. `src` must hold at least `width * 2` bytes and
/// `rgba_out` at least `width * 4` bytes: the vector loop uses raw pointer
/// accesses and the lengths are only checked by `debug_assert!`.
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn bgr444_to_rgba_row(src: &[u8], rgba_out: &mut [u8], width: usize) {
    debug_assert!(src.len() >= width * 2, "src row too short");
    debug_assert!(rgba_out.len() >= width * 4, "rgba_out too short");
    // Pixels covered by complete 8-pixel vector iterations.
    let vector_px = width - width % 8;
    // SAFETY: each iteration touches pixels `offset..offset + 8` with
    // `offset + 8 <= width`, which is in bounds given the length requirements.
    unsafe {
        let nibble = vdupq_n_u16(0x0F);
        let opaque = vdup_n_u8(0xFF);
        let src_ptr = src.as_ptr();
        let dst_ptr = rgba_out.as_mut_ptr();
        for offset in (0..vector_px).step_by(8) {
            let packed = load_u16x8_le(src_ptr.add(offset * 2));
            let blue = expand4(vandq_u16(vshrq_n_u16::<8>(packed), nibble));
            let green = expand4(vandq_u16(vshrq_n_u16::<4>(packed), nibble));
            let red = expand4(vandq_u16(packed, nibble));
            let planes =
                uint8x8x4_t(vqmovn_u16(red), vqmovn_u16(green), vqmovn_u16(blue), opaque);
            vst4_u8(dst_ptr.add(offset * 4), planes);
        }
        if vector_px != width {
            scalar::legacy_rgb::bgr444_to_rgba_row(
                &src[vector_px * 2..],
                &mut rgba_out[vector_px * 4..],
                width - vector_px,
            );
        }
    }
}
/// Unpacks a row of BGR444 pixels (little-endian 16-bit words with B in bits
/// 11..8, G in bits 7..4, R in bits 3..0; bits 15..12 are ignored) into
/// interleaved `u16` `R, G, B`.
///
/// The vector path stores each field exactly as extracted (4-bit value, no
/// scaling). Remaining pixels after the last full group of eight are handed to
/// `scalar::legacy_rgb::bgr444_to_rgb_u16_row`.
/// NOTE(review): values are not scaled to the full `u16` range — confirm the
/// scalar tail routine produces the same representation.
///
/// # Safety
///
/// NEON must be available. `src` must hold at least `width * 2` bytes and
/// `rgb_u16_out` at least `width * 3` elements: the vector loop uses raw
/// pointer accesses and the lengths are only checked by `debug_assert!`.
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn bgr444_to_rgb_u16_row(src: &[u8], rgb_u16_out: &mut [u16], width: usize) {
    debug_assert!(src.len() >= width * 2, "src row too short");
    debug_assert!(rgb_u16_out.len() >= width * 3, "rgb_u16_out too short");
    // Pixels covered by complete 8-pixel vector iterations.
    let vector_px = width - width % 8;
    // SAFETY: each iteration touches pixels `offset..offset + 8` with
    // `offset + 8 <= width`, which is in bounds given the length requirements.
    unsafe {
        let nibble = vdupq_n_u16(0x0F);
        let src_ptr = src.as_ptr();
        let dst_ptr = rgb_u16_out.as_mut_ptr();
        for offset in (0..vector_px).step_by(8) {
            let packed = load_u16x8_le(src_ptr.add(offset * 2));
            let blue = vandq_u16(vshrq_n_u16::<8>(packed), nibble);
            let green = vandq_u16(vshrq_n_u16::<4>(packed), nibble);
            let red = vandq_u16(packed, nibble);
            vst3q_u16(dst_ptr.add(offset * 3), uint16x8x3_t(red, green, blue));
        }
        if vector_px != width {
            scalar::legacy_rgb::bgr444_to_rgb_u16_row(
                &src[vector_px * 2..],
                &mut rgb_u16_out[vector_px * 3..],
                width - vector_px,
            );
        }
    }
}
/// Unpacks a row of BGR444 pixels (little-endian 16-bit words with B in bits
/// 11..8, G in bits 7..4, R in bits 3..0; bits 15..12 are ignored) into
/// interleaved `u16` `R, G, B, A` with alpha fixed at `0xFFFF`.
///
/// The vector path stores each colour field exactly as extracted (4-bit value,
/// no scaling). Remaining pixels after the last full group of eight are handed
/// to `scalar::legacy_rgb::bgr444_to_rgba_u16_row`.
/// NOTE(review): colour values are unscaled while alpha is `0xFFFF` — confirm
/// this matches the scalar tail routine.
///
/// # Safety
///
/// NEON must be available. `src` must hold at least `width * 2` bytes and
/// `rgba_u16_out` at least `width * 4` elements: the vector loop uses raw
/// pointer accesses and the lengths are only checked by `debug_assert!`.
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn bgr444_to_rgba_u16_row(src: &[u8], rgba_u16_out: &mut [u16], width: usize) {
    debug_assert!(src.len() >= width * 2, "src row too short");
    debug_assert!(rgba_u16_out.len() >= width * 4, "rgba_u16_out too short");
    // Pixels covered by complete 8-pixel vector iterations.
    let vector_px = width - width % 8;
    // SAFETY: each iteration touches pixels `offset..offset + 8` with
    // `offset + 8 <= width`, which is in bounds given the length requirements.
    unsafe {
        let nibble = vdupq_n_u16(0x0F);
        let opaque = vdupq_n_u16(0xFFFF);
        let src_ptr = src.as_ptr();
        let dst_ptr = rgba_u16_out.as_mut_ptr();
        for offset in (0..vector_px).step_by(8) {
            let packed = load_u16x8_le(src_ptr.add(offset * 2));
            let blue = vandq_u16(vshrq_n_u16::<8>(packed), nibble);
            let green = vandq_u16(vshrq_n_u16::<4>(packed), nibble);
            let red = vandq_u16(packed, nibble);
            vst4q_u16(dst_ptr.add(offset * 4), uint16x8x4_t(red, green, blue, opaque));
        }
        if vector_px != width {
            scalar::legacy_rgb::bgr444_to_rgba_u16_row(
                &src[vector_px * 2..],
                &mut rgba_u16_out[vector_px * 4..],
                width - vector_px,
            );
        }
    }
}