#![cfg_attr(not(feature = "std"), allow(dead_code))]
use core::arch::aarch64::*;
use crate::row::scalar::mono1bit as scalar;
/// Per-lane bit selectors, most-significant bit first: lane `i` isolates bit
/// `7 - i` of a source byte, so lane 0 corresponds to the leftmost pixel.
const BIT_MASK: [u8; 8] = [
    0b1000_0000,
    0b0100_0000,
    0b0010_0000,
    0b0001_0000,
    0b0000_1000,
    0b0000_0100,
    0b0000_0010,
    0b0000_0001,
];
/// Expands the eight bits of `b` into eight byte lanes, MSB first.
///
/// Lane `i` is `0xFF` when bit `7 - i` of `b` is set and `0x00` otherwise.
/// When `INVERT` is `true` every lane is bitwise complemented, so set bits
/// yield `0x00` and clear bits yield `0xFF`.
///
/// # Safety
///
/// NEON must be available on the executing CPU.
#[inline]
#[target_feature(enable = "neon")]
unsafe fn unpack_byte<const INVERT: bool>(b: u8) -> uint8x8_t {
    // SAFETY: `BIT_MASK` is a `[u8; 8]`, exactly the eight bytes `vld1_u8` reads.
    let lane_bits = unsafe { vld1_u8(BIT_MASK.as_ptr()) };
    // Broadcast the byte, then test each lane against its own bit selector.
    let set_lanes = vtst_u8(vdup_n_u8(b), lane_bits);
    if !INVERT {
        return set_lanes;
    }
    vmvn_u8(set_lanes)
}
/// Expands two packed bytes into sixteen byte lanes.
///
/// Lanes 0..8 come from `b0` and lanes 8..16 from `b1`, each unpacked by
/// [`unpack_byte`] with the same `INVERT` setting.
///
/// # Safety
///
/// NEON must be available on the executing CPU.
#[inline]
#[target_feature(enable = "neon")]
unsafe fn unpack_2bytes<const INVERT: bool>(b0: u8, b1: u8) -> uint8x16_t {
    // SAFETY: this function carries the same NEON requirement as `unpack_byte`,
    // which the caller has promised to satisfy.
    let (first, second) = unsafe { (unpack_byte::<INVERT>(b0), unpack_byte::<INVERT>(b1)) };
    vcombine_u8(first, second)
}
/// Widens eight `u8` lanes to eight `u16` lanes with `vmovl_u8`.
///
/// The widening is unsigned (zero-extending), so an all-ones input lane
/// (`0xFF`) becomes `0x00FF`, not `0xFFFF`.
///
/// # Safety
///
/// NEON must be available on the executing CPU.
#[inline]
#[target_feature(enable = "neon")]
unsafe fn expand_y_to_u16x8(y8: uint8x8_t) -> uint16x8_t {
vmovl_u8(y8)
}
/// Expands a 1-bit-per-pixel row into packed 8-bit RGB.
///
/// Bits are consumed MSB-first. In the NEON path each pixel becomes three
/// identical bytes: `0xFF` for a set bit and `0x00` for a clear bit, or the
/// reverse when `INVERT` is `true`. Full groups of 16 pixels are handled with
/// NEON; any remaining pixels are delegated to `scalar::monowhite_to_rgb_row`
/// (`INVERT = true`) or `scalar::monoblack_to_rgb_row` (`INVERT = false`).
///
/// # Safety
///
/// * NEON must be available on the executing CPU.
/// * `out` must hold at least `width * 3` elements. This is only verified by a
///   `debug_assert!`; the NEON stores go through a raw pointer.
///
/// # Panics
///
/// Indexing into `data` is bounds-checked and panics if the slice is too short
/// for the pixels being processed.
#[allow(dead_code)]
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn mono1bit_to_rgb_row<const INVERT: bool>(
    data: &[u8],
    out: &mut [u8],
    width: usize,
) {
    debug_assert!(data.len() >= width.div_ceil(8));
    debug_assert!(out.len() >= width * 3);

    // Each NEON group covers 16 pixels, i.e. two source bytes.
    let groups = width / 16;
    for g in 0..groups {
        let px = g * 16;
        let src = g * 2;
        // SAFETY: the caller guarantees NEON and `out.len() >= width * 3`;
        // `px + 16 <= width`, so the 48 bytes stored at `px * 3` are in bounds.
        unsafe {
            let luma = unpack_2bytes::<INVERT>(data[src], data[src + 1]);
            vst3q_u8(out.as_mut_ptr().add(px * 3), uint8x16x3_t(luma, luma, luma));
        }
    }

    let done = groups * 16;
    if done == width {
        return;
    }

    // Fewer than 16 pixels remain; they start on a byte boundary.
    let src_tail = &data[groups * 2..];
    let dst_tail = &mut out[done * 3..width * 3];
    let remaining = width - done;
    if INVERT {
        scalar::monowhite_to_rgb_row(src_tail, dst_tail, remaining);
    } else {
        scalar::monoblack_to_rgb_row(src_tail, dst_tail, remaining);
    }
}
/// Expands a 1-bit-per-pixel row into packed 8-bit RGBA.
///
/// Bits are consumed MSB-first. In the NEON path R, G and B all receive `0xFF`
/// for a set bit and `0x00` for a clear bit (reversed when `INVERT` is `true`),
/// and alpha is always `0xFF`. Full groups of 16 pixels are handled with NEON;
/// any remaining pixels are delegated to `scalar::monowhite_to_rgba_row`
/// (`INVERT = true`) or `scalar::monoblack_to_rgba_row` (`INVERT = false`).
///
/// # Safety
///
/// * NEON must be available on the executing CPU.
/// * `out` must hold at least `width * 4` elements. This is only verified by a
///   `debug_assert!`; the NEON stores go through a raw pointer.
///
/// # Panics
///
/// Indexing into `data` is bounds-checked and panics if the slice is too short
/// for the pixels being processed.
#[allow(dead_code)]
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn mono1bit_to_rgba_row<const INVERT: bool>(
    data: &[u8],
    out: &mut [u8],
    width: usize,
) {
    debug_assert!(data.len() >= width.div_ceil(8));
    debug_assert!(out.len() >= width * 4);

    // Opaque alpha, shared by every group.
    let opaque = vdupq_n_u8(0xFF);

    // Each NEON group covers 16 pixels, i.e. two source bytes.
    let groups = width / 16;
    for g in 0..groups {
        let px = g * 16;
        let src = g * 2;
        // SAFETY: the caller guarantees NEON and `out.len() >= width * 4`;
        // `px + 16 <= width`, so the 64 bytes stored at `px * 4` are in bounds.
        unsafe {
            let luma = unpack_2bytes::<INVERT>(data[src], data[src + 1]);
            vst4q_u8(
                out.as_mut_ptr().add(px * 4),
                uint8x16x4_t(luma, luma, luma, opaque),
            );
        }
    }

    let done = groups * 16;
    if done == width {
        return;
    }

    // Fewer than 16 pixels remain; they start on a byte boundary.
    let src_tail = &data[groups * 2..];
    let dst_tail = &mut out[done * 4..width * 4];
    let remaining = width - done;
    if INVERT {
        scalar::monowhite_to_rgba_row(src_tail, dst_tail, remaining);
    } else {
        scalar::monoblack_to_rgba_row(src_tail, dst_tail, remaining);
    }
}
/// Expands a 1-bit-per-pixel row into one 8-bit luma sample per pixel.
///
/// Bits are consumed MSB-first. In the NEON path a set bit becomes `0xFF` and a
/// clear bit `0x00`, or the reverse when `INVERT` is `true`. Full groups of 16
/// pixels are handled with NEON; any remaining pixels are delegated to
/// `scalar::monowhite_to_luma_row` (`INVERT = true`) or
/// `scalar::monoblack_to_luma_row` (`INVERT = false`).
///
/// # Safety
///
/// * NEON must be available on the executing CPU.
/// * `out` must hold at least `width` elements. This is only verified by a
///   `debug_assert!`; the NEON stores go through a raw pointer.
///
/// # Panics
///
/// Indexing into `data` is bounds-checked and panics if the slice is too short
/// for the pixels being processed.
#[allow(dead_code)]
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn mono1bit_to_luma_row<const INVERT: bool>(
    data: &[u8],
    out: &mut [u8],
    width: usize,
) {
    debug_assert!(data.len() >= width.div_ceil(8));
    debug_assert!(out.len() >= width);

    // Each NEON group covers 16 pixels, i.e. two source bytes.
    let groups = width / 16;
    for g in 0..groups {
        let px = g * 16;
        let src = g * 2;
        // SAFETY: the caller guarantees NEON and `out.len() >= width`;
        // `px + 16 <= width`, so the 16 bytes stored at `px` are in bounds.
        unsafe {
            let luma = unpack_2bytes::<INVERT>(data[src], data[src + 1]);
            vst1q_u8(out.as_mut_ptr().add(px), luma);
        }
    }

    let done = groups * 16;
    if done == width {
        return;
    }

    // Fewer than 16 pixels remain; they start on a byte boundary.
    let src_tail = &data[groups * 2..];
    let dst_tail = &mut out[done..width];
    let remaining = width - done;
    if INVERT {
        scalar::monowhite_to_luma_row(src_tail, dst_tail, remaining);
    } else {
        scalar::monoblack_to_luma_row(src_tail, dst_tail, remaining);
    }
}
/// Expands a 1-bit-per-pixel row into packed RGB with `u16` samples.
///
/// Bits are consumed MSB-first. In the NEON path the 8-bit lane mask is
/// zero-extended, so each of the three samples is `0x00FF` for a set bit and
/// `0x0000` for a clear bit (reversed when `INVERT` is `true`). Full groups of
/// 8 pixels are handled with NEON; any remaining pixels are delegated to
/// `scalar::monowhite_to_rgb_u16_row` (`INVERT = true`) or
/// `scalar::monoblack_to_rgb_u16_row` (`INVERT = false`).
///
/// NOTE(review): full scale here is `0x00FF`, not `0xFFFF` — confirm this
/// matches the scalar routines and the intended sample depth.
///
/// # Safety
///
/// * NEON must be available on the executing CPU.
/// * `out` must hold at least `width * 3` elements. This is only verified by a
///   `debug_assert!`; the NEON stores go through a raw pointer.
///
/// # Panics
///
/// Indexing into `data` is bounds-checked and panics if the slice is too short
/// for the pixels being processed.
#[allow(dead_code)]
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn mono1bit_to_rgb_u16_row<const INVERT: bool>(
    data: &[u8],
    out: &mut [u16],
    width: usize,
) {
    debug_assert!(data.len() >= width.div_ceil(8));
    debug_assert!(out.len() >= width * 3);

    // Each NEON group covers 8 pixels, i.e. exactly one source byte.
    let groups = width / 8;
    for g in 0..groups {
        let px = g * 8;
        // SAFETY: the caller guarantees NEON and `out.len() >= width * 3`;
        // `px + 8 <= width`, so the 24 samples stored at `px * 3` are in bounds.
        unsafe {
            let wide = expand_y_to_u16x8(unpack_byte::<INVERT>(data[g]));
            vst3q_u16(out.as_mut_ptr().add(px * 3), uint16x8x3_t(wide, wide, wide));
        }
    }

    let done = groups * 8;
    if done == width {
        return;
    }

    // Fewer than 8 pixels remain; they live in the next source byte.
    let src_tail = &data[groups..];
    let dst_tail = &mut out[done * 3..width * 3];
    let remaining = width - done;
    if INVERT {
        scalar::monowhite_to_rgb_u16_row(src_tail, dst_tail, remaining);
    } else {
        scalar::monoblack_to_rgb_u16_row(src_tail, dst_tail, remaining);
    }
}
/// Expands a 1-bit-per-pixel row into packed RGBA with `u16` samples.
///
/// Bits are consumed MSB-first. In the NEON path the 8-bit lane mask is
/// zero-extended, so R, G and B are `0x00FF` for a set bit and `0x0000` for a
/// clear bit (reversed when `INVERT` is `true`); alpha is always `0x00FF`.
/// Full groups of 8 pixels are handled with NEON; any remaining pixels are
/// delegated to `scalar::monowhite_to_rgba_u16_row` (`INVERT = true`) or
/// `scalar::monoblack_to_rgba_u16_row` (`INVERT = false`).
///
/// NOTE(review): full scale (including alpha) is `0x00FF`, not `0xFFFF` —
/// confirm this matches the scalar routines and the intended sample depth.
///
/// # Safety
///
/// * NEON must be available on the executing CPU.
/// * `out` must hold at least `width * 4` elements. This is only verified by a
///   `debug_assert!`; the NEON stores go through a raw pointer.
///
/// # Panics
///
/// Indexing into `data` is bounds-checked and panics if the slice is too short
/// for the pixels being processed.
#[allow(dead_code)]
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn mono1bit_to_rgba_u16_row<const INVERT: bool>(
    data: &[u8],
    out: &mut [u16],
    width: usize,
) {
    debug_assert!(data.len() >= width.div_ceil(8));
    debug_assert!(out.len() >= width * 4);

    // Alpha uses the same 0x00FF full-scale value as the colour samples.
    let opaque = vdupq_n_u16(0x00FF);

    // Each NEON group covers 8 pixels, i.e. exactly one source byte.
    let groups = width / 8;
    for g in 0..groups {
        let px = g * 8;
        // SAFETY: the caller guarantees NEON and `out.len() >= width * 4`;
        // `px + 8 <= width`, so the 32 samples stored at `px * 4` are in bounds.
        unsafe {
            let wide = expand_y_to_u16x8(unpack_byte::<INVERT>(data[g]));
            vst4q_u16(
                out.as_mut_ptr().add(px * 4),
                uint16x8x4_t(wide, wide, wide, opaque),
            );
        }
    }

    let done = groups * 8;
    if done == width {
        return;
    }

    // Fewer than 8 pixels remain; they live in the next source byte.
    let src_tail = &data[groups..];
    let dst_tail = &mut out[done * 4..width * 4];
    let remaining = width - done;
    if INVERT {
        scalar::monowhite_to_rgba_u16_row(src_tail, dst_tail, remaining);
    } else {
        scalar::monoblack_to_rgba_u16_row(src_tail, dst_tail, remaining);
    }
}
/// Expands a 1-bit-per-pixel row into one `u16` luma sample per pixel.
///
/// Bits are consumed MSB-first. In the NEON path the 8-bit lane mask is
/// zero-extended, so a set bit becomes `0x00FF` and a clear bit `0x0000`
/// (reversed when `INVERT` is `true`). Full groups of 8 pixels are handled with
/// NEON; any remaining pixels are delegated to
/// `scalar::monowhite_to_luma_u16_row` (`INVERT = true`) or
/// `scalar::monoblack_to_luma_u16_row` (`INVERT = false`).
///
/// NOTE(review): full scale here is `0x00FF`, not `0xFFFF` — confirm this
/// matches the scalar routines and the intended sample depth.
///
/// # Safety
///
/// * NEON must be available on the executing CPU.
/// * `out` must hold at least `width` elements. This is only verified by a
///   `debug_assert!`; the NEON stores go through a raw pointer.
///
/// # Panics
///
/// Indexing into `data` is bounds-checked and panics if the slice is too short
/// for the pixels being processed.
#[allow(dead_code)]
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn mono1bit_to_luma_u16_row<const INVERT: bool>(
    data: &[u8],
    out: &mut [u16],
    width: usize,
) {
    debug_assert!(data.len() >= width.div_ceil(8));
    debug_assert!(out.len() >= width);

    // Each NEON group covers 8 pixels, i.e. exactly one source byte.
    let groups = width / 8;
    for g in 0..groups {
        let px = g * 8;
        // SAFETY: the caller guarantees NEON and `out.len() >= width`;
        // `px + 8 <= width`, so the 8 samples stored at `px` are in bounds.
        unsafe {
            let wide = expand_y_to_u16x8(unpack_byte::<INVERT>(data[g]));
            vst1q_u16(out.as_mut_ptr().add(px), wide);
        }
    }

    let done = groups * 8;
    if done == width {
        return;
    }

    // Fewer than 8 pixels remain; they live in the next source byte.
    let src_tail = &data[groups..];
    let dst_tail = &mut out[done..width];
    let remaining = width - done;
    if INVERT {
        scalar::monowhite_to_luma_u16_row(src_tail, dst_tail, remaining);
    } else {
        scalar::monoblack_to_luma_u16_row(src_tail, dst_tail, remaining);
    }
}
/// Expands a 1-bit-per-pixel row into three separate 8-bit H, S and V planes.
///
/// Bits are consumed MSB-first. In the NEON path `h` and `s` are written as
/// zero for every pixel, while `v` receives `0xFF` for a set bit and `0x00`
/// for a clear bit (reversed when `INVERT` is `true`). Full groups of 16 pixels
/// are handled with NEON; any remaining pixels are delegated to
/// `scalar::monowhite_to_hsv_row` (`INVERT = true`) or
/// `scalar::monoblack_to_hsv_row` (`INVERT = false`).
///
/// # Safety
///
/// * NEON must be available on the executing CPU.
/// * `h`, `s` and `v` must each hold at least `width` elements. This is only
///   verified by `debug_assert!`s; the NEON stores go through raw pointers.
///
/// # Panics
///
/// Indexing into `data` is bounds-checked and panics if the slice is too short
/// for the pixels being processed.
#[allow(dead_code)]
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn mono1bit_to_hsv_row<const INVERT: bool>(
    data: &[u8],
    h: &mut [u8],
    s: &mut [u8],
    v: &mut [u8],
    width: usize,
) {
    debug_assert!(data.len() >= width.div_ceil(8));
    debug_assert!(h.len() >= width);
    debug_assert!(s.len() >= width);
    debug_assert!(v.len() >= width);

    // Hue and saturation are written as constant zero planes.
    let zeros = vdupq_n_u8(0);

    // Each NEON group covers 16 pixels, i.e. two source bytes.
    let groups = width / 16;
    for g in 0..groups {
        let px = g * 16;
        let src = g * 2;
        // SAFETY: the caller guarantees NEON and that `h`, `s` and `v` each hold
        // at least `width` bytes; `px + 16 <= width`, so every 16-byte store at
        // `px` is in bounds.
        unsafe {
            let value = unpack_2bytes::<INVERT>(data[src], data[src + 1]);
            vst1q_u8(h.as_mut_ptr().add(px), zeros);
            vst1q_u8(s.as_mut_ptr().add(px), zeros);
            vst1q_u8(v.as_mut_ptr().add(px), value);
        }
    }

    let done = groups * 16;
    if done == width {
        return;
    }

    // Fewer than 16 pixels remain; they start on a byte boundary.
    let src_tail = &data[groups * 2..];
    let h_tail = &mut h[done..width];
    let s_tail = &mut s[done..width];
    let v_tail = &mut v[done..width];
    let remaining = width - done;
    if INVERT {
        scalar::monowhite_to_hsv_row(src_tail, h_tail, s_tail, v_tail, remaining);
    } else {
        scalar::monoblack_to_hsv_row(src_tail, h_tail, s_tail, v_tail, remaining);
    }
}
/// MONOBLACK entry point for packed RGB8: forwards to [`mono1bit_to_rgb_row`]
/// without inversion, so set bits map to `0xFF` in the NEON path.
///
/// # Safety
///
/// NEON must be available, and `out` must hold at least `width * 3` elements
/// (the callee only checks this with `debug_assert!`).
#[allow(dead_code)]
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn monoblack_to_rgb_row(data: &[u8], out: &mut [u8], width: usize) {
    const INVERT: bool = false;
    // SAFETY: the caller's obligations are exactly those of the callee.
    unsafe { mono1bit_to_rgb_row::<INVERT>(data, out, width) }
}
/// MONOBLACK entry point for packed RGBA8: forwards to
/// [`mono1bit_to_rgba_row`] without inversion, so set bits map to `0xFF` in
/// the NEON path.
///
/// # Safety
///
/// NEON must be available, and `out` must hold at least `width * 4` elements
/// (the callee only checks this with `debug_assert!`).
#[allow(dead_code)]
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn monoblack_to_rgba_row(data: &[u8], out: &mut [u8], width: usize) {
    const INVERT: bool = false;
    // SAFETY: the caller's obligations are exactly those of the callee.
    unsafe { mono1bit_to_rgba_row::<INVERT>(data, out, width) }
}
/// MONOBLACK entry point for packed RGB with `u16` samples: forwards to
/// [`mono1bit_to_rgb_u16_row`] without inversion, so set bits map to `0x00FF`
/// in the NEON path.
///
/// # Safety
///
/// NEON must be available, and `out` must hold at least `width * 3` elements
/// (the callee only checks this with `debug_assert!`).
#[allow(dead_code)]
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn monoblack_to_rgb_u16_row(data: &[u8], out: &mut [u16], width: usize) {
    const INVERT: bool = false;
    // SAFETY: the caller's obligations are exactly those of the callee.
    unsafe { mono1bit_to_rgb_u16_row::<INVERT>(data, out, width) }
}
/// MONOBLACK entry point for packed RGBA with `u16` samples: forwards to
/// [`mono1bit_to_rgba_u16_row`] without inversion, so set bits map to `0x00FF`
/// in the NEON path.
///
/// # Safety
///
/// NEON must be available, and `out` must hold at least `width * 4` elements
/// (the callee only checks this with `debug_assert!`).
#[allow(dead_code)]
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn monoblack_to_rgba_u16_row(data: &[u8], out: &mut [u16], width: usize) {
    const INVERT: bool = false;
    // SAFETY: the caller's obligations are exactly those of the callee.
    unsafe { mono1bit_to_rgba_u16_row::<INVERT>(data, out, width) }
}
/// MONOBLACK entry point for 8-bit luma: forwards to [`mono1bit_to_luma_row`]
/// without inversion, so set bits map to `0xFF` in the NEON path.
///
/// # Safety
///
/// NEON must be available, and `out` must hold at least `width` elements (the
/// callee only checks this with `debug_assert!`).
#[allow(dead_code)]
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn monoblack_to_luma_row(data: &[u8], out: &mut [u8], width: usize) {
    const INVERT: bool = false;
    // SAFETY: the caller's obligations are exactly those of the callee.
    unsafe { mono1bit_to_luma_row::<INVERT>(data, out, width) }
}
/// MONOBLACK entry point for `u16` luma: forwards to
/// [`mono1bit_to_luma_u16_row`] without inversion, so set bits map to `0x00FF`
/// in the NEON path.
///
/// # Safety
///
/// NEON must be available, and `out` must hold at least `width` elements (the
/// callee only checks this with `debug_assert!`).
#[allow(dead_code)]
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn monoblack_to_luma_u16_row(data: &[u8], out: &mut [u16], width: usize) {
    const INVERT: bool = false;
    // SAFETY: the caller's obligations are exactly those of the callee.
    unsafe { mono1bit_to_luma_u16_row::<INVERT>(data, out, width) }
}
/// MONOBLACK entry point for planar HSV: forwards to [`mono1bit_to_hsv_row`]
/// without inversion, so set bits map to `0xFF` in the `v` plane in the NEON
/// path.
///
/// # Safety
///
/// NEON must be available, and `h`, `s` and `v` must each hold at least `width`
/// elements (the callee only checks this with `debug_assert!`).
#[allow(dead_code)]
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn monoblack_to_hsv_row(
    data: &[u8],
    h: &mut [u8],
    s: &mut [u8],
    v: &mut [u8],
    width: usize,
) {
    const INVERT: bool = false;
    // SAFETY: the caller's obligations are exactly those of the callee.
    unsafe { mono1bit_to_hsv_row::<INVERT>(data, h, s, v, width) }
}
/// MONOWHITE entry point for packed RGB8: forwards to [`mono1bit_to_rgb_row`]
/// with inversion, so set bits map to `0x00` in the NEON path.
///
/// # Safety
///
/// NEON must be available, and `out` must hold at least `width * 3` elements
/// (the callee only checks this with `debug_assert!`).
#[allow(dead_code)]
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn monowhite_to_rgb_row(data: &[u8], out: &mut [u8], width: usize) {
    const INVERT: bool = true;
    // SAFETY: the caller's obligations are exactly those of the callee.
    unsafe { mono1bit_to_rgb_row::<INVERT>(data, out, width) }
}
/// MONOWHITE entry point for packed RGBA8: forwards to
/// [`mono1bit_to_rgba_row`] with inversion, so set bits map to `0x00` in the
/// NEON path.
///
/// # Safety
///
/// NEON must be available, and `out` must hold at least `width * 4` elements
/// (the callee only checks this with `debug_assert!`).
#[allow(dead_code)]
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn monowhite_to_rgba_row(data: &[u8], out: &mut [u8], width: usize) {
    const INVERT: bool = true;
    // SAFETY: the caller's obligations are exactly those of the callee.
    unsafe { mono1bit_to_rgba_row::<INVERT>(data, out, width) }
}
/// MONOWHITE entry point for packed RGB with `u16` samples: forwards to
/// [`mono1bit_to_rgb_u16_row`] with inversion, so set bits map to `0x0000` in
/// the NEON path.
///
/// # Safety
///
/// NEON must be available, and `out` must hold at least `width * 3` elements
/// (the callee only checks this with `debug_assert!`).
#[allow(dead_code)]
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn monowhite_to_rgb_u16_row(data: &[u8], out: &mut [u16], width: usize) {
    const INVERT: bool = true;
    // SAFETY: the caller's obligations are exactly those of the callee.
    unsafe { mono1bit_to_rgb_u16_row::<INVERT>(data, out, width) }
}
/// MONOWHITE entry point for packed RGBA with `u16` samples: forwards to
/// [`mono1bit_to_rgba_u16_row`] with inversion, so set bits map to `0x0000` in
/// the NEON path.
///
/// # Safety
///
/// NEON must be available, and `out` must hold at least `width * 4` elements
/// (the callee only checks this with `debug_assert!`).
#[allow(dead_code)]
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn monowhite_to_rgba_u16_row(data: &[u8], out: &mut [u16], width: usize) {
    const INVERT: bool = true;
    // SAFETY: the caller's obligations are exactly those of the callee.
    unsafe { mono1bit_to_rgba_u16_row::<INVERT>(data, out, width) }
}
/// MONOWHITE entry point for 8-bit luma: forwards to [`mono1bit_to_luma_row`]
/// with inversion, so set bits map to `0x00` in the NEON path.
///
/// # Safety
///
/// NEON must be available, and `out` must hold at least `width` elements (the
/// callee only checks this with `debug_assert!`).
#[allow(dead_code)]
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn monowhite_to_luma_row(data: &[u8], out: &mut [u8], width: usize) {
    const INVERT: bool = true;
    // SAFETY: the caller's obligations are exactly those of the callee.
    unsafe { mono1bit_to_luma_row::<INVERT>(data, out, width) }
}
/// MONOWHITE entry point for `u16` luma: forwards to
/// [`mono1bit_to_luma_u16_row`] with inversion, so set bits map to `0x0000` in
/// the NEON path.
///
/// # Safety
///
/// NEON must be available, and `out` must hold at least `width` elements (the
/// callee only checks this with `debug_assert!`).
#[allow(dead_code)]
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn monowhite_to_luma_u16_row(data: &[u8], out: &mut [u16], width: usize) {
    const INVERT: bool = true;
    // SAFETY: the caller's obligations are exactly those of the callee.
    unsafe { mono1bit_to_luma_u16_row::<INVERT>(data, out, width) }
}
/// MONOWHITE entry point for planar HSV: forwards to [`mono1bit_to_hsv_row`]
/// with inversion, so set bits map to `0x00` in the `v` plane in the NEON path.
///
/// # Safety
///
/// NEON must be available, and `h`, `s` and `v` must each hold at least `width`
/// elements (the callee only checks this with `debug_assert!`).
#[allow(dead_code)]
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn monowhite_to_hsv_row(
    data: &[u8],
    h: &mut [u8],
    s: &mut [u8],
    v: &mut [u8],
    width: usize,
) {
    const INVERT: bool = true;
    // SAFETY: the caller's obligations are exactly those of the callee.
    unsafe { mono1bit_to_hsv_row::<INVERT>(data, h, s, v, width) }
}