use super::*;
#[inline]
#[target_feature(enable = "avx2")]
pub(crate) unsafe fn bgr_rgb_swap_row(input: &[u8], output: &mut [u8], width: usize) {
debug_assert!(input.len() >= width * 3, "input row too short");
debug_assert!(output.len() >= width * 3, "output row too short");
unsafe {
let mut x = 0usize;
while x + 32 <= width {
swap_rb_16_pixels(input.as_ptr().add(x * 3), output.as_mut_ptr().add(x * 3));
swap_rb_16_pixels(
input.as_ptr().add(x * 3 + 48),
output.as_mut_ptr().add(x * 3 + 48),
);
x += 32;
}
if x < width {
scalar::bgr_rgb_swap_row(
&input[x * 3..width * 3],
&mut output[x * 3..width * 3],
width - x,
);
}
}
}
#[inline]
#[target_feature(enable = "avx2")]
pub(crate) unsafe fn rgba_to_rgb_row(rgba: &[u8], rgb_out: &mut [u8], width: usize) {
debug_assert!(rgba.len() >= width * 4, "rgba row too short");
debug_assert!(rgb_out.len() >= width * 3, "rgb_out row too short");
unsafe {
let mut x = 0usize;
while x + 32 <= width {
drop_alpha_16_pixels(rgba.as_ptr().add(x * 4), rgb_out.as_mut_ptr().add(x * 3));
drop_alpha_16_pixels(
rgba.as_ptr().add(x * 4 + 64),
rgb_out.as_mut_ptr().add(x * 3 + 48),
);
x += 32;
}
if x < width {
scalar::rgba_to_rgb_row(
&rgba[x * 4..width * 4],
&mut rgb_out[x * 3..width * 3],
width - x,
);
}
}
}
#[inline]
#[target_feature(enable = "avx2")]
pub(crate) unsafe fn bgra_to_rgba_row(bgra: &[u8], rgba_out: &mut [u8], width: usize) {
debug_assert!(bgra.len() >= width * 4, "bgra row too short");
debug_assert!(rgba_out.len() >= width * 4, "rgba_out row too short");
unsafe {
let mut x = 0usize;
while x + 32 <= width {
let base_in = bgra.as_ptr().add(x * 4);
let base_out = rgba_out.as_mut_ptr().add(x * 4);
swap_rb_alpha_4_pixels(base_in, base_out);
swap_rb_alpha_4_pixels(base_in.add(16), base_out.add(16));
swap_rb_alpha_4_pixels(base_in.add(32), base_out.add(32));
swap_rb_alpha_4_pixels(base_in.add(48), base_out.add(48));
swap_rb_alpha_4_pixels(base_in.add(64), base_out.add(64));
swap_rb_alpha_4_pixels(base_in.add(80), base_out.add(80));
swap_rb_alpha_4_pixels(base_in.add(96), base_out.add(96));
swap_rb_alpha_4_pixels(base_in.add(112), base_out.add(112));
x += 32;
}
if x < width {
scalar::bgra_to_rgba_row(
&bgra[x * 4..width * 4],
&mut rgba_out[x * 4..width * 4],
width - x,
);
}
}
}
#[inline]
#[target_feature(enable = "avx2")]
pub(crate) unsafe fn bgra_to_rgb_row(bgra: &[u8], rgb_out: &mut [u8], width: usize) {
debug_assert!(bgra.len() >= width * 4, "bgra row too short");
debug_assert!(rgb_out.len() >= width * 3, "rgb_out row too short");
unsafe {
let mut x = 0usize;
while x + 32 <= width {
bgra_to_rgb_16_pixels(bgra.as_ptr().add(x * 4), rgb_out.as_mut_ptr().add(x * 3));
bgra_to_rgb_16_pixels(
bgra.as_ptr().add(x * 4 + 64),
rgb_out.as_mut_ptr().add(x * 3 + 48),
);
x += 32;
}
if x < width {
scalar::bgra_to_rgb_row(
&bgra[x * 4..width * 4],
&mut rgb_out[x * 3..width * 3],
width - x,
);
}
}
}
#[inline]
#[target_feature(enable = "avx2")]
pub(crate) unsafe fn argb_to_rgb_row(argb: &[u8], rgb_out: &mut [u8], width: usize) {
debug_assert!(argb.len() >= width * 4, "argb row too short");
debug_assert!(rgb_out.len() >= width * 3, "rgb_out row too short");
unsafe {
let mut x = 0usize;
while x + 32 <= width {
argb_to_rgb_16_pixels(argb.as_ptr().add(x * 4), rgb_out.as_mut_ptr().add(x * 3));
argb_to_rgb_16_pixels(
argb.as_ptr().add(x * 4 + 64),
rgb_out.as_mut_ptr().add(x * 3 + 48),
);
x += 32;
}
if x < width {
scalar::argb_to_rgb_row(
&argb[x * 4..width * 4],
&mut rgb_out[x * 3..width * 3],
width - x,
);
}
}
}
#[inline]
#[target_feature(enable = "avx2")]
pub(crate) unsafe fn abgr_to_rgb_row(abgr: &[u8], rgb_out: &mut [u8], width: usize) {
debug_assert!(abgr.len() >= width * 4, "abgr row too short");
debug_assert!(rgb_out.len() >= width * 3, "rgb_out row too short");
unsafe {
let mut x = 0usize;
while x + 32 <= width {
abgr_to_rgb_16_pixels(abgr.as_ptr().add(x * 4), rgb_out.as_mut_ptr().add(x * 3));
abgr_to_rgb_16_pixels(
abgr.as_ptr().add(x * 4 + 64),
rgb_out.as_mut_ptr().add(x * 3 + 48),
);
x += 32;
}
if x < width {
scalar::abgr_to_rgb_row(
&abgr[x * 4..width * 4],
&mut rgb_out[x * 3..width * 3],
width - x,
);
}
}
}
#[inline]
#[target_feature(enable = "avx2")]
pub(crate) unsafe fn argb_to_rgba_row(argb: &[u8], rgba_out: &mut [u8], width: usize) {
debug_assert!(argb.len() >= width * 4, "argb row too short");
debug_assert!(rgba_out.len() >= width * 4, "rgba_out row too short");
unsafe {
let mut x = 0usize;
while x + 32 <= width {
let base_in = argb.as_ptr().add(x * 4);
let base_out = rgba_out.as_mut_ptr().add(x * 4);
let mut off = 0usize;
while off < 128 {
argb_to_rgba_4_pixels(base_in.add(off), base_out.add(off));
off += 16;
}
x += 32;
}
if x < width {
scalar::argb_to_rgba_row(
&argb[x * 4..width * 4],
&mut rgba_out[x * 4..width * 4],
width - x,
);
}
}
}
#[inline]
#[target_feature(enable = "avx2")]
pub(crate) unsafe fn abgr_to_rgba_row(abgr: &[u8], rgba_out: &mut [u8], width: usize) {
debug_assert!(abgr.len() >= width * 4, "abgr row too short");
debug_assert!(rgba_out.len() >= width * 4, "rgba_out row too short");
unsafe {
let mut x = 0usize;
while x + 32 <= width {
let base_in = abgr.as_ptr().add(x * 4);
let base_out = rgba_out.as_mut_ptr().add(x * 4);
let mut off = 0usize;
while off < 128 {
abgr_to_rgba_4_pixels(base_in.add(off), base_out.add(off));
off += 16;
}
x += 32;
}
if x < width {
scalar::abgr_to_rgba_row(
&abgr[x * 4..width * 4],
&mut rgba_out[x * 4..width * 4],
width - x,
);
}
}
}
#[inline]
#[target_feature(enable = "avx2")]
pub(crate) unsafe fn xrgb_to_rgba_row(xrgb: &[u8], rgba_out: &mut [u8], width: usize) {
debug_assert!(xrgb.len() >= width * 4, "xrgb row too short");
debug_assert!(rgba_out.len() >= width * 4, "rgba_out row too short");
unsafe {
let mut x = 0usize;
while x + 32 <= width {
let base_in = xrgb.as_ptr().add(x * 4);
let base_out = rgba_out.as_mut_ptr().add(x * 4);
let mut off = 0usize;
while off < 128 {
xrgb_to_rgba_4_pixels(base_in.add(off), base_out.add(off));
off += 16;
}
x += 32;
}
if x < width {
scalar::xrgb_to_rgba_row(
&xrgb[x * 4..width * 4],
&mut rgba_out[x * 4..width * 4],
width - x,
);
}
}
}
#[inline]
#[target_feature(enable = "avx2")]
pub(crate) unsafe fn rgbx_to_rgba_row(rgbx: &[u8], rgba_out: &mut [u8], width: usize) {
debug_assert!(rgbx.len() >= width * 4, "rgbx row too short");
debug_assert!(rgba_out.len() >= width * 4, "rgba_out row too short");
unsafe {
let mut x = 0usize;
while x + 32 <= width {
let base_in = rgbx.as_ptr().add(x * 4);
let base_out = rgba_out.as_mut_ptr().add(x * 4);
let mut off = 0usize;
while off < 128 {
rgbx_to_rgba_4_pixels(base_in.add(off), base_out.add(off));
off += 16;
}
x += 32;
}
if x < width {
scalar::rgbx_to_rgba_row(
&rgbx[x * 4..width * 4],
&mut rgba_out[x * 4..width * 4],
width - x,
);
}
}
}
#[inline]
#[target_feature(enable = "avx2")]
pub(crate) unsafe fn xbgr_to_rgba_row(xbgr: &[u8], rgba_out: &mut [u8], width: usize) {
debug_assert!(xbgr.len() >= width * 4, "xbgr row too short");
debug_assert!(rgba_out.len() >= width * 4, "rgba_out row too short");
unsafe {
let mut x = 0usize;
while x + 32 <= width {
let base_in = xbgr.as_ptr().add(x * 4);
let base_out = rgba_out.as_mut_ptr().add(x * 4);
let mut off = 0usize;
while off < 128 {
xbgr_to_rgba_4_pixels(base_in.add(off), base_out.add(off));
off += 16;
}
x += 32;
}
if x < width {
scalar::xbgr_to_rgba_row(
&xbgr[x * 4..width * 4],
&mut rgba_out[x * 4..width * 4],
width - x,
);
}
}
}
#[inline]
#[target_feature(enable = "avx2")]
pub(crate) unsafe fn bgrx_to_rgba_row(bgrx: &[u8], rgba_out: &mut [u8], width: usize) {
debug_assert!(bgrx.len() >= width * 4, "bgrx row too short");
debug_assert!(rgba_out.len() >= width * 4, "rgba_out row too short");
unsafe {
let mut x = 0usize;
while x + 32 <= width {
let base_in = bgrx.as_ptr().add(x * 4);
let base_out = rgba_out.as_mut_ptr().add(x * 4);
let mut off = 0usize;
while off < 128 {
bgrx_to_rgba_4_pixels(base_in.add(off), base_out.add(off));
off += 16;
}
x += 32;
}
if x < width {
scalar::bgrx_to_rgba_row(
&bgrx[x * 4..width * 4],
&mut rgba_out[x * 4..width * 4],
width - x,
);
}
}
}
#[inline]
#[target_feature(enable = "avx2")]
pub(crate) unsafe fn x2rgb10_to_rgb_row<const BE: bool>(
x2rgb10: &[u8],
rgb_out: &mut [u8],
width: usize,
) {
debug_assert!(x2rgb10.len() >= width * 4, "x2rgb10 row too short");
debug_assert!(rgb_out.len() >= width * 3, "rgb_out row too short");
unsafe {
let mut x = 0usize;
while x + 32 <= width {
let base_in = x2rgb10.as_ptr().add(x * 4);
let base_out = rgb_out.as_mut_ptr().add(x * 3);
x2rgb10_to_rgb_16_pixels::<BE>(base_in, base_out);
x2rgb10_to_rgb_16_pixels::<BE>(base_in.add(64), base_out.add(48));
x += 32;
}
if x < width {
scalar::x2rgb10_to_rgb_row::<BE>(
&x2rgb10[x * 4..width * 4],
&mut rgb_out[x * 3..width * 3],
width - x,
);
}
}
}
#[inline]
#[target_feature(enable = "avx2")]
pub(crate) unsafe fn x2rgb10_to_rgba_row<const BE: bool>(
x2rgb10: &[u8],
rgba_out: &mut [u8],
width: usize,
) {
debug_assert!(x2rgb10.len() >= width * 4, "x2rgb10 row too short");
debug_assert!(rgba_out.len() >= width * 4, "rgba_out row too short");
unsafe {
let mut x = 0usize;
while x + 32 <= width {
let base_in = x2rgb10.as_ptr().add(x * 4);
let base_out = rgba_out.as_mut_ptr().add(x * 4);
x2rgb10_to_rgba_16_pixels::<BE>(base_in, base_out);
x2rgb10_to_rgba_16_pixels::<BE>(base_in.add(64), base_out.add(64));
x += 32;
}
if x < width {
scalar::x2rgb10_to_rgba_row::<BE>(
&x2rgb10[x * 4..width * 4],
&mut rgba_out[x * 4..width * 4],
width - x,
);
}
}
}
#[inline]
#[target_feature(enable = "avx2")]
pub(crate) unsafe fn x2rgb10_to_rgb_u16_row<const BE: bool>(
x2rgb10: &[u8],
rgb_out: &mut [u16],
width: usize,
) {
debug_assert!(x2rgb10.len() >= width * 4, "x2rgb10 row too short");
debug_assert!(rgb_out.len() >= width * 3, "rgb_out row too short");
unsafe {
let mut x = 0usize;
while x + 16 <= width {
let base_in = x2rgb10.as_ptr().add(x * 4);
let base_out = rgb_out.as_mut_ptr().add(x * 3).cast::<u8>();
x2rgb10_to_rgb_u16_8_pixels::<BE>(base_in, base_out);
x2rgb10_to_rgb_u16_8_pixels::<BE>(base_in.add(32), base_out.add(48));
x += 16;
}
if x < width {
scalar::x2rgb10_to_rgb_u16_row::<BE>(
&x2rgb10[x * 4..width * 4],
&mut rgb_out[x * 3..width * 3],
width - x,
);
}
}
}
#[inline]
#[target_feature(enable = "avx2")]
pub(crate) unsafe fn x2bgr10_to_rgb_row<const BE: bool>(
x2bgr10: &[u8],
rgb_out: &mut [u8],
width: usize,
) {
debug_assert!(x2bgr10.len() >= width * 4, "x2bgr10 row too short");
debug_assert!(rgb_out.len() >= width * 3, "rgb_out row too short");
unsafe {
let mut x = 0usize;
while x + 32 <= width {
let base_in = x2bgr10.as_ptr().add(x * 4);
let base_out = rgb_out.as_mut_ptr().add(x * 3);
x2bgr10_to_rgb_16_pixels::<BE>(base_in, base_out);
x2bgr10_to_rgb_16_pixels::<BE>(base_in.add(64), base_out.add(48));
x += 32;
}
if x < width {
scalar::x2bgr10_to_rgb_row::<BE>(
&x2bgr10[x * 4..width * 4],
&mut rgb_out[x * 3..width * 3],
width - x,
);
}
}
}
#[inline]
#[target_feature(enable = "avx2")]
pub(crate) unsafe fn x2bgr10_to_rgba_row<const BE: bool>(
x2bgr10: &[u8],
rgba_out: &mut [u8],
width: usize,
) {
debug_assert!(x2bgr10.len() >= width * 4, "x2bgr10 row too short");
debug_assert!(rgba_out.len() >= width * 4, "rgba_out row too short");
unsafe {
let mut x = 0usize;
while x + 32 <= width {
let base_in = x2bgr10.as_ptr().add(x * 4);
let base_out = rgba_out.as_mut_ptr().add(x * 4);
x2bgr10_to_rgba_16_pixels::<BE>(base_in, base_out);
x2bgr10_to_rgba_16_pixels::<BE>(base_in.add(64), base_out.add(64));
x += 32;
}
if x < width {
scalar::x2bgr10_to_rgba_row::<BE>(
&x2bgr10[x * 4..width * 4],
&mut rgba_out[x * 4..width * 4],
width - x,
);
}
}
}
#[inline]
#[target_feature(enable = "avx2")]
pub(crate) unsafe fn x2bgr10_to_rgb_u16_row<const BE: bool>(
x2bgr10: &[u8],
rgb_out: &mut [u16],
width: usize,
) {
debug_assert!(x2bgr10.len() >= width * 4, "x2bgr10 row too short");
debug_assert!(rgb_out.len() >= width * 3, "rgb_out row too short");
unsafe {
let mut x = 0usize;
while x + 16 <= width {
let base_in = x2bgr10.as_ptr().add(x * 4);
let base_out = rgb_out.as_mut_ptr().add(x * 3).cast::<u8>();
x2bgr10_to_rgb_u16_8_pixels::<BE>(base_in, base_out);
x2bgr10_to_rgb_u16_8_pixels::<BE>(base_in.add(32), base_out.add(48));
x += 16;
}
if x < width {
scalar::x2bgr10_to_rgb_u16_row::<BE>(
&x2bgr10[x * 4..width * 4],
&mut rgb_out[x * 3..width * 3],
width - x,
);
}
}
}