#![allow(clippy::match_same_arms, reason = "XXX")]
#![allow(clippy::similar_names, reason = "XXX")]
#![allow(clippy::cast_possible_wrap, reason = "XXX")]
#![allow(clippy::cast_possible_truncation, reason = "XXX")]
#[cfg(target_arch = "loongarch32")]
use core::arch::loongarch32::*;
#[cfg(target_arch = "loongarch64")]
use core::arch::loongarch64::*;
use core::mem::MaybeUninit;
use core::{hint, ptr, slice};
use crate::backend::generic::{decode_generic_unchecked, encode_generic_unchecked};
use crate::error::InvalidInput;
use crate::util::{lut16, lut32};
/// Encodes `src` into `dst` with the LSX backend, picking a kernel by input length.
///
/// * fewer than 16 bytes: `encode_generic_unchecked`
/// * exactly 16 bytes: a single 128-bit block
/// * 17..=32 bytes: two (possibly overlapping) 128-bit blocks
/// * more than 32 bytes: a loop over 128-bit blocks followed by a generic tail
///
/// # Safety
///
/// The CPU must support the `lsx` target feature. `dst` is expected to provide at
/// least `src.len()` output pairs; the vector kernels only verify this with
/// `debug_assert!`.
#[target_feature(enable = "lsx")]
pub(crate) unsafe fn encode_lsx_unchecked<const UPPER: bool>(
    src: &[u8],
    dst: &mut [[MaybeUninit<u8>; 2]],
) {
    let len = src.len();
    if len < 16 {
        return encode_generic_unchecked::<UPPER>(src, dst);
    }
    if len == 16 {
        return encode_lsx_unchecked_v128_exact::<UPPER>(src, dst);
    }
    if len <= 32 {
        return encode_lsx_unchecked_v128_overlapped::<UPPER>(src, dst);
    }
    encode_lsx_unchecked_v128_with_trailing::<UPPER>(src, dst)
}
/// Encodes exactly one 128-bit block (16 input bytes) into 16 output pairs.
///
/// Every input byte is split into its high and low nibble, both nibbles are mapped
/// through the 16-byte table loaded from `lut16::<UPPER>()`, and the two results are
/// interleaved so that 32 output bytes are written starting at `dst`.
///
/// # Safety
///
/// The CPU must support `lsx`, `src` must be exactly 16 bytes long and `dst` must
/// have room for at least `src.len()` pairs (both only checked by debug assertions).
#[target_feature(enable = "lsx")]
unsafe fn encode_lsx_unchecked_v128_exact<const UPPER: bool>(
    src: &[u8],
    dst: &mut [[MaybeUninit<u8>; 2]],
) {
    const BATCH_ELEMS_V128_X1: usize = size_of::<m128i>();
    debug_assert_eq!(src.len(), BATCH_ELEMS_V128_X1);
    debug_assert!(src.len() <= dst.len());

    // 0x0F in every byte lane: isolates the low nibble.
    let nibble_mask = lsx_vldi::<0b_0000_1111>();
    let table = lsx_vld::<0>(lut16::<UPPER>().as_ptr().cast());

    let input: *const i8 = src.as_ptr().cast();
    let output: *mut i8 = dst.as_mut_ptr().cast();

    let bytes = lsx_vld::<0>(input);
    let high_nibbles = lsx_vsrli_b::<4>(bytes);
    let low_nibbles = lsx_vand_v(bytes, nibble_mask);
    let high_digits = lsx_vshuf_b(table, table, high_nibbles);
    let low_digits = lsx_vshuf_b(table, table, low_nibbles);

    // Interleave the two digit streams; the lower half of the block goes to the
    // first 16 output bytes, the upper half to the following 16.
    let first_half = lsx_vilvl_b(low_digits, high_digits);
    let second_half = lsx_vilvh_b(low_digits, high_digits);
    lsx_vst::<0>(first_half, output);
    lsx_vst::<{ size_of::<m128i>() as i32 }>(second_half, output);
}
/// Encodes 16..=32 input bytes with two 128-bit blocks that may overlap.
///
/// The first block covers the first 16 input bytes; the second block is positioned
/// so that it ends exactly at the end of `src` (and its output ends exactly
/// `src.len()` pairs into `dst`). Bytes covered by both blocks are simply encoded
/// twice.
///
/// # Safety
///
/// The CPU must support `lsx`, `src.len()` must be within `16..=32` and `dst` must
/// have room for at least `src.len()` pairs (only checked by debug assertions).
#[target_feature(enable = "lsx")]
unsafe fn encode_lsx_unchecked_v128_overlapped<const UPPER: bool>(
    src: &[u8],
    dst: &mut [[MaybeUninit<u8>; 2]],
) {
    const BATCH_ELEMS_V128_X1: usize = size_of::<m128i>();
    const BATCH_ELEMS_V128_X2: usize = size_of::<m128i>() * 2;
    debug_assert!(src.len() >= BATCH_ELEMS_V128_X1);
    debug_assert!(src.len() <= BATCH_ELEMS_V128_X2);
    debug_assert!(src.len() <= dst.len());

    // 0x0F in every byte lane: isolates the low nibble.
    let nibble_mask = lsx_vldi::<0b_0000_1111>();
    let table = lsx_vld::<0>(lut16::<UPPER>().as_ptr().cast());

    // Encodes 16 bytes at `input` into 32 bytes at `output`.
    let encode_block = |input: *const i8, output: *mut i8| {
        let bytes = lsx_vld::<0>(input);
        let high_nibbles = lsx_vsrli_b::<4>(bytes);
        let low_nibbles = lsx_vand_v(bytes, nibble_mask);
        let high_digits = lsx_vshuf_b(table, table, high_nibbles);
        let low_digits = lsx_vshuf_b(table, table, low_nibbles);
        let first_half = lsx_vilvl_b(low_digits, high_digits);
        let second_half = lsx_vilvh_b(low_digits, high_digits);
        lsx_vst::<0>(first_half, output);
        lsx_vst::<{ size_of::<m128i>() as i32 }>(second_half, output);
    };

    let len = src.len();
    let src_start = src.as_ptr();
    let dst_start = dst.as_mut_ptr();

    // Head block: the first 16 input bytes.
    encode_block(src_start.cast(), dst_start.cast());

    // Tail block: the last 16 input bytes, i.e. one vector before the end of the
    // input and two vectors before the end of the corresponding output.
    let tail_input = src_start.add(len).cast::<m128i>().sub(1);
    let tail_output = dst_start.add(len).cast::<m128i>().sub(2);
    encode_block(tail_input.cast(), tail_output.cast());
}
/// Encodes an input of at least 16 bytes in full 128-bit blocks, then hands the
/// leftover (fewer than 16 bytes) to `encode_generic_unchecked`.
///
/// # Safety
///
/// The CPU must support `lsx`, `src` must hold at least 16 bytes and `dst` must
/// have room for at least `src.len()` pairs (only checked by debug assertions).
#[target_feature(enable = "lsx")]
unsafe fn encode_lsx_unchecked_v128_with_trailing<const UPPER: bool>(
    src: &[u8],
    dst: &mut [[MaybeUninit<u8>; 2]],
) {
    const BATCH_ELEMS_V128_X1: usize = size_of::<m128i>();
    debug_assert!(src.len() >= BATCH_ELEMS_V128_X1);
    debug_assert!(src.len() <= dst.len());

    // 0x0F in every byte lane: isolates the low nibble.
    let nibble_mask = lsx_vldi::<0b_0000_1111>();
    let table = lsx_vld::<0>(lut16::<UPPER>().as_ptr().cast());

    // Encodes 16 bytes at `input` into 32 bytes at `output`.
    let encode_block = |input: *const i8, output: *mut i8| {
        let bytes = lsx_vld::<0>(input);
        let high_nibbles = lsx_vsrli_b::<4>(bytes);
        let low_nibbles = lsx_vand_v(bytes, nibble_mask);
        let high_digits = lsx_vshuf_b(table, table, high_nibbles);
        let low_digits = lsx_vshuf_b(table, table, low_nibbles);
        let first_half = lsx_vilvl_b(low_digits, high_digits);
        let second_half = lsx_vilvh_b(low_digits, high_digits);
        lsx_vst::<0>(first_half, output);
        lsx_vst::<{ size_of::<m128i>() as i32 }>(second_half, output);
    };

    let full_blocks = src.len() / BATCH_ELEMS_V128_X1;
    let tail_len = src.len() % BATCH_ELEMS_V128_X1;
    let src_start = src.as_ptr();
    let dst_start = dst.as_mut_ptr();

    // Each block consumes one input vector and produces two output vectors.
    let mut input = src_start.cast::<m128i>();
    let mut output = dst_start.cast::<m128i>();
    for _ in 0..full_blocks {
        encode_block(input.cast(), output.cast());
        input = input.add(1);
        output = output.add(2);
    }

    // Whatever did not fill a whole block is encoded by the generic routine.
    let consumed = full_blocks * BATCH_ELEMS_V128_X1;
    let tail_src = slice::from_raw_parts(src_start.add(consumed), tail_len);
    let tail_dst = slice::from_raw_parts_mut(dst_start.add(consumed), tail_len);
    encode_generic_unchecked::<UPPER>(tail_src, tail_dst);
}
/// Encodes `src` into `dst` with the LASX backend, picking a kernel by input length.
///
/// * fewer than 16 bytes: `encode_generic_unchecked`
/// * exactly 16 bytes: a single 128-bit block
/// * 17..=32 bytes: two (possibly overlapping) 128-bit blocks
/// * 33..=64 bytes: two (possibly overlapping) 256-bit blocks
/// * more than 64 bytes: a loop over 256-bit blocks followed by a generic tail
///
/// # Safety
///
/// The CPU must support the `lasx` target feature. `dst` is expected to provide at
/// least `src.len()` output pairs; the vector kernels only verify this with
/// `debug_assert!`.
#[target_feature(enable = "lasx")]
pub(crate) unsafe fn encode_lasx_unchecked<const UPPER: bool>(
    src: &[u8],
    dst: &mut [[MaybeUninit<u8>; 2]],
) {
    let len = src.len();
    if len < 16 {
        return encode_generic_unchecked::<UPPER>(src, dst);
    }
    if len == 16 {
        return encode_lsx_unchecked_v128_exact::<UPPER>(src, dst);
    }
    if len <= 32 {
        return encode_lsx_unchecked_v128_overlapped::<UPPER>(src, dst);
    }
    if len <= 64 {
        return encode_lasx_unchecked_v256_overlapped::<UPPER>(src, dst);
    }
    encode_lasx_unchecked_v256_with_trailing::<UPPER>(src, dst)
}
/// Encodes 32..=64 input bytes with two 256-bit blocks that may overlap.
///
/// The first block covers the first 32 input bytes; the second block is positioned
/// so that it ends exactly at the end of `src` (and its output ends exactly
/// `src.len()` pairs into `dst`). Bytes covered by both blocks are encoded twice.
///
/// # Safety
///
/// The CPU must support `lasx`, `src.len()` must be within `32..=64` and `dst` must
/// have room for at least `src.len()` pairs (only checked by debug assertions).
#[target_feature(enable = "lasx")]
unsafe fn encode_lasx_unchecked_v256_overlapped<const UPPER: bool>(
    src: &[u8],
    dst: &mut [[MaybeUninit<u8>; 2]],
) {
    const BATCH_ELEMS_V256_X1: usize = size_of::<m256i>();
    const BATCH_ELEMS_V256_X2: usize = size_of::<m256i>() * 2;
    debug_assert!(src.len() >= BATCH_ELEMS_V256_X1);
    debug_assert!(src.len() <= BATCH_ELEMS_V256_X2);
    debug_assert!(src.len() <= dst.len());

    // 0x0F in every byte lane: isolates the low nibble.
    let nibble_mask = lasx_xvldi::<0b_0000_1111>();
    let table = lasx_xvld::<0>(lut32::<UPPER>().as_ptr().cast());

    // Encodes 32 bytes at `input` into 64 bytes at `output`.
    let encode_block = |input: *const i8, output: *mut i8| {
        let bytes = lasx_xvld::<0>(input);
        let high_nibbles = lasx_xvsrli_b::<4>(bytes);
        let low_nibbles = lasx_xvand_v(bytes, nibble_mask);
        // The interleaves work per 128-bit lane, so the nibble pairs come out as
        // output quarters (0, 2) and (1, 3); the two `xvpermi_q` calls regroup
        // them into quarters (0, 1) and (2, 3) before the table lookup.
        let quarters_02 = lasx_xvilvl_b(low_nibbles, high_nibbles);
        let quarters_13 = lasx_xvilvh_b(low_nibbles, high_nibbles);
        let quarters_01 = lasx_xvpermi_q::<0x02>(quarters_02, quarters_13);
        let quarters_23 = lasx_xvpermi_q::<0x13>(quarters_02, quarters_13);
        let first_half = lasx_xvshuf_b(table, table, quarters_01);
        let second_half = lasx_xvshuf_b(table, table, quarters_23);
        lasx_xvst::<0>(first_half, output);
        lasx_xvst::<{ size_of::<m256i>() as i32 }>(second_half, output);
    };

    let len = src.len();
    let src_start = src.as_ptr();
    let dst_start = dst.as_mut_ptr();

    // Head block: the first 32 input bytes.
    encode_block(src_start.cast(), dst_start.cast());

    // Tail block: the last 32 input bytes, i.e. one vector before the end of the
    // input and two vectors before the end of the corresponding output.
    let tail_input = src_start.add(len).cast::<m256i>().sub(1);
    let tail_output = dst_start.add(len).cast::<m256i>().sub(2);
    encode_block(tail_input.cast(), tail_output.cast());
}
/// Encodes an input of at least 32 bytes in full 256-bit blocks, then hands the
/// leftover (fewer than 32 bytes) to `encode_generic_unchecked`.
///
/// # Safety
///
/// The CPU must support `lasx`, `src` must hold at least 32 bytes and `dst` must
/// have room for at least `src.len()` pairs (only checked by debug assertions).
#[target_feature(enable = "lasx")]
unsafe fn encode_lasx_unchecked_v256_with_trailing<const UPPER: bool>(
    src: &[u8],
    dst: &mut [[MaybeUninit<u8>; 2]],
) {
    const BATCH_ELEMS_V256_X1: usize = size_of::<m256i>();
    debug_assert!(src.len() >= BATCH_ELEMS_V256_X1);
    debug_assert!(src.len() <= dst.len());

    // 0x0F in every byte lane: isolates the low nibble.
    let nibble_mask = lasx_xvldi::<0b_0000_1111>();
    let table = lasx_xvld::<0>(lut32::<UPPER>().as_ptr().cast());

    let full_blocks = src.len() / BATCH_ELEMS_V256_X1;
    let tail_len = src.len() % BATCH_ELEMS_V256_X1;

    // Encodes 32 bytes at `input` into 64 bytes at `output`.
    let encode_block = |input: *const i8, output: *mut i8| {
        let bytes = lasx_xvld::<0>(input);
        let high_nibbles = lasx_xvsrli_b::<4>(bytes);
        let low_nibbles = lasx_xvand_v(bytes, nibble_mask);
        // The interleaves work per 128-bit lane, so the nibble pairs come out as
        // output quarters (0, 2) and (1, 3); the two `xvpermi_q` calls regroup
        // them into quarters (0, 1) and (2, 3) before the table lookup.
        let quarters_02 = lasx_xvilvl_b(low_nibbles, high_nibbles);
        let quarters_13 = lasx_xvilvh_b(low_nibbles, high_nibbles);
        let quarters_01 = lasx_xvpermi_q::<0x02>(quarters_02, quarters_13);
        let quarters_23 = lasx_xvpermi_q::<0x13>(quarters_02, quarters_13);
        let first_half = lasx_xvshuf_b(table, table, quarters_01);
        let second_half = lasx_xvshuf_b(table, table, quarters_23);
        lasx_xvst::<0>(first_half, output);
        lasx_xvst::<{ size_of::<m256i>() as i32 }>(second_half, output);
    };

    let src_start = src.as_ptr();
    let dst_start = dst.as_mut_ptr();

    // Each block consumes one input vector and produces two output vectors.
    let mut input = src_start.cast::<m256i>();
    let mut output = dst_start.cast::<m256i>();
    for _ in 0..full_blocks {
        encode_block(input.cast(), output.cast());
        input = input.add(1);
        output = output.add(2);
    }

    // Whatever did not fill a whole block is encoded by the generic routine.
    let consumed = full_blocks * BATCH_ELEMS_V256_X1;
    let tail_src = slice::from_raw_parts(src_start.add(consumed), tail_len);
    let tail_dst = slice::from_raw_parts_mut(dst_start.add(consumed), tail_len);
    encode_generic_unchecked::<UPPER>(tail_src, tail_dst);
}
/// Decodes the character pairs in `src` into bytes at `dst` with the LSX backend,
/// picking a kernel by the number of pairs.
///
/// * fewer than 8 pairs: `decode_generic_unchecked::<false>`
/// * exactly 8 pairs: a single 128-bit block
/// * 9..16 pairs: two overlapping 128-bit blocks
/// * exactly 16 pairs: one double block (two 128-bit loads, one 128-bit store)
/// * more than 16 pairs: a loop over double blocks followed by a shorter tail
///
/// Returns whatever the selected kernel returns; the vector kernels report
/// `Err(InvalidInput)` when they see a character that is not a hex digit.
///
/// # Safety
///
/// The CPU must support the `lsx` target feature, `src` must be valid for reads
/// and `dst` valid for writes over their whole length. `dst` is expected to hold
/// at least `src.len()` bytes; the vector kernels only verify this with
/// `debug_assert!`.
#[target_feature(enable = "lsx")]
#[inline]
pub(crate) unsafe fn decode_lsx_unchecked(
    src: *const [[u8; 2]],
    dst: *mut [MaybeUninit<u8>],
) -> Result<(), InvalidInput> {
    let pairs = src.len();
    match pairs {
        0..8 => decode_generic_unchecked::<false>(src, dst),
        8 => decode_lsx_unchecked_v128_exact(src, dst),
        9..16 => decode_lsx_unchecked_v128_overlapped(src, dst),
        16 => decode_lsx_unchecked_v128x2_exact(src, dst),
        17.. => decode_lsx_unchecked_v128x2_with_trailing(src, dst),
    }
}
/// Decodes exactly 8 character pairs (one 128-bit load) into 8 output bytes.
///
/// Every character is converted to a nibble value; characters that are not hex
/// digits yield a value above `0x0F`, which makes the function return
/// `Err(InvalidInput)`. The 8 output bytes are stored in either case.
///
/// # Safety
///
/// The CPU must support `lsx`, `src` must be readable for exactly 8 pairs and
/// `dst` writable for at least 8 bytes (lengths only checked by debug assertions).
#[target_feature(enable = "lsx")]
#[inline]
unsafe fn decode_lsx_unchecked_v128_exact(
    src: *const [[u8; 2]],
    dst: *mut [MaybeUninit<u8>],
) -> Result<(), InvalidInput> {
    const BATCH_ELEMS_V128_X1: usize = size_of::<m128i>() / 2;
    debug_assert_eq!(src.len(), BATCH_ELEMS_V128_X1);
    debug_assert!(src.len() <= dst.len());

    let digit_shift = lsx_vldi::<{ (0xFF_u8 - b'9') as i32 }>();
    let six = lsx_vldi::<0x06>();
    let digit_rebase = lsx_vldi::<0xF0>();
    let case_fold = lsx_vldi::<0xDF>();
    let upper_a = lsx_vldi::<{ b'A' as i32 }>();
    let ten = lsx_vldi::<0x0A>();
    let max_nibble = lsx_vldi::<0x0F>();

    let input: *const i8 = src.cast::<m128i>().cast();
    let output: *mut i8 = dst.cast::<m128i>().cast();

    let text = lsx_vld::<0>(input);

    // Digit path: '0'..='9' map to 0..=9, everything else ends up above 0x0F.
    let from_digit = lsx_vsub_b(
        lsx_vssub_bu(lsx_vadd_b(text, digit_shift), six),
        digit_rebase,
    );
    // Letter path: after clearing bit 5, 'A'..='F' (and 'a'..='f') map to
    // 10..=15, everything else ends up above 0x0F.
    let from_letter = lsx_vsadd_bu(lsx_vsub_b(lsx_vand_v(text, case_fold), upper_a), ten);
    let nibbles = lsx_vmin_bu(from_digit, from_letter);

    // `lsx_bz_v` reports whether the whole vector is zero, so a zero result means
    // at least one lane exceeded 0x0F.
    let invalid = lsx_bz_v(lsx_vslt_bu(max_nibble, nibbles)) == 0;

    // Even lanes hold the high nibbles, odd lanes the low nibbles.
    let high = lsx_vpickev_b(nibbles, nibbles);
    let low = lsx_vpickod_b(nibbles, nibbles);
    let packed = lsx_vor_v(lsx_vslli_b::<4>(high), low);
    // Only the low 64 bits carry the 8 decoded bytes.
    lsx_vstelm_d::<0, 0>(packed, output.cast());

    if invalid {
        Err(InvalidInput)
    } else {
        Ok(())
    }
}
/// Decodes 8..=16 character pairs with two 128-bit blocks that may overlap.
///
/// The first block covers the first 8 pairs; the second block covers the last 8
/// pairs, so pairs in the overlap are decoded (and stored) twice. Returns
/// `Err(InvalidInput)` if either block contained a non-hex character; both blocks
/// are always processed and stored.
///
/// # Safety
///
/// The CPU must support `lsx`, `src.len()` must be within `8..=16`, `src` must be
/// readable and `dst` writable for at least `src.len()` bytes (lengths only
/// checked by debug assertions).
#[target_feature(enable = "lsx")]
#[inline]
unsafe fn decode_lsx_unchecked_v128_overlapped(
    src: *const [[u8; 2]],
    dst: *mut [MaybeUninit<u8>],
) -> Result<(), InvalidInput> {
    const BATCH_ELEMS_V128_X1: usize = size_of::<m128i>() / 2;
    const BATCH_ELEMS_V128_X2: usize = size_of::<m128i>() / 2 * 2;
    debug_assert!(src.len() >= BATCH_ELEMS_V128_X1);
    debug_assert!(src.len() <= BATCH_ELEMS_V128_X2);
    debug_assert!(src.len() <= dst.len());

    let digit_shift = lsx_vldi::<{ (0xFF_u8 - b'9') as i32 }>();
    let six = lsx_vldi::<0x06>();
    let digit_rebase = lsx_vldi::<0xF0>();
    let case_fold = lsx_vldi::<0xDF>();
    let upper_a = lsx_vldi::<{ b'A' as i32 }>();
    let ten = lsx_vldi::<0x0A>();
    let max_nibble = lsx_vldi::<0x0F>();

    // Decodes 8 pairs at `input` into 8 bytes at `output`; returns `true` when a
    // non-hex character was seen.
    let decode_block = |input: *const i8, output: *mut i8| -> bool {
        let text = lsx_vld::<0>(input);
        // Digit path: '0'..='9' map to 0..=9, everything else ends up above 0x0F.
        let from_digit = lsx_vsub_b(
            lsx_vssub_bu(lsx_vadd_b(text, digit_shift), six),
            digit_rebase,
        );
        // Letter path: 'A'..='F' / 'a'..='f' map to 10..=15, the rest above 0x0F.
        let from_letter = lsx_vsadd_bu(lsx_vsub_b(lsx_vand_v(text, case_fold), upper_a), ten);
        let nibbles = lsx_vmin_bu(from_digit, from_letter);
        let bad = lsx_bz_v(lsx_vslt_bu(max_nibble, nibbles)) == 0;
        // Even lanes hold the high nibbles, odd lanes the low nibbles.
        let high = lsx_vpickev_b(nibbles, nibbles);
        let low = lsx_vpickod_b(nibbles, nibbles);
        let packed = lsx_vor_v(lsx_vslli_b::<4>(high), low);
        lsx_vstelm_d::<0, 0>(packed, output.cast());
        bad
    };

    let pairs = src.len();

    // Head block: the first 8 pairs.
    let head_bad = decode_block(src.cast::<m128i>().cast(), dst.cast::<m128i>().cast());

    // Tail block: the last 8 pairs, written to the last 8 of the `pairs` output bytes.
    let tail_input = src.cast::<[u8; 2]>().add(pairs).cast::<m128i>().sub(1);
    let tail_output = dst
        .cast::<MaybeUninit<u8>>()
        .add(pairs)
        .sub(size_of::<m128i>() / 2)
        .cast::<m128i>();
    let tail_bad = decode_block(tail_input.cast(), tail_output.cast());

    if head_bad | tail_bad {
        Err(InvalidInput)
    } else {
        Ok(())
    }
}
#[target_feature(enable = "lsx")]
#[inline]
unsafe fn decode_lsx_unchecked_v128x2_exact(
src: *const [[u8; 2]],
dst: *mut [MaybeUninit<u8>],
) -> Result<(), InvalidInput> {
const BATCH_ELEMS_V128_X2: usize = size_of::<m128i>() / 2 * 2;
debug_assert_eq!(src.len(), BATCH_ELEMS_V128_X2);
debug_assert!(src.len() <= dst.len());
let n_c6 = lsx_vldi::<{ (0xFF_u8 - b'9') as i32 }>();
let n_06 = lsx_vldi::<0x06>();
let n_f0 = lsx_vldi::<0xF0>();
let n_df = lsx_vldi::<0xDF>();
let u_a = lsx_vldi::<{ b'A' as i32 }>();
let n_0a = lsx_vldi::<0x0A>();
let n_0f = lsx_vldi::<0x0F>();
let mut invalid = false;
let mut decode_v128x2 = |src: *const i8, dst: *mut i8| {
let chunk0 = lsx_vld::<0>(src);
let chunk1 = lsx_vld::<{ size_of::<m128i>() as i32 }>(src);
let n0 = {
let d = lsx_vsub_b(lsx_vssub_bu(lsx_vadd_b(chunk0, n_c6), n_06), n_f0);
let a = lsx_vsadd_bu(lsx_vsub_b(lsx_vand_v(chunk0, n_df), u_a), n_0a);
lsx_vmin_bu(d, a)
};
let n1 = {
let d = lsx_vsub_b(lsx_vssub_bu(lsx_vadd_b(chunk1, n_c6), n_06), n_f0);
let a = lsx_vsadd_bu(lsx_vsub_b(lsx_vand_v(chunk1, n_df), u_a), n_0a);
lsx_vmin_bu(d, a)
};
invalid |= lsx_bz_v(lsx_vor_v(lsx_vslt_bu(n_0f, n0), lsx_vslt_bu(n_0f, n1))) == 0;
let b01 = {
let hi01 = lsx_vpickev_b(n1, n0);
let lo01 = lsx_vpickod_b(n1, n0);
lsx_vor_v(lsx_vslli_b::<4>(hi01), lo01)
};
lsx_vst::<0>(b01, dst);
};
decode_v128x2(src.cast::<m128i>().cast(), dst.cast::<m128i>().cast());
if invalid {
return Err(InvalidInput);
}
Ok(())
}
/// Decodes at least 16 character pairs in double blocks of 16 pairs, then
/// dispatches the leftover (fewer than 16 pairs) to a smaller kernel.
///
/// If any full block contained a non-hex character the function returns
/// `Err(InvalidInput)` after the block loop and the leftover is not processed.
/// Otherwise the result of the leftover kernel is returned.
///
/// # Safety
///
/// The CPU must support `lsx`, `src` must be readable for at least 16 pairs and
/// `dst` writable for at least `src.len()` bytes (lengths only checked by debug
/// assertions).
#[target_feature(enable = "lsx")]
#[inline]
unsafe fn decode_lsx_unchecked_v128x2_with_trailing(
    src: *const [[u8; 2]],
    dst: *mut [MaybeUninit<u8>],
) -> Result<(), InvalidInput> {
    const BATCH_ELEMS_V128_X2: usize = size_of::<m128i>() / 2 * 2;
    debug_assert!(src.len() >= BATCH_ELEMS_V128_X2);
    debug_assert!(src.len() <= dst.len());

    let digit_shift = lsx_vldi::<{ (0xFF_u8 - b'9') as i32 }>();
    let six = lsx_vldi::<0x06>();
    let digit_rebase = lsx_vldi::<0xF0>();
    let case_fold = lsx_vldi::<0xDF>();
    let upper_a = lsx_vldi::<{ b'A' as i32 }>();
    let ten = lsx_vldi::<0x0A>();
    let max_nibble = lsx_vldi::<0x0F>();

    let full_batches = src.len() / BATCH_ELEMS_V128_X2;
    let leftover = src.len() % BATCH_ELEMS_V128_X2;

    // Decodes 16 pairs at `input` into 16 bytes at `output`; returns `true` when
    // a non-hex character was seen.
    let decode_batch = |input: *const i8, output: *mut i8| -> bool {
        let text0 = lsx_vld::<0>(input);
        let text1 = lsx_vld::<{ size_of::<m128i>() as i32 }>(input);
        // Digit path maps '0'..='9' to 0..=9, letter path maps 'A'..='F' /
        // 'a'..='f' to 10..=15; anything else ends up above 0x0F.
        let nibbles0 = {
            let from_digit = lsx_vsub_b(
                lsx_vssub_bu(lsx_vadd_b(text0, digit_shift), six),
                digit_rebase,
            );
            let from_letter =
                lsx_vsadd_bu(lsx_vsub_b(lsx_vand_v(text0, case_fold), upper_a), ten);
            lsx_vmin_bu(from_digit, from_letter)
        };
        let nibbles1 = {
            let from_digit = lsx_vsub_b(
                lsx_vssub_bu(lsx_vadd_b(text1, digit_shift), six),
                digit_rebase,
            );
            let from_letter =
                lsx_vsadd_bu(lsx_vsub_b(lsx_vand_v(text1, case_fold), upper_a), ten);
            lsx_vmin_bu(from_digit, from_letter)
        };
        let too_large = lsx_vor_v(
            lsx_vslt_bu(max_nibble, nibbles0),
            lsx_vslt_bu(max_nibble, nibbles1),
        );
        let bad = lsx_bz_v(too_large) == 0;
        // Even lanes hold the high nibbles, odd lanes the low nibbles.
        let high = lsx_vpickev_b(nibbles1, nibbles0);
        let low = lsx_vpickod_b(nibbles1, nibbles0);
        let packed = lsx_vor_v(lsx_vslli_b::<4>(high), low);
        lsx_vst::<0>(packed, output);
        bad
    };

    // Each batch consumes two input vectors and produces one output vector.
    let mut any_invalid = false;
    let mut input = src.cast::<m128i>();
    let mut output = dst.cast::<m128i>();
    for _ in 0..full_batches {
        any_invalid |= decode_batch(input.cast(), output.cast());
        input = input.add(2);
        output = output.add(1);
    }
    if any_invalid {
        return Err(InvalidInput);
    }

    let consumed = full_batches * BATCH_ELEMS_V128_X2;
    let tail_src = ptr::slice_from_raw_parts(src.cast::<[u8; 2]>().add(consumed), leftover);
    let tail_dst =
        ptr::slice_from_raw_parts_mut(dst.cast::<MaybeUninit<u8>>().add(consumed), leftover);

    match tail_src.len() {
        0..8 => decode_generic_unchecked::<false>(tail_src, tail_dst),
        8 => decode_lsx_unchecked_v128_exact(tail_src, tail_dst),
        9..16 => decode_lsx_unchecked_v128_overlapped(tail_src, tail_dst),
        // The leftover is a remainder modulo 16, so it can never reach 16.
        16.. => hint::unreachable_unchecked(),
    }
}
/// Decodes the character pairs in `src` into bytes at `dst` with the LASX backend,
/// picking a kernel by the number of pairs.
///
/// * fewer than 8 pairs: `decode_generic_unchecked::<false>`
/// * 8..=31 pairs: the 128-bit LSX kernels (exact, overlapped, double block, or
///   double-block loop with tail)
/// * exactly 32 pairs: one 256-bit double block
/// * more than 32 pairs: a loop over 256-bit double blocks followed by a tail
///
/// Returns whatever the selected kernel returns; the vector kernels report
/// `Err(InvalidInput)` when they see a character that is not a hex digit.
///
/// # Safety
///
/// The CPU must support the `lasx` target feature, `src` must be valid for reads
/// and `dst` valid for writes over their whole length. `dst` is expected to hold
/// at least `src.len()` bytes; the vector kernels only verify this with
/// `debug_assert!`.
#[target_feature(enable = "lasx")]
#[inline]
pub(crate) unsafe fn decode_lasx_unchecked(
    src: *const [[u8; 2]],
    dst: *mut [MaybeUninit<u8>],
) -> Result<(), InvalidInput> {
    let pairs = src.len();
    match pairs {
        0..8 => decode_generic_unchecked::<false>(src, dst),
        8 => decode_lsx_unchecked_v128_exact(src, dst),
        9..16 => decode_lsx_unchecked_v128_overlapped(src, dst),
        16 => decode_lsx_unchecked_v128x2_exact(src, dst),
        17..32 => decode_lsx_unchecked_v128x2_with_trailing(src, dst),
        32 => decode_lasx_unchecked_v256x2_exact(src, dst),
        33.. => decode_lasx_unchecked_v256x2_with_trailing(src, dst),
    }
}
/// Decodes exactly 32 character pairs (two 256-bit loads) into 32 output bytes
/// written with a single 256-bit store.
///
/// Returns `Err(InvalidInput)` if any character is not a hex digit; the output is
/// stored in either case.
///
/// # Safety
///
/// The CPU must support `lasx`, `src` must be readable for exactly 32 pairs and
/// `dst` writable for at least 32 bytes (lengths only checked by debug assertions).
#[target_feature(enable = "lasx")]
#[inline]
unsafe fn decode_lasx_unchecked_v256x2_exact(
    src: *const [[u8; 2]],
    dst: *mut [MaybeUninit<u8>],
) -> Result<(), InvalidInput> {
    const BATCH_ELEMS_V256_X2: usize = size_of::<m256i>() / 2 * 2;
    debug_assert_eq!(src.len(), BATCH_ELEMS_V256_X2);
    debug_assert!(src.len() <= dst.len());

    let digit_shift = lasx_xvldi::<{ (0xFF_u8 - b'9') as i32 }>();
    let six = lasx_xvldi::<0x06>();
    let digit_rebase = lasx_xvldi::<0xF0>();
    let case_fold = lasx_xvldi::<0xDF>();
    let upper_a = lasx_xvldi::<{ b'A' as i32 }>();
    let ten = lasx_xvldi::<0x0A>();
    let max_nibble = lasx_xvldi::<0x0F>();

    let input: *const i8 = src.cast::<m256i>().cast();
    let output: *mut i8 = dst.cast::<m256i>().cast();

    let text0 = lasx_xvld::<0>(input);
    let text1 = lasx_xvld::<{ size_of::<m256i>() as i32 }>(input);

    // For each vector: the digit path maps '0'..='9' to 0..=9, the letter path
    // maps 'A'..='F' / 'a'..='f' to 10..=15; any other character yields a value
    // above 0x0F on both paths, so the minimum exposes it.
    let nibbles0 = {
        let from_digit = lasx_xvsub_b(
            lasx_xvssub_bu(lasx_xvadd_b(text0, digit_shift), six),
            digit_rebase,
        );
        let from_letter =
            lasx_xvsadd_bu(lasx_xvsub_b(lasx_xvand_v(text0, case_fold), upper_a), ten);
        lasx_xvmin_bu(from_digit, from_letter)
    };
    let nibbles1 = {
        let from_digit = lasx_xvsub_b(
            lasx_xvssub_bu(lasx_xvadd_b(text1, digit_shift), six),
            digit_rebase,
        );
        let from_letter =
            lasx_xvsadd_bu(lasx_xvsub_b(lasx_xvand_v(text1, case_fold), upper_a), ten);
        lasx_xvmin_bu(from_digit, from_letter)
    };

    let too_large = lasx_xvor_v(
        lasx_xvslt_bu(max_nibble, nibbles0),
        lasx_xvslt_bu(max_nibble, nibbles1),
    );
    let invalid = lasx_xbz_v(too_large) == 0;

    // Even lanes hold the high nibbles, odd lanes the low nibbles. The picks work
    // per 128-bit lane, so the 64-bit groups come out interleaved between the two
    // inputs; `xvpermi_d` swaps the middle two groups back into memory order.
    let high = lasx_xvpickev_b(nibbles1, nibbles0);
    let low = lasx_xvpickod_b(nibbles1, nibbles0);
    let merged = lasx_xvor_v(lasx_xvslli_b::<4>(high), low);
    let packed = lasx_xvpermi_d::<0b_11_01_10_00>(merged);
    lasx_xvst::<0>(packed, output);

    if invalid {
        Err(InvalidInput)
    } else {
        Ok(())
    }
}
/// Decodes at least 32 character pairs in double blocks of 32 pairs, then
/// dispatches the leftover (fewer than 32 pairs) to a smaller kernel.
///
/// If any full block contained a non-hex character the function returns
/// `Err(InvalidInput)` after the block loop and the leftover is not processed.
/// Otherwise the result of the leftover kernel is returned.
///
/// # Safety
///
/// The CPU must support `lasx`, `src` must be readable for at least 32 pairs and
/// `dst` writable for at least `src.len()` bytes (lengths only checked by debug
/// assertions).
#[target_feature(enable = "lasx")]
#[inline]
unsafe fn decode_lasx_unchecked_v256x2_with_trailing(
    src: *const [[u8; 2]],
    dst: *mut [MaybeUninit<u8>],
) -> Result<(), InvalidInput> {
    const BATCH_ELEMS_V256_X2: usize = size_of::<m256i>() / 2 * 2;
    debug_assert!(src.len() >= BATCH_ELEMS_V256_X2);
    debug_assert!(src.len() <= dst.len());

    let digit_shift = lasx_xvldi::<{ (0xFF_u8 - b'9') as i32 }>();
    let six = lasx_xvldi::<0x06>();
    let digit_rebase = lasx_xvldi::<0xF0>();
    let case_fold = lasx_xvldi::<0xDF>();
    let upper_a = lasx_xvldi::<{ b'A' as i32 }>();
    let ten = lasx_xvldi::<0x0A>();
    let max_nibble = lasx_xvldi::<0x0F>();

    let full_batches = src.len() / BATCH_ELEMS_V256_X2;
    let leftover = src.len() % BATCH_ELEMS_V256_X2;

    // Decodes 32 pairs at `input` into 32 bytes at `output`; returns `true` when
    // a non-hex character was seen.
    let decode_batch = |input: *const i8, output: *mut i8| -> bool {
        let text0 = lasx_xvld::<0>(input);
        let text1 = lasx_xvld::<{ size_of::<m256i>() as i32 }>(input);
        // Digit path maps '0'..='9' to 0..=9, letter path maps 'A'..='F' /
        // 'a'..='f' to 10..=15; anything else ends up above 0x0F.
        let nibbles0 = {
            let from_digit = lasx_xvsub_b(
                lasx_xvssub_bu(lasx_xvadd_b(text0, digit_shift), six),
                digit_rebase,
            );
            let from_letter =
                lasx_xvsadd_bu(lasx_xvsub_b(lasx_xvand_v(text0, case_fold), upper_a), ten);
            lasx_xvmin_bu(from_digit, from_letter)
        };
        let nibbles1 = {
            let from_digit = lasx_xvsub_b(
                lasx_xvssub_bu(lasx_xvadd_b(text1, digit_shift), six),
                digit_rebase,
            );
            let from_letter =
                lasx_xvsadd_bu(lasx_xvsub_b(lasx_xvand_v(text1, case_fold), upper_a), ten);
            lasx_xvmin_bu(from_digit, from_letter)
        };
        let too_large = lasx_xvor_v(
            lasx_xvslt_bu(max_nibble, nibbles0),
            lasx_xvslt_bu(max_nibble, nibbles1),
        );
        let bad = lasx_xbz_v(too_large) == 0;
        // The picks work per 128-bit lane; `xvpermi_d` swaps the middle two
        // 64-bit groups so the bytes end up in memory order.
        let high = lasx_xvpickev_b(nibbles1, nibbles0);
        let low = lasx_xvpickod_b(nibbles1, nibbles0);
        let merged = lasx_xvor_v(lasx_xvslli_b::<4>(high), low);
        let packed = lasx_xvpermi_d::<0b_11_01_10_00>(merged);
        lasx_xvst::<0>(packed, output);
        bad
    };

    // Each batch consumes two input vectors and produces one output vector.
    let mut any_invalid = false;
    let mut input = src.cast::<m256i>();
    let mut output = dst.cast::<m256i>();
    for _ in 0..full_batches {
        any_invalid |= decode_batch(input.cast(), output.cast());
        input = input.add(2);
        output = output.add(1);
    }
    if any_invalid {
        return Err(InvalidInput);
    }

    let consumed = full_batches * BATCH_ELEMS_V256_X2;
    let tail_src = ptr::slice_from_raw_parts(src.cast::<[u8; 2]>().add(consumed), leftover);
    let tail_dst =
        ptr::slice_from_raw_parts_mut(dst.cast::<MaybeUninit<u8>>().add(consumed), leftover);

    match tail_src.len() {
        0..8 => decode_generic_unchecked::<false>(tail_src, tail_dst),
        8 => decode_lsx_unchecked_v128_exact(tail_src, tail_dst),
        9..16 => decode_lsx_unchecked_v128_overlapped(tail_src, tail_dst),
        16 => decode_lsx_unchecked_v128x2_exact(tail_src, tail_dst),
        17..32 => decode_lsx_unchecked_v128x2_with_trailing(tail_src, tail_dst),
        // The leftover is a remainder modulo 32, so it can never reach 32.
        32.. => hint::unreachable_unchecked(),
    }
}
/// Tests that run the LSX / LASX kernels through the shared backend checkers.
///
/// Every test is ignored under Miri and whenever the corresponding target feature
/// is not enabled at compile time.
#[cfg(test)]
mod smoking {
    use super::*;
    use crate::backend::tests::{
        check_decode_validation_any_backend, check_encode_decode_any_backend,
    };

    /// Runs the LSX encoder (both `UPPER` settings) through the shared checker,
    /// first paired with the generic decoder, then with the LSX decoder.
    #[test]
    #[cfg_attr(any(miri, not(target_feature = "lsx")), ignore)]
    fn test_encode_decode_lsx() {
        let simd_decode = decode_lsx_unchecked;

        check_encode_decode_any_backend::<true>(
            encode_lsx_unchecked::<true>,
            decode_generic_unchecked::<false>,
        );
        check_encode_decode_any_backend::<false>(
            encode_lsx_unchecked::<false>,
            decode_generic_unchecked::<false>,
        );
        check_encode_decode_any_backend::<true>(encode_lsx_unchecked::<true>, simd_decode);
        check_encode_decode_any_backend::<false>(encode_lsx_unchecked::<false>, simd_decode);
    }

    /// Runs the LASX encoder (both `UPPER` settings) through the shared checker,
    /// first paired with the generic decoder, then with the LASX decoder.
    #[test]
    #[cfg_attr(any(miri, not(target_feature = "lasx")), ignore)]
    fn test_encode_decode_lasx() {
        let simd_decode = decode_lasx_unchecked;

        check_encode_decode_any_backend::<true>(
            encode_lasx_unchecked::<true>,
            decode_generic_unchecked::<false>,
        );
        check_encode_decode_any_backend::<false>(
            encode_lasx_unchecked::<false>,
            decode_generic_unchecked::<false>,
        );
        check_encode_decode_any_backend::<true>(encode_lasx_unchecked::<true>, simd_decode);
        check_encode_decode_any_backend::<false>(encode_lasx_unchecked::<false>, simd_decode);
    }

    /// Runs the shared decode-validation checker against the LSX decoder.
    #[test]
    #[cfg_attr(any(miri, not(target_feature = "lsx")), ignore)]
    fn test_decode_validation_lsx() {
        check_decode_validation_any_backend(decode_lsx_unchecked);
    }

    /// Runs the shared decode-validation checker against the LASX decoder.
    #[test]
    #[cfg_attr(any(miri, not(target_feature = "lasx")), ignore)]
    fn test_decode_validation_lasx() {
        check_decode_validation_any_backend(decode_lasx_unchecked);
    }
}