#![allow(clippy::similar_names, reason = "XXX")]
use core::mem::MaybeUninit;
use core::simd::prelude::*;
use core::{ptr, slice};
use crate::backend::generic::{decode_generic_unchecked, encode_generic_unchecked};
use crate::error::InvalidInput;
use crate::util::lut16;
/// Hex-encodes `src` into `dst`, handling 16 input bytes per 128-bit SIMD step.
///
/// Every input byte produces one `[MaybeUninit<u8>; 2]` element: the digit for
/// the high nibble followed by the digit for the low nibble. Digits are looked
/// up in the table returned by `lut16::<UPPER>()`.
///
/// Inputs shorter than one 16-byte batch, as well as the tail left over after
/// the last full batch, are delegated to `encode_generic_unchecked::<UPPER>`.
///
/// # Safety
///
/// `dst` must contain at least `src.len()` elements. This is only verified by
/// a `debug_assert!`; in release builds a shorter `dst` leads to out-of-bounds
/// writes. Any additional requirements of `encode_generic_unchecked` apply as
/// well.
// NOTE(review): the `reason = "XXX"` strings below are placeholders. The
// `dead_code` allowance presumably exists because the listed architectures use
// a dedicated backend instead of this one; confirm and fill in real reasons.
#[cfg_attr(
    any(
        target_arch = "x86",
        target_arch = "x86_64",
        target_arch = "aarch64",
        target_arch = "arm64ec",
        target_arch = "loongarch32",
        target_arch = "loongarch64",
        target_arch = "wasm32"
    ),
    allow(dead_code, reason = "XXX")
)]
pub(crate) unsafe fn encode_simd128_unchecked<const UPPER: bool>(
    src: &[u8],
    dst: &mut [[MaybeUninit<u8>; 2]],
) {
    /// Number of input bytes consumed by one SIMD step.
    #[allow(clippy::identity_op, reason = "XXX")]
    const BATCH_ELEMS_V128_X1: usize = size_of::<u8x16>() * 1;
    /// Number of output bytes produced by one SIMD step (two digits per byte).
    const OUT_BYTES_PER_BATCH: usize = 2 * BATCH_ELEMS_V128_X1;

    debug_assert!(src.len() <= dst.len());

    // Too short for even a single vector: scalar path only.
    if src.len() < BATCH_ELEMS_V128_X1 {
        encode_generic_unchecked::<UPPER>(src, dst);
        return;
    }

    let low_nibble = u8x16::splat(0b_0000_1111);
    let digits = u8x16::from_slice(lut16::<UPPER>());

    let blocks = src.chunks_exact(BATCH_ELEMS_V128_X1);
    let tail = blocks.remainder();
    let vectorized = src.len() - tail.len();

    // Output is written bytewise; a `[MaybeUninit<u8>; 2]` element is two bytes.
    let out = dst.as_mut_ptr().cast::<u8>();
    for (index, block) in blocks.enumerate() {
        let bytes = u8x16::from_slice(block);

        // Both index vectors hold values in 0..=15, so every lane selects a
        // valid table entry.
        let hi_digits = digits.swizzle_dyn(bytes >> 4);
        let lo_digits = digits.swizzle_dyn(bytes & low_nibble);

        // Interleaving yields hi0, lo0, hi1, lo1, ... split across two vectors:
        // `front` covers input bytes 0..8 and `back` covers input bytes 8..16.
        let (front, back) = u8x16::interleave(hi_digits, lo_digits);

        let block_out = out.add(index * OUT_BYTES_PER_BATCH);
        ptr::copy_nonoverlapping(front.as_array().as_ptr(), block_out, front.len());
        ptr::copy_nonoverlapping(
            back.as_array().as_ptr(),
            block_out.add(BATCH_ELEMS_V128_X1),
            back.len(),
        );
    }

    // Fewer than 16 bytes are left (possibly none); finish with the scalar path.
    encode_generic_unchecked::<UPPER>(
        tail,
        slice::from_raw_parts_mut(dst.as_mut_ptr().add(vectorized), tail.len()),
    );
}
#[cfg_attr(
any(
target_arch = "x86",
target_arch = "x86_64",
target_arch = "aarch64",
target_arch = "arm64ec",
target_arch = "loongarch32",
target_arch = "loongarch64",
target_arch = "wasm32"
),
allow(dead_code, reason = "XXX")
)]
#[allow(clippy::cast_possible_wrap, reason = "XXX")]
pub(crate) unsafe fn decode_simd128_unchecked(
src: &[[u8; 2]],
dst: &mut [MaybeUninit<u8>],
) -> Result<(), InvalidInput> {
const BATCH_ELEMS_V128_X2: usize = size_of::<i8x16>() / 2 * 2;
if src.len() >= BATCH_ELEMS_V128_X2 {
let n_c6 = i8x16::splat((0xFF_u8 - b'9') as i8);
let n_06 = i8x16::splat(0x06_i8);
let n_f0 = i8x16::splat(0xF0_u8 as i8);
let n_df = i8x16::splat(0xDF_u8 as i8);
let u_a = i8x16::splat(b'A' as i8);
let n_0a = i8x16::splat(0x0A_i8);
let n_0f = u8x16::splat(15);
let batches = src.len() / BATCH_ELEMS_V128_X2;
let remainder = src.len() % BATCH_ELEMS_V128_X2;
let mut invalid = false;
let mut decode_v128x2 = |src: *const u8, dst: *mut MaybeUninit<u8>| {
let chunk0 =
u8x16::from_slice(slice::from_raw_parts(src, size_of::<u8x16>())).cast::<i8>();
let chunk1 = u8x16::from_slice(slice::from_raw_parts(
src.add(size_of::<u8x16>()),
size_of::<u8x16>(),
))
.cast::<i8>();
let n0 = {
let d = (chunk0 + n_c6)
.cast::<u8>()
.saturating_sub(n_06.cast::<u8>())
.cast::<i8>()
- n_f0;
let a = ((chunk0 & n_df) - u_a)
.cast::<u8>()
.saturating_add(n_0a.cast::<u8>());
d.cast::<u8>().simd_min(a)
};
let n1 = {
let d = (chunk1 + n_c6)
.cast::<u8>()
.saturating_sub(n_06.cast::<u8>())
.cast::<i8>()
- n_f0;
let a = ((chunk1 & n_df) - u_a)
.cast::<u8>()
.saturating_add(n_0a.cast::<u8>());
d.cast::<u8>().simd_min(a)
};
invalid |= (n0 | n1).simd_gt(n_0f).any();
let b = {
let (hi, lo) = Simd::deinterleave(n0, n1);
(hi << 4) | lo
};
ptr::copy_nonoverlapping(b.as_array().as_ptr(), dst.cast::<u8>(), b.len());
};
for i in 0..batches {
decode_v128x2(
src.as_ptr().add(i * BATCH_ELEMS_V128_X2).cast::<u8>(),
dst.as_mut_ptr()
.add(i * BATCH_ELEMS_V128_X2)
.cast::<MaybeUninit<u8>>(),
);
}
if invalid {
return Err(InvalidInput);
}
let src = ptr::slice_from_raw_parts(
src.as_ptr()
.cast::<[u8; 2]>()
.add(batches * BATCH_ELEMS_V128_X2),
remainder,
);
let dst = ptr::slice_from_raw_parts_mut(
dst.as_mut_ptr()
.cast::<MaybeUninit<u8>>()
.add(batches * BATCH_ELEMS_V128_X2),
remainder,
);
decode_generic_unchecked::<false>(src, dst)
} else {
decode_generic_unchecked::<false>(src, dst)
}
}
/// Smoke tests running the SIMD128 backend through the shared backend checks.
#[cfg(test)]
mod smoking {
    use super::*;
    use crate::backend::tests::{
        check_decode_validation_any_backend, check_encode_decode_any_backend,
    };

    /// Raw-pointer adapter around [`decode_simd128_unchecked`], giving it the
    /// same argument shape the shared checks are called with for
    /// `decode_generic_unchecked`.
    fn decode_simd128_unchecked_test(
        src: *const [[u8; 2]],
        dst: *mut [MaybeUninit<u8>],
    ) -> Result<(), InvalidInput> {
        // SAFETY: nothing is verified here; this relies on the test harness
        // passing valid, non-overlapping slices with `dst` at least as long as
        // `src`.
        unsafe {
            let (input, output) = (&*src, &mut *dst);
            decode_simd128_unchecked(input, output)
        }
    }

    /// Round-trips data through the SIMD encoder, first paired with the
    /// generic decoder and then with the SIMD decoder, for both `UPPER`
    /// settings.
    #[test]
    fn test_encode_decode_simd128() {
        // SIMD encoder checked against the generic decoder.
        check_encode_decode_any_backend::<true>(
            encode_simd128_unchecked::<true>,
            decode_generic_unchecked::<false>,
        );
        check_encode_decode_any_backend::<false>(
            encode_simd128_unchecked::<false>,
            decode_generic_unchecked::<false>,
        );

        // SIMD encoder checked against the SIMD decoder.
        check_encode_decode_any_backend::<true>(
            encode_simd128_unchecked::<true>,
            decode_simd128_unchecked_test,
        );
        check_encode_decode_any_backend::<false>(
            encode_simd128_unchecked::<false>,
            decode_simd128_unchecked_test,
        );
    }

    /// Runs the shared decode-validation check against the SIMD decoder.
    #[test]
    fn test_decode_validation_simd128() {
        check_decode_validation_any_backend(decode_simd128_unchecked_test);
    }
}