#![allow(unsafe_code, reason = "SIMD")]
#![allow(unsafe_op_in_unsafe_fn, reason = "SIMD")]
#![allow(clippy::cast_possible_wrap, reason = "SIMD")]
#![allow(clippy::similar_names, reason = "SIMD")]
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
use core::mem::MaybeUninit;
use crate::backend::generic::{decode_generic_unchecked, encode_generic_unchecked};
use crate::error::InvalidInput;
use crate::util::{digits16, digits32, digits64};
/// Hex-encodes `src` into `dst` using SSSE3, 16 source bytes per vector step.
///
/// `UPPER` selects the upper- or lower-case digit table; `ENABLE_BATCH_X4`
/// enables a 4x-unrolled main loop (64 source bytes per iteration). Whatever
/// tail remains is finished by `encode_generic_unchecked`.
///
/// # Safety
/// - The running CPU must support SSSE3.
/// - `dst` must hold at least `src.len()` output pairs: the
///   `dst.get_unchecked_mut(..)` slicing below relies on it.
#[target_feature(enable = "ssse3")]
pub(crate) unsafe fn encode_ssse3_unchecked<const UPPER: bool, const ENABLE_BATCH_X4: bool>(
    mut src: &[u8],
    mut dst: &mut [[MaybeUninit<u8>; 2]],
) {
    const BATCH_X4: usize = BATCH * 4;
    const BATCH: usize = size_of::<__m128i>();
    if src.len() >= BATCH {
        // Low-nibble mask and the 16-entry ASCII hex-digit table used as a
        // PSHUFB lookup table.
        let m = _mm_set1_epi8(0b_0000_1111);
        let lut = _mm_loadu_si128(digits16::<UPPER>().as_ptr().cast());
        macro_rules! encode {
            ($chunk:expr) => {{
                // Split each byte into its high and low nibble, interleave them
                // so digits come out in textual order, then map each nibble to
                // its ASCII digit through the LUT.
                // Fix: use the const-generic immediate form of the shift, as
                // the AVX2/AVX512 paths below already do — `_mm_srli_epi16` in
                // `core::arch` takes the shift count as `IMM8`, not as a second
                // runtime argument.
                let hi = _mm_and_si128(_mm_srli_epi16::<4>($chunk), m);
                let lo = _mm_and_si128($chunk, m);
                let a = _mm_unpacklo_epi8(hi, lo);
                let b = _mm_unpackhi_epi8(hi, lo);
                let a = _mm_shuffle_epi8(lut, a);
                let b = _mm_shuffle_epi8(lut, b);
                (a, b)
            }};
        }
        // 4x-unrolled hot loop: 64 input bytes -> 128 output bytes per pass.
        while ENABLE_BATCH_X4 && src.len() >= BATCH_X4 {
            let chunk1 = _mm_loadu_si128(src.as_ptr().cast::<__m128i>());
            let chunk2 = _mm_loadu_si128(src.as_ptr().cast::<__m128i>().add(1));
            let chunk3 = _mm_loadu_si128(src.as_ptr().cast::<__m128i>().add(2));
            let chunk4 = _mm_loadu_si128(src.as_ptr().cast::<__m128i>().add(3));
            let (a1, b1) = encode!(chunk1);
            let (a2, b2) = encode!(chunk2);
            let (a3, b3) = encode!(chunk3);
            let (a4, b4) = encode!(chunk4);
            {
                let dst = dst.as_mut_ptr().cast::<__m128i>();
                _mm_storeu_si128(dst, a1);
                _mm_storeu_si128(dst.add(1), b1);
                _mm_storeu_si128(dst.add(2), a2);
                _mm_storeu_si128(dst.add(3), b2);
                _mm_storeu_si128(dst.add(4), a3);
                _mm_storeu_si128(dst.add(5), b3);
                _mm_storeu_si128(dst.add(6), a4);
                _mm_storeu_si128(dst.add(7), b4);
            }
            src = &src[BATCH_X4..];
            dst = dst.get_unchecked_mut(BATCH_X4..);
        }
        // One vector (16 input bytes -> 32 output bytes) at a time.
        while src.len() >= BATCH {
            let chunk = _mm_loadu_si128(src.as_ptr().cast());
            let (a, b) = encode!(chunk);
            {
                let dst = dst.as_mut_ptr().cast::<__m128i>();
                _mm_storeu_si128(dst, a);
                _mm_storeu_si128(dst.add(1), b);
            }
            src = &src[BATCH..];
            dst = dst.get_unchecked_mut(BATCH..);
        }
    }
    // Scalar fallback for the remaining < 16 bytes.
    encode_generic_unchecked::<UPPER>(src, dst);
}
/// Hex-encodes `src` into `dst` using AVX2, 32 source bytes per vector step.
///
/// `UPPER` selects the upper- or lower-case digit table; `ENABLE_BATCH_X4`
/// enables a 4x-unrolled main loop (128 source bytes per iteration). The
/// tail falls through to the SSSE3 path (with its unroll disabled).
///
/// # Safety
/// - The running CPU must support AVX2 (and SSSE3 for the tail).
/// - `dst` must hold at least `src.len()` output pairs: the
///   `dst.get_unchecked_mut(..)` slicing below relies on it.
#[target_feature(enable = "avx2")]
pub(crate) unsafe fn encode_avx2_unchecked<const UPPER: bool, const ENABLE_BATCH_X4: bool>(
    mut src: &[u8],
    mut dst: &mut [[MaybeUninit<u8>; 2]],
) {
    const BATCH_X4: usize = BATCH * 4;
    const BATCH: usize = size_of::<__m256i>();
    if src.len() >= if ENABLE_BATCH_X4 { BATCH_X4 } else { BATCH } {
        // Low-nibble mask and the digit table laid out for the per-lane
        // VPSHUFB lookup.
        let m = _mm256_set1_epi8(0b_0000_1111);
        let lut = _mm256_loadu_si256(digits32::<UPPER>().as_ptr().cast());
        macro_rules! encode {
            ($chunk:expr) => {{
                // Split nibbles and interleave hi/lo; unpack works within each
                // 128-bit lane, so the two cross-lane permutes reassemble the
                // halves into textual byte order before the LUT lookup.
                let hi = _mm256_and_si256(_mm256_srli_epi16::<4>($chunk), m);
                let lo = _mm256_and_si256($chunk, m);
                let ac = _mm256_unpacklo_epi8(hi, lo);
                let bd = _mm256_unpackhi_epi8(hi, lo);
                let ab = _mm256_permute2x128_si256::<0x20>(ac, bd);
                let cd = _mm256_permute2x128_si256::<0x31>(ac, bd);
                let ab = _mm256_shuffle_epi8(lut, ab);
                let cd = _mm256_shuffle_epi8(lut, cd);
                (ab, cd)
            }};
        }
        // 4x-unrolled hot loop: 128 input bytes -> 256 output bytes per pass.
        while ENABLE_BATCH_X4 && src.len() >= BATCH_X4 {
            let chunk1 = _mm256_loadu_si256(src.as_ptr().cast::<__m256i>());
            let chunk2 = _mm256_loadu_si256(src.as_ptr().cast::<__m256i>().add(1));
            let chunk3 = _mm256_loadu_si256(src.as_ptr().cast::<__m256i>().add(2));
            let chunk4 = _mm256_loadu_si256(src.as_ptr().cast::<__m256i>().add(3));
            let (ab1, cd1) = encode!(chunk1);
            let (ab2, cd2) = encode!(chunk2);
            let (ab3, cd3) = encode!(chunk3);
            let (ab4, cd4) = encode!(chunk4);
            {
                let dst = dst.as_mut_ptr().cast::<__m256i>();
                _mm256_storeu_si256(dst, ab1);
                _mm256_storeu_si256(dst.add(1), cd1);
                _mm256_storeu_si256(dst.add(2), ab2);
                _mm256_storeu_si256(dst.add(3), cd2);
                _mm256_storeu_si256(dst.add(4), ab3);
                _mm256_storeu_si256(dst.add(5), cd3);
                _mm256_storeu_si256(dst.add(6), ab4);
                _mm256_storeu_si256(dst.add(7), cd4);
            }
            src = &src[BATCH_X4..];
            dst = dst.get_unchecked_mut(BATCH_X4..);
        }
        // One vector (32 input bytes -> 64 output bytes) at a time.
        while src.len() >= BATCH {
            let chunk = _mm256_loadu_si256(src.as_ptr().cast());
            let (ab, cd) = encode!(chunk);
            {
                let dst = dst.as_mut_ptr().cast::<__m256i>();
                _mm256_storeu_si256(dst, ab);
                _mm256_storeu_si256(dst.add(1), cd);
            }
            src = &src[BATCH..];
            dst = dst.get_unchecked_mut(BATCH..);
        }
    }
    // SSSE3 (then scalar) fallback for the remaining < 32 bytes.
    encode_ssse3_unchecked::<UPPER, false>(src, dst);
}
/// Hex-encodes `src` into `dst` using AVX-512 (F + BW), 64 source bytes per
/// vector step.
///
/// `UPPER` selects the upper- or lower-case digit table; `ENABLE_BATCH_X4`
/// enables a 4x-unrolled main loop (256 source bytes per iteration). The
/// tail falls through to the AVX2 path (with its unroll disabled).
///
/// # Safety
/// - The running CPU must support AVX-512F and AVX-512BW (plus AVX2/SSSE3
///   for the tail).
/// - `dst` must hold at least `src.len()` output pairs: the
///   `dst.get_unchecked_mut(..)` slicing below relies on it.
#[target_feature(enable = "avx512f,avx512bw")]
pub(crate) unsafe fn encode_avx512_unchecked<const UPPER: bool, const ENABLE_BATCH_X4: bool>(
    mut src: &[u8],
    mut dst: &mut [[MaybeUninit<u8>; 2]],
) {
    const BATCH_X4: usize = BATCH * 4;
    const BATCH: usize = size_of::<__m512i>();
    if src.len() >= if ENABLE_BATCH_X4 { BATCH_X4 } else { BATCH } {
        let m = _mm512_set1_epi8(0b1111);
        let lut = _mm512_loadu_si512(digits64::<UPPER>().as_ptr().cast());
        // Qword-index tables for the two cross-lane permutes that put the
        // per-lane unpack results back into textual byte order.
        let idx_abcd = _mm512_set_epi64(11, 10, 3, 2, 9, 8, 1, 0);
        let idx_efgh = _mm512_set_epi64(15, 14, 7, 6, 13, 12, 5, 4);
        macro_rules! encode {
            ($chunk:expr) => {{
                // Split nibbles and interleave hi/lo; unpack works within each
                // 128-bit lane, so the permutex2var steps reassemble the four
                // lanes into two in-order 64-byte halves before the LUT lookup.
                let hi = _mm512_and_si512(_mm512_srli_epi16::<4>($chunk), m);
                let lo = _mm512_and_si512($chunk, m);
                let aceg = _mm512_unpacklo_epi8(hi, lo);
                let bfdh = _mm512_unpackhi_epi8(hi, lo);
                let abcd = _mm512_permutex2var_epi64(aceg, idx_abcd, bfdh);
                let efgh = _mm512_permutex2var_epi64(aceg, idx_efgh, bfdh);
                let abcd = _mm512_shuffle_epi8(lut, abcd);
                let efgh = _mm512_shuffle_epi8(lut, efgh);
                (abcd, efgh)
            }};
        }
        // 4x-unrolled hot loop: 256 input bytes -> 512 output bytes per pass.
        while ENABLE_BATCH_X4 && src.len() >= BATCH_X4 {
            let chunk1 = _mm512_loadu_si512(src.as_ptr().cast::<__m512i>());
            let chunk2 = _mm512_loadu_si512(src.as_ptr().cast::<__m512i>().add(1));
            let chunk3 = _mm512_loadu_si512(src.as_ptr().cast::<__m512i>().add(2));
            let chunk4 = _mm512_loadu_si512(src.as_ptr().cast::<__m512i>().add(3));
            let (abcd1, efgh1) = encode!(chunk1);
            let (abcd2, efgh2) = encode!(chunk2);
            let (abcd3, efgh3) = encode!(chunk3);
            let (abcd4, efgh4) = encode!(chunk4);
            {
                let out = dst.as_mut_ptr().cast::<__m512i>();
                _mm512_storeu_si512(out, abcd1);
                _mm512_storeu_si512(out.add(1), efgh1);
                _mm512_storeu_si512(out.add(2), abcd2);
                _mm512_storeu_si512(out.add(3), efgh2);
                _mm512_storeu_si512(out.add(4), abcd3);
                _mm512_storeu_si512(out.add(5), efgh3);
                _mm512_storeu_si512(out.add(6), abcd4);
                _mm512_storeu_si512(out.add(7), efgh4);
            }
            src = &src[BATCH_X4..];
            dst = dst.get_unchecked_mut(BATCH_X4..);
        }
        // One vector (64 input bytes -> 128 output bytes) at a time.
        while src.len() >= BATCH {
            let chunk = _mm512_loadu_si512(src.as_ptr().cast());
            let (abcd, efgh) = encode!(chunk);
            {
                let out = dst.as_mut_ptr().cast::<__m512i>();
                _mm512_storeu_si512(out, abcd);
                _mm512_storeu_si512(out.add(1), efgh);
            }
            src = &src[BATCH..];
            dst = dst.get_unchecked_mut(BATCH..);
        }
    }
    // AVX2 (then SSSE3, then scalar) fallback for the remaining < 64 bytes.
    encode_avx2_unchecked::<UPPER, false>(src, dst);
}
/// Decodes ASCII hex digit pairs from `src` into bytes in `dst` using SSSE3,
/// one 16-digit vector (8 output bytes) per step.
///
/// Returns `Err(InvalidInput)` if any byte is not an ASCII hex digit; partial
/// output may already have been written in that case. The tail is finished
/// by `decode_generic_unchecked`.
///
/// # Safety
/// - The running CPU must support SSSE3.
/// - `dst` must hold at least `src.len()` bytes: the
///   `dst.get_unchecked_mut(..)` slicing below relies on it.
#[target_feature(enable = "ssse3")]
pub(crate) unsafe fn decode_ssse3_unchecked<const ENABLE_BATCH_X4: bool>(
    mut src: &[[u8; 2]],
    mut dst: &mut [MaybeUninit<u8>],
) -> Result<(), InvalidInput> {
    const BATCH_X4: usize = BATCH * 4;
    // One vector holds 8 input pairs (16 digit bytes) -> 8 output bytes.
    const BATCH: usize = size_of::<__m128i>() / 2;
    if src.len() >= BATCH {
        // Constants for the branchless digit-to-nibble conversion below.
        let n_c6 = _mm_set1_epi8((0xFF_u8 - b'9') as i8);
        let n_06 = _mm_set1_epi8(0x06);
        let n_f0 = _mm_set1_epi8(0xF0_u8 as i8);
        let n_df = _mm_set1_epi8(0xDF_u8 as i8);
        let u_a = _mm_set1_epi8(b'A' as i8);
        let n_0a = _mm_set1_epi8(0x0A);
        macro_rules! n {
            ($chunk:expr) => {{
                // `d` maps '0'..='9' to 0..=9 (anything else ends up > 0x0F);
                // `a` clears the ASCII case bit (& 0xDF), then maps
                // 'A'..='F'/'a'..='f' to 10..=15, saturating non-letters high.
                // The unsigned min picks whichever interpretation is valid;
                // invalid bytes stay > 0x0F under both.
                let d = _mm_sub_epi8(_mm_subs_epu8(_mm_add_epi8($chunk, n_c6), n_06), n_f0);
                let a = _mm_adds_epu8(_mm_sub_epi8(_mm_and_si128($chunk, n_df), u_a), n_0a);
                _mm_min_epu8(d, a)
            }};
        }
        // Saturating-add of 112 pushes any nibble value > 15 to >= 0x80, so a
        // non-zero sign-bit movemask means "some byte was not a hex digit".
        let trick = _mm_set1_epi8(127 - 15);
        // maddubs with per-pair weights (16, 1) combines each (hi, lo) nibble
        // pair into `hi * 16 + lo` within a 16-bit lane.
        let weights = _mm_set1_epi16(0x0110);
        // 4x-unrolled hot loop: 32 input pairs -> 32 output bytes per pass.
        while ENABLE_BATCH_X4 && src.len() >= BATCH_X4 {
            let chunk1 = _mm_loadu_si128(src.as_ptr().cast::<__m128i>());
            let chunk2 = _mm_loadu_si128(src.as_ptr().cast::<__m128i>().add(1));
            let chunk3 = _mm_loadu_si128(src.as_ptr().cast::<__m128i>().add(2));
            let chunk4 = _mm_loadu_si128(src.as_ptr().cast::<__m128i>().add(3));
            let n1 = n!(chunk1);
            let n2 = n!(chunk2);
            let n3 = n!(chunk3);
            let n4 = n!(chunk4);
            {
                // Validate all four vectors with a single movemask.
                let bad = _mm_or_si128(
                    _mm_or_si128(_mm_adds_epu8(n1, trick), _mm_adds_epu8(n2, trick)),
                    _mm_or_si128(_mm_adds_epu8(n3, trick), _mm_adds_epu8(n4, trick)),
                );
                if _mm_movemask_epi8(bad) != 0 {
                    return Err(InvalidInput);
                }
            }
            let b1 = _mm_maddubs_epi16(n1, weights);
            let b2 = _mm_maddubs_epi16(n2, weights);
            let b3 = _mm_maddubs_epi16(n3, weights);
            let b4 = _mm_maddubs_epi16(n4, weights);
            // Each 16-bit lane holds a byte value <= 255, so the unsigned
            // pack narrows two vectors into one without saturating.
            let b12 = _mm_packus_epi16(b1, b2);
            let b34 = _mm_packus_epi16(b3, b4);
            _mm_storeu_si128(dst.as_mut_ptr().cast::<__m128i>(), b12);
            _mm_storeu_si128(dst.as_mut_ptr().cast::<__m128i>().add(1), b34);
            src = &src[BATCH_X4..];
            dst = dst.get_unchecked_mut(BATCH_X4..);
        }
        // One vector (8 input pairs -> 8 output bytes) at a time.
        while src.len() >= BATCH {
            let chunk = _mm_loadu_si128(src.as_ptr().cast::<__m128i>());
            let n = n!(chunk);
            if _mm_movemask_epi8(_mm_adds_epu8(n, trick)) != 0 {
                return Err(InvalidInput);
            }
            let bytes = {
                let b = _mm_maddubs_epi16(n, weights);
                // Compact the low byte of every 16-bit lane into the first 8
                // bytes; only those 8 bytes are stored below.
                _mm_shuffle_epi8(
                    b,
                    _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1),
                )
            };
            _mm_storel_epi64(dst.as_mut_ptr().cast(), bytes);
            src = &src[BATCH..];
            dst = dst.get_unchecked_mut(BATCH..);
        }
    }
    // Scalar fallback for the remaining < 8 pairs.
    decode_generic_unchecked::<false>(src, dst)
}
/// Decodes ASCII hex digit pairs from `src` into bytes in `dst` using AVX2,
/// one 32-digit vector (16 output bytes) per step.
///
/// Returns `Err(InvalidInput)` if any byte is not an ASCII hex digit; partial
/// output may already have been written in that case. The tail falls through
/// to the SSSE3 path (with its unroll disabled).
///
/// # Safety
/// - The running CPU must support AVX2 (and SSSE3 for the tail).
/// - `dst` must hold at least `src.len()` bytes: the
///   `dst.get_unchecked_mut(..)` slicing below relies on it.
#[target_feature(enable = "avx2")]
pub(crate) unsafe fn decode_avx2_unchecked<const ENABLE_BATCH_X4: bool>(
    mut src: &[[u8; 2]],
    mut dst: &mut [MaybeUninit<u8>],
) -> Result<(), InvalidInput> {
    const BATCH_X4: usize = BATCH * 4;
    // One vector holds 16 input pairs (32 digit bytes) -> 16 output bytes.
    const BATCH: usize = size_of::<__m256i>() / 2;
    if src.len() >= if ENABLE_BATCH_X4 { BATCH_X4 } else { BATCH } {
        // Constants for the branchless digit-to-nibble conversion below.
        let n_c6 = _mm256_set1_epi8((0xFF_u8 - b'9') as i8);
        let n_06 = _mm256_set1_epi8(0x06);
        let n_f0 = _mm256_set1_epi8(0xF0_u8 as i8);
        let n_df = _mm256_set1_epi8(0xDF_u8 as i8);
        let u_a = _mm256_set1_epi8(b'A' as i8);
        let n_0a = _mm256_set1_epi8(0x0A);
        macro_rules! n {
            ($chunk:expr) => {{
                // `d` maps '0'..='9' to 0..=9; `a` clears the case bit and
                // maps 'A'..='F'/'a'..='f' to 10..=15, saturating non-letters
                // high. Unsigned min selects the valid interpretation;
                // invalid bytes stay > 0x0F under both.
                let d =
                    _mm256_sub_epi8(_mm256_subs_epu8(_mm256_add_epi8($chunk, n_c6), n_06), n_f0);
                let a =
                    _mm256_adds_epu8(_mm256_sub_epi8(_mm256_and_si256($chunk, n_df), u_a), n_0a);
                _mm256_min_epu8(d, a)
            }};
        }
        // Saturating-add of 112 pushes any nibble value > 15 to >= 0x80, so a
        // non-zero sign-bit movemask means "some byte was not a hex digit".
        let trick = _mm256_set1_epi8(127 - 15);
        // maddubs with per-pair weights (16, 1) combines each (hi, lo) nibble
        // pair into `hi * 16 + lo` within a 16-bit lane.
        let weights = _mm256_set1_epi16(0x0110);
        // 4x-unrolled hot loop: 64 input pairs -> 64 output bytes per pass.
        while ENABLE_BATCH_X4 && src.len() >= BATCH_X4 {
            let chunk1 = _mm256_loadu_si256(src.as_ptr().cast::<__m256i>());
            let chunk2 = _mm256_loadu_si256(src.as_ptr().cast::<__m256i>().add(1));
            let chunk3 = _mm256_loadu_si256(src.as_ptr().cast::<__m256i>().add(2));
            let chunk4 = _mm256_loadu_si256(src.as_ptr().cast::<__m256i>().add(3));
            let n1 = n!(chunk1);
            let n2 = n!(chunk2);
            let n3 = n!(chunk3);
            let n4 = n!(chunk4);
            {
                // Validate all four vectors with a single movemask.
                let bad = _mm256_or_si256(
                    _mm256_or_si256(_mm256_adds_epu8(n1, trick), _mm256_adds_epu8(n2, trick)),
                    _mm256_or_si256(_mm256_adds_epu8(n3, trick), _mm256_adds_epu8(n4, trick)),
                );
                if _mm256_movemask_epi8(bad) != 0 {
                    return Err(InvalidInput);
                }
            }
            let b1 = _mm256_maddubs_epi16(n1, weights);
            let b2 = _mm256_maddubs_epi16(n2, weights);
            let b3 = _mm256_maddubs_epi16(n3, weights);
            let b4 = _mm256_maddubs_epi16(n4, weights);
            // packus narrows within 128-bit lanes, so a qword permute is
            // needed to restore the output byte order.
            let b12 = _mm256_permute4x64_epi64::<0b11_01_10_00>(_mm256_packus_epi16(b1, b2));
            let b34 = _mm256_permute4x64_epi64::<0b11_01_10_00>(_mm256_packus_epi16(b3, b4));
            _mm256_storeu_si256(dst.as_mut_ptr().cast::<__m256i>(), b12);
            _mm256_storeu_si256(dst.as_mut_ptr().cast::<__m256i>().add(1), b34);
            src = &src[BATCH_X4..];
            dst = dst.get_unchecked_mut(BATCH_X4..);
        }
        // One vector (16 input pairs -> 16 output bytes) at a time.
        while src.len() >= BATCH {
            let chunk = _mm256_loadu_si256(src.as_ptr().cast::<__m256i>());
            let n = n!(chunk);
            if _mm256_movemask_epi8(_mm256_adds_epu8(n, trick)) != 0 {
                return Err(InvalidInput);
            }
            let bytes = {
                let b = _mm256_maddubs_epi16(n, weights);
                // Per-lane shuffle compacts the low byte of each 16-bit lane
                // into the first 8 bytes of each 128-bit lane; the qword
                // permute then gathers both lanes' results into the low half.
                let packed = _mm256_shuffle_epi8(
                    b,
                    _mm256_setr_epi8(
                        0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1, 0, 2, 4, 6, 8,
                        10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1,
                    ),
                );
                _mm256_permute4x64_epi64::<0b11_01_10_00>(packed)
            };
            // Only the low 16 bytes are meaningful; store them as one xmm.
            _mm_storeu_si128(dst.as_mut_ptr().cast(), _mm256_castsi256_si128(bytes));
            src = &src[BATCH..];
            dst = dst.get_unchecked_mut(BATCH..);
        }
    }
    // SSSE3 (then scalar) fallback for the remaining < 16 pairs.
    decode_ssse3_unchecked::<false>(src, dst)
}
/// Decodes ASCII hex digit pairs from `src` into bytes in `dst` using
/// AVX-512 (F + BW), one 64-digit vector (32 output bytes) per step.
///
/// Returns `Err(InvalidInput)` if any byte is not an ASCII hex digit; partial
/// output may already have been written in that case. The tail falls through
/// to the AVX2 path (with its unroll disabled).
///
/// # Safety
/// - The running CPU must support AVX-512F and AVX-512BW (plus AVX2/SSSE3
///   for the tail).
/// - `dst` must hold at least `src.len()` bytes: the
///   `dst.get_unchecked_mut(..)` slicing below relies on it.
#[target_feature(enable = "avx512f,avx512bw")]
pub(crate) unsafe fn decode_avx512_unchecked<const ENABLE_BATCH_X4: bool>(
    mut src: &[[u8; 2]],
    mut dst: &mut [MaybeUninit<u8>],
) -> Result<(), InvalidInput> {
    const BATCH_X4: usize = BATCH * 4;
    // One vector holds 32 input pairs (64 digit bytes) -> 32 output bytes.
    const BATCH: usize = size_of::<__m512i>() / 2;
    if src.len() >= if ENABLE_BATCH_X4 { BATCH_X4 } else { BATCH } {
        // Constants for the branchless digit-to-nibble conversion below.
        let n_c6 = _mm512_set1_epi8((0xFF_u8 - b'9') as i8);
        let n_06 = _mm512_set1_epi8(0x06);
        let n_f0 = _mm512_set1_epi8(0xF0_u8 as i8);
        let n_df = _mm512_set1_epi8(0xDF_u8 as i8);
        let u_a = _mm512_set1_epi8(b'A' as i8);
        let n_0a = _mm512_set1_epi8(0x0A);
        macro_rules! n {
            ($chunk:expr) => {{
                // `d` maps '0'..='9' to 0..=9; `a` clears the case bit and
                // maps 'A'..='F'/'a'..='f' to 10..=15, saturating non-letters
                // high. Unsigned min selects the valid interpretation;
                // invalid bytes stay > 0x0F under both.
                let d =
                    _mm512_sub_epi8(_mm512_subs_epu8(_mm512_add_epi8($chunk, n_c6), n_06), n_f0);
                let a =
                    _mm512_adds_epu8(_mm512_sub_epi8(_mm512_and_si512($chunk, n_df), u_a), n_0a);
                _mm512_min_epu8(d, a)
            }};
        }
        // AVX-512 has mask-register compares, so validation is a direct
        // unsigned "greater than 0x0F" test instead of the movemask trick.
        let n_0f = _mm512_set1_epi8(0x0F);
        // maddubs with per-pair weights (16, 1) combines each (hi, lo) nibble
        // pair into `hi * 16 + lo` within a 16-bit lane.
        let weights = _mm512_set1_epi16(0x0110);
        // 4x-unrolled hot loop: 128 input pairs -> 128 output bytes per pass.
        while ENABLE_BATCH_X4 && src.len() >= BATCH_X4 {
            let chunk1 = _mm512_loadu_si512(src.as_ptr().cast::<__m512i>());
            let chunk2 = _mm512_loadu_si512(src.as_ptr().cast::<__m512i>().add(1));
            let chunk3 = _mm512_loadu_si512(src.as_ptr().cast::<__m512i>().add(2));
            let chunk4 = _mm512_loadu_si512(src.as_ptr().cast::<__m512i>().add(3));
            let n1 = n!(chunk1);
            let n2 = n!(chunk2);
            let n3 = n!(chunk3);
            let n4 = n!(chunk4);
            {
                let bad1: u64 = _mm512_cmpgt_epu8_mask(n1, n_0f);
                let bad2: u64 = _mm512_cmpgt_epu8_mask(n2, n_0f);
                let bad3: u64 = _mm512_cmpgt_epu8_mask(n3, n_0f);
                let bad4: u64 = _mm512_cmpgt_epu8_mask(n4, n_0f);
                if (bad1 | bad2 | bad3 | bad4) != 0 {
                    return Err(InvalidInput);
                }
            }
            // cvtepi16_epi8 truncates each 16-bit lane to its low byte,
            // narrowing a zmm of byte values into an in-order ymm.
            let b1 = _mm512_cvtepi16_epi8(_mm512_maddubs_epi16(n1, weights));
            let b2 = _mm512_cvtepi16_epi8(_mm512_maddubs_epi16(n2, weights));
            let b3 = _mm512_cvtepi16_epi8(_mm512_maddubs_epi16(n3, weights));
            let b4 = _mm512_cvtepi16_epi8(_mm512_maddubs_epi16(n4, weights));
            _mm256_storeu_si256(dst.as_mut_ptr().cast::<__m256i>(), b1);
            _mm256_storeu_si256(dst.as_mut_ptr().cast::<__m256i>().add(1), b2);
            _mm256_storeu_si256(dst.as_mut_ptr().cast::<__m256i>().add(2), b3);
            _mm256_storeu_si256(dst.as_mut_ptr().cast::<__m256i>().add(3), b4);
            src = &src[BATCH_X4..];
            dst = dst.get_unchecked_mut(BATCH_X4..);
        }
        // One vector (32 input pairs -> 32 output bytes) at a time.
        while src.len() >= BATCH {
            let chunk = _mm512_loadu_si512(src.as_ptr().cast::<__m512i>());
            let n = n!(chunk);
            if _mm512_cmpgt_epu8_mask(n, n_0f) != 0 {
                return Err(InvalidInput);
            }
            let bytes = {
                let b = _mm512_maddubs_epi16(n, weights);
                _mm512_cvtepi16_epi8(b)
            };
            _mm256_storeu_si256(dst.as_mut_ptr().cast(), bytes);
            src = &src[BATCH..];
            dst = dst.get_unchecked_mut(BATCH..);
        }
    }
    // AVX2 (then SSSE3, then scalar) fallback for the remaining < 32 pairs.
    decode_avx2_unchecked::<false>(src, dst)
}
// Smoke tests: encode with each SIMD backend, compare against a scalar
// reference expansion built from the digit tables, then round-trip the
// produced text through the decoders. `test_validation` checks that every
// non-hex byte, at every tested position/length, is rejected.
#[cfg(test)]
mod smoking {
    use alloc::string::String;
    use alloc::vec;
    use alloc::vec::Vec;
    use core::mem::MaybeUninit;
    use core::slice;
    use super::*;
    use crate::util::{DIGITS_LOWER_16, DIGITS_UPPER_16};
    // Encodes `Case` with `Encode` (both letter cases, batch-x4 enabled),
    // checks the output against a per-byte scalar expansion, then decodes the
    // produced text back with every listed `Decode` function and compares the
    // round-trip result to the original input.
    macro_rules! test {
        (
            Encode = $encode_f:ident;
            Decode = $($decode_f:ident),*;
            Case = $i:expr
        ) => {{
            let input = $i;
            // Scalar reference: two hex digits per input byte.
            let expected_lower = input
                .iter()
                .flat_map(|b| [
                    DIGITS_LOWER_16[(*b >> 4) as usize] as char,
                    DIGITS_LOWER_16[(*b & 0b1111) as usize] as char,
                ])
                .collect::<String>();
            let expected_upper = input
                .iter()
                .flat_map(|b| [
                    DIGITS_UPPER_16[(*b >> 4) as usize] as char,
                    DIGITS_UPPER_16[(*b & 0b1111) as usize] as char,
                ])
                .collect::<String>();
            let mut output_lower = vec![[MaybeUninit::<u8>::uninit(); 2]; input.len()];
            let mut output_upper = vec![[MaybeUninit::<u8>::uninit(); 2]; input.len()];
            unsafe {
                $encode_f::<false, true>(input, &mut output_lower);
                $encode_f::<true, true>(input, &mut output_upper);
            }
            // SAFETY: the encoder initialized every output pair above.
            let output_lower = unsafe {
                slice::from_raw_parts(
                    output_lower.as_ptr().cast::<[u8; 2]>(),
                    output_lower.len(),
                )
            };
            let output_upper = unsafe {
                slice::from_raw_parts(
                    output_upper.as_ptr().cast::<[u8; 2]>(),
                    output_upper.len(),
                )
            };
            assert_eq!(
                output_lower.as_flattened(),
                expected_lower.as_bytes(),
                "Encode error, expect \"{expected_lower}\", got \"{}\" ({:?})",
                str::from_utf8(output_lower.as_flattened()).unwrap_or("<invalid utf-8>"),
                output_lower.as_flattened()
            );
            assert_eq!(
                output_upper.as_flattened(),
                expected_upper.as_bytes(),
                "Encode error, expect \"{expected_upper}\", got \"{}\" ({:?})",
                str::from_utf8(output_upper.as_flattened()).unwrap_or("<invalid utf-8>"),
                output_upper.as_flattened()
            );
            // Round-trip through each decoder and compare to the input.
            $({
                let mut decoded_lower = vec![MaybeUninit::<u8>::uninit(); input.len()];
                let mut decoded_upper = vec![MaybeUninit::<u8>::uninit(); input.len()];
                unsafe {
                    $decode_f::<true>(output_lower, &mut decoded_lower).unwrap();
                    $decode_f::<true>(output_upper, &mut decoded_upper).unwrap();
                    assert_eq!(
                        decoded_lower.assume_init_ref(),
                        input,
                        "Decode error for {}, expect {:?}, got {:?}",
                        stringify!($decode_f),
                        input,
                        decoded_lower.assume_init_ref()
                    );
                    assert_eq!(
                        decoded_upper.assume_init_ref(),
                        input,
                        "Decode error for {}, expect {:?}, got {:?}",
                        stringify!($decode_f),
                        input,
                        decoded_upper.assume_init_ref()
                    );
                }
            })*
        }};
    }
    // 513 fixed random-looking bytes; slice prefixes exercise the scalar
    // tail, single-vector, and batch-x4 paths of every backend.
    const CASE: &[u8; 513] = &[
        0xBD, 0xE8, 0xAC, 0xA5, 0x82, 0x41, 0x8A, 0x10, 0x66, 0x56, 0xE4, 0xF3, 0x06, 0x13, 0xD0,
        0x06, 0x3E, 0x19, 0x4B, 0x7E, 0xE1, 0xAB, 0x24, 0x03, 0x29, 0xD0, 0x8B, 0x91, 0x06, 0x56,
        0xF4, 0x44, 0x4E, 0x7B, 0x00, 0x76, 0xFB, 0xA3, 0xB4, 0x4F, 0x9E, 0x4E, 0x3E, 0x20, 0x89,
        0x29, 0x17, 0x47, 0x4D, 0x59, 0xF7, 0x9E, 0xAE, 0x0A, 0xB4, 0x16, 0xEB, 0x2B, 0x0D, 0xA2,
        0x35, 0x99, 0x1D, 0x94, 0xA0, 0x23, 0xFF, 0x60, 0x0F, 0x67, 0xDB, 0xB5, 0xEF, 0x89, 0xC2,
        0x3C, 0x2C, 0x24, 0x0E, 0x04, 0x05, 0x35, 0x31, 0xAA, 0x88, 0xB4, 0x04, 0x82, 0x21, 0x8B,
        0x24, 0x88, 0x3F, 0x19, 0x94, 0x36, 0xDB, 0x52, 0x9E, 0x89, 0x7D, 0x53, 0x6D, 0x8D, 0xDF,
        0xF7, 0xFD, 0x2A, 0x8F, 0x4B, 0x20, 0xAB, 0xAC, 0xA4, 0x4B, 0xBB, 0x5C, 0x10, 0x0D, 0x7B,
        0xEF, 0x3A, 0x03, 0xF7, 0x4D, 0x15, 0x10, 0x8C, 0xB1, 0x0A, 0x86, 0x6A, 0x19, 0x6F, 0x25,
        0xA6, 0xE3, 0x4B, 0xA8, 0x9D, 0x78, 0xC7, 0x19, 0x19, 0x09, 0x05, 0x08, 0x9A, 0xA1, 0x67,
        0x48, 0xF7, 0x9E, 0x3C, 0xFA, 0xD3, 0xFD, 0x5E, 0x1A, 0x09, 0xD8, 0x85, 0x7F, 0xA5, 0x73,
        0x34, 0xBF, 0x93, 0xCC, 0xF4, 0x8D, 0x8A, 0x62, 0xBD, 0xD5, 0x67, 0x39, 0x0D, 0xB7, 0x41,
        0x94, 0x7D, 0xB5, 0xB3, 0x5B, 0x95, 0x1F, 0x43, 0xE4, 0x77, 0x40, 0x41, 0x9E, 0x26, 0x34,
        0x73, 0x0D, 0x93, 0x0C, 0xE9, 0xB7, 0x3C, 0x97, 0x3D, 0xA4, 0xBC, 0xAA, 0xDA, 0xA9, 0xFB,
        0x78, 0xD8, 0xE4, 0xB4, 0xE8, 0x88, 0x29, 0x9B, 0xE4, 0x5B, 0xF4, 0x56, 0xC4, 0x0D, 0x50,
        0x05, 0x0F, 0x84, 0x51, 0xD4, 0x96, 0x3E, 0xC5, 0x4F, 0xCD, 0xEF, 0x2B, 0x0F, 0x78, 0x1D,
        0xE6, 0x4A, 0x90, 0xC6, 0xD8, 0xF7, 0x88, 0x0D, 0x58, 0x2C, 0xE7, 0x37, 0x4A, 0x94, 0x5F,
        0x56, 0x68, 0x84, 0xEE, 0xD2, 0xD6, 0x8A, 0xC9, 0x8A, 0x90, 0x70, 0xF7, 0x51, 0xC9, 0xD1,
        0x86, 0x5A, 0xB2, 0xD5, 0x91, 0xDB, 0xDF, 0x36, 0xF1, 0xD3, 0x69, 0xB1, 0x7D, 0x39, 0x0E,
        0xCC, 0x86, 0xEF, 0xBD, 0xBD, 0x13, 0x52, 0x2A, 0xFC, 0x72, 0x78, 0x14, 0x28, 0xDD, 0xD5,
        0xEE, 0xF8, 0x72, 0x0F, 0x26, 0x76, 0xC6, 0x5E, 0x1B, 0x50, 0x30, 0xDB, 0x93, 0xD9, 0x20,
        0xA3, 0x07, 0x4D, 0x85, 0x50, 0x40, 0x28, 0x1E, 0x40, 0x4B, 0x96, 0xD6, 0x8C, 0xAF, 0x8E,
        0xD4, 0xD7, 0x81, 0x31, 0x97, 0x47, 0x2A, 0x95, 0xC3, 0x03, 0xA2, 0x40, 0xC9, 0x55, 0xBF,
        0x64, 0x1A, 0xAB, 0x81, 0xA1, 0x6B, 0x6A, 0x56, 0x81, 0xDD, 0xD2, 0x68, 0x1D, 0xB7, 0xDB,
        0xD6, 0x9E, 0xDA, 0x84, 0xFC, 0x5B, 0xE0, 0x34, 0xAD, 0x61, 0x5E, 0xD1, 0xF5, 0x74, 0x79,
        0xE9, 0xED, 0xB5, 0x31, 0x3C, 0x7F, 0xB1, 0x44, 0xE0, 0x23, 0xAE, 0xBD, 0x9E, 0x13, 0x8A,
        0x9D, 0xAF, 0x48, 0x75, 0x06, 0x16, 0x58, 0x4A, 0x8B, 0xD3, 0xB7, 0x06, 0x14, 0xB5, 0x92,
        0xE2, 0xA1, 0x9F, 0xCF, 0x42, 0x3E, 0x99, 0x24, 0xE4, 0x65, 0x93, 0x84, 0x83, 0x66, 0x26,
        0x28, 0xEA, 0x3F, 0x05, 0x4E, 0xAC, 0x7C, 0x96, 0xF2, 0x50, 0x22, 0xF3, 0xCD, 0x90, 0x81,
        0x73, 0xBD, 0x3D, 0xCA, 0xD1, 0x2F, 0xC2, 0x3F, 0x20, 0xF0, 0x1C, 0x41, 0x9D, 0x9A, 0x85,
        0x1A, 0xC4, 0xB1, 0xE3, 0xBA, 0x52, 0xE7, 0xE3, 0x22, 0x72, 0x98, 0x76, 0xEC, 0x0B, 0xC4,
        0x07, 0xA3, 0x05, 0x01, 0xC0, 0x40, 0xA7, 0x0E, 0x8A, 0x0F, 0xDE, 0x5F, 0x65, 0xA3, 0x89,
        0x34, 0x3B, 0xFD, 0x9F, 0xE4, 0xB1, 0x6C, 0x1B, 0x40, 0xE6, 0xC2, 0x58, 0xE3, 0x62, 0xFC,
        0xB0, 0x22, 0x02, 0xD2, 0xE2, 0xF6, 0xFD, 0x4D, 0x64, 0xF5, 0x17, 0x07, 0x04, 0x34, 0x50,
        0x04, 0xEF, 0xAB,
    ];
    // Lengths straddle the 16-byte vector and 64-byte batch boundaries.
    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_ssse3() {
        test! {
            Encode = encode_ssse3_unchecked;
            Decode = decode_ssse3_unchecked;
            Case = &[]
        }
        test! {
            Encode = encode_ssse3_unchecked;
            Decode = decode_ssse3_unchecked;
            Case = &CASE[..15]
        }
        test! {
            Encode = encode_ssse3_unchecked;
            Decode = decode_ssse3_unchecked;
            Case = &CASE[..16]
        }
        test! {
            Encode = encode_ssse3_unchecked;
            Decode = decode_ssse3_unchecked;
            Case = &CASE[..17]
        };
        test! {
            Encode = encode_ssse3_unchecked;
            Decode = decode_ssse3_unchecked;
            Case = &CASE[..31]
        }
        test! {
            Encode = encode_ssse3_unchecked;
            Decode = decode_ssse3_unchecked;
            Case = &CASE[..32]
        }
        test! {
            Encode = encode_ssse3_unchecked;
            Decode = decode_ssse3_unchecked;
            Case = &CASE[..33]
        }
        test! {
            Encode = encode_ssse3_unchecked;
            Decode = decode_ssse3_unchecked;
            Case = &CASE[..63]
        }
        test! {
            Encode = encode_ssse3_unchecked;
            Decode = decode_ssse3_unchecked;
            Case = &CASE[..64]
        }
        test! {
            Encode = encode_ssse3_unchecked;
            Decode = decode_ssse3_unchecked;
            Case = &CASE[..65]
        }
        test! {
            Encode = encode_ssse3_unchecked;
            Decode = decode_ssse3_unchecked;
            Case = &CASE[..127]
        }
        test! {
            Encode = encode_ssse3_unchecked;
            Decode = decode_ssse3_unchecked;
            Case = &CASE[..128]
        }
        test! {
            Encode = encode_ssse3_unchecked;
            Decode = decode_ssse3_unchecked;
            Case = &CASE[..129]
        }
    }
    // Lengths straddle the 32-byte vector and 128-byte batch boundaries;
    // decoding is also cross-checked with the SSSE3 decoder.
    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_avx2() {
        test! {
            Encode = encode_avx2_unchecked;
            Decode = decode_avx2_unchecked, decode_ssse3_unchecked;
            Case = &[]
        }
        test! {
            Encode = encode_avx2_unchecked;
            Decode = decode_avx2_unchecked, decode_ssse3_unchecked;
            Case = &CASE[..31]
        }
        test! {
            Encode = encode_avx2_unchecked;
            Decode = decode_avx2_unchecked, decode_ssse3_unchecked;
            Case = &CASE[..32]
        }
        test! {
            Encode = encode_avx2_unchecked;
            Decode = decode_avx2_unchecked, decode_ssse3_unchecked;
            Case = &CASE[..33]
        }
        test! {
            Encode = encode_avx2_unchecked;
            Decode = decode_avx2_unchecked, decode_ssse3_unchecked;
            Case = &CASE[..63]
        }
        test! {
            Encode = encode_avx2_unchecked;
            Decode = decode_avx2_unchecked, decode_ssse3_unchecked;
            Case = &CASE[..64]
        }
        test! {
            Encode = encode_avx2_unchecked;
            Decode = decode_avx2_unchecked, decode_ssse3_unchecked;
            Case = &CASE[..65]
        }
        test! {
            Encode = encode_avx2_unchecked;
            Decode = decode_avx2_unchecked, decode_ssse3_unchecked;
            Case = &CASE[..127]
        }
        test! {
            Encode = encode_avx2_unchecked;
            Decode = decode_avx2_unchecked, decode_ssse3_unchecked;
            Case = &CASE[..128]
        }
        test! {
            Encode = encode_avx2_unchecked;
            Decode = decode_avx2_unchecked, decode_ssse3_unchecked;
            Case = &CASE[..129]
        }
        test! {
            Encode = encode_avx2_unchecked;
            Decode = decode_avx2_unchecked, decode_ssse3_unchecked;
            Case = &CASE[..255]
        }
        test! {
            Encode = encode_avx2_unchecked;
            Decode = decode_avx2_unchecked, decode_ssse3_unchecked;
            Case = &CASE[..256]
        }
        test! {
            Encode = encode_avx2_unchecked;
            Decode = decode_avx2_unchecked, decode_ssse3_unchecked;
            Case = &CASE[..257]
        }
    }
    // Lengths straddle the 64-byte vector and 256-byte batch boundaries;
    // decoding is cross-checked with the AVX2 and SSSE3 decoders.
    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_avx512() {
        test! {
            Encode = encode_avx512_unchecked;
            Decode = decode_avx512_unchecked, decode_avx2_unchecked, decode_ssse3_unchecked;
            Case = &[]
        }
        test! {
            Encode = encode_avx512_unchecked;
            Decode = decode_avx512_unchecked, decode_avx2_unchecked, decode_ssse3_unchecked;
            Case = &CASE[..63]
        }
        test! {
            Encode = encode_avx512_unchecked;
            Decode = decode_avx512_unchecked, decode_avx2_unchecked, decode_ssse3_unchecked;
            Case = &CASE[..64]
        }
        test! {
            Encode = encode_avx512_unchecked;
            Decode = decode_avx512_unchecked, decode_avx2_unchecked, decode_ssse3_unchecked;
            Case = &CASE[..65]
        }
        test! {
            Encode = encode_avx512_unchecked;
            Decode = decode_avx512_unchecked, decode_avx2_unchecked, decode_ssse3_unchecked;
            Case = &CASE[..127]
        }
        test! {
            Encode = encode_avx512_unchecked;
            Decode = decode_avx512_unchecked, decode_avx2_unchecked, decode_ssse3_unchecked;
            Case = &CASE[..128]
        }
        test! {
            Encode = encode_avx512_unchecked;
            Decode = decode_avx512_unchecked, decode_avx2_unchecked, decode_ssse3_unchecked;
            Case = &CASE[..129]
        }
        test! {
            Encode = encode_avx512_unchecked;
            Decode = decode_avx512_unchecked, decode_avx2_unchecked, decode_ssse3_unchecked;
            Case = &CASE[..255]
        }
        test! {
            Encode = encode_avx512_unchecked;
            Decode = decode_avx512_unchecked, decode_avx2_unchecked, decode_ssse3_unchecked;
            Case = &CASE[..256]
        }
        test! {
            Encode = encode_avx512_unchecked;
            Decode = decode_avx512_unchecked, decode_avx2_unchecked, decode_ssse3_unchecked;
            Case = &CASE[..257]
        }
        test! {
            Encode = encode_avx512_unchecked;
            Decode = decode_avx512_unchecked, decode_avx2_unchecked, decode_ssse3_unchecked;
            Case = &CASE[..511]
        }
        test! {
            Encode = encode_avx512_unchecked;
            Decode = decode_avx512_unchecked, decode_avx2_unchecked, decode_ssse3_unchecked;
            Case = &CASE[..512]
        }
        test! {
            Encode = encode_avx512_unchecked;
            Decode = decode_avx512_unchecked, decode_avx2_unchecked, decode_ssse3_unchecked;
            Case = &CASE[..513]
        }
    }
    // For every tested length, place each of the 256 byte values in the
    // middle of an otherwise-valid input: hex digits must decode Ok,
    // everything else must be rejected by every decoder variant.
    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_validation() {
        for l in [
            15, 16, 17, 31, 32, 33, 63, 64, 65, 127, 128, 129, 255, 256, 257, 511, 512, 513,
        ] {
            for c in 0u8..=255 {
                let mut bytes = vec![b'a'; l * 2];
                bytes[l] = c;
                // SAFETY: `bytes.len()` is even (l * 2).
                let bytes = unsafe { bytes.as_chunks_unchecked() };
                if c.is_ascii_hexdigit() {
                    unsafe {
                        assert!(
                            decode_ssse3_unchecked::<true>(
                                bytes,
                                Vec::with_capacity(l).spare_capacity_mut()
                            )
                            .is_ok(),
                            "ssse3 validation failed for byte {c} (l={l})",
                        );
                        assert!(
                            decode_ssse3_unchecked::<false>(
                                bytes,
                                Vec::with_capacity(l).spare_capacity_mut()
                            )
                            .is_ok(),
                            "ssse3 validation failed for byte {c} (l={l})",
                        );
                        assert!(
                            decode_avx2_unchecked::<true>(
                                bytes,
                                Vec::with_capacity(l).spare_capacity_mut()
                            )
                            .is_ok(),
                            "avx2 validation failed for byte {c} (l={l})"
                        );
                        assert!(
                            decode_avx2_unchecked::<false>(
                                bytes,
                                Vec::with_capacity(l).spare_capacity_mut()
                            )
                            .is_ok(),
                            "avx2 validation failed for byte {c} (l={l})"
                        );
                        assert!(
                            decode_avx512_unchecked::<true>(
                                bytes,
                                Vec::with_capacity(l).spare_capacity_mut()
                            )
                            .is_ok(),
                            "avx512 validation failed for byte {c} (l={l})"
                        );
                        assert!(
                            decode_avx512_unchecked::<false>(
                                bytes,
                                Vec::with_capacity(l).spare_capacity_mut()
                            )
                            .is_ok(),
                            "avx512 validation failed for byte {c} (l={l})"
                        );
                    }
                } else {
                    unsafe {
                        assert!(
                            decode_ssse3_unchecked::<true>(
                                bytes,
                                Vec::with_capacity(l).spare_capacity_mut()
                            )
                            .is_err(),
                            "ssse3 validation failed for byte {c} (l={l})"
                        );
                        assert!(
                            decode_ssse3_unchecked::<false>(
                                bytes,
                                Vec::with_capacity(l).spare_capacity_mut()
                            )
                            .is_err(),
                            "ssse3 validation failed for byte {c} (l={l})"
                        );
                        assert!(
                            decode_avx2_unchecked::<true>(
                                bytes,
                                Vec::with_capacity(l).spare_capacity_mut()
                            )
                            .is_err(),
                            "avx2 validation failed for byte {c} (l={l})"
                        );
                        assert!(
                            decode_avx2_unchecked::<false>(
                                bytes,
                                Vec::with_capacity(l).spare_capacity_mut()
                            )
                            .is_err(),
                            "avx2 validation failed for byte {c} (l={l})"
                        );
                        assert!(
                            decode_avx512_unchecked::<true>(
                                bytes,
                                Vec::with_capacity(l).spare_capacity_mut()
                            )
                            .is_err(),
                            "avx512 validation failed for byte {c} (l={l})"
                        );
                        assert!(
                            decode_avx512_unchecked::<false>(
                                bytes,
                                Vec::with_capacity(l).spare_capacity_mut()
                            )
                            .is_err(),
                            "avx512 validation failed for byte {c} (l={l})"
                        );
                    }
                }
            }
        }
    }
}