use crate::encode::scalar;
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
use core::mem::MaybeUninit;
/// Encodes `input` into `output` using SSSE3, handing the tail to the scalar encoder.
///
/// While at least 16 input bytes remain, one iteration loads 16 bytes, consumes the first
/// 12 of them and stores 16 encoded bytes. Each 3-byte group becomes four 6-bit values,
/// taken starting from the low six bits of the first byte and ending with the high six
/// bits of the third byte. The 6-bit values are then turned into bytes as follows:
///
/// * `0..=25`  -> `b'a'..=b'z'` (value + 97)
/// * `26..=51` -> `b'A'..=b'Z'` (value + 39)
/// * `52..=61` -> `b'0'..=b'9'` (value - 4)
/// * `62`, `63` -> `b'('`, `b')'` (value - 22)
///
/// Once fewer than 16 input bytes are left, the remaining input and the remaining part of
/// `output` are passed to `scalar::encode_into_unchecked`.
///
/// Returns the number of bytes stored by the vector loop plus the value returned by the
/// scalar call.
///
/// # Safety
///
/// * The CPU executing this function must support SSSE3.
/// * No bounds checks are performed on `output`: every 16-byte store issued by the loop
///   must fall inside `output`, i.e. `output` needs at least 16 bytes of room for every
///   12 input bytes consumed by the vector loop.
/// * The leftover input/output slices must also satisfy whatever contract
///   `scalar::encode_into_unchecked` imposes (not visible from this function; presumably
///   enough room for the encoded tail -- confirm against that function).
#[target_feature(enable = "ssse3")]
pub unsafe fn encode_into_unchecked(input: &[u8], output: &mut [MaybeUninit<u8>]) -> usize {
    /// Width of one unaligned vector load; the loop needs this many readable bytes.
    const LOAD_WIDTH: usize = 16;
    /// Input bytes actually consumed per iteration.
    const IN_STEP: usize = 12;
    /// Output bytes stored per iteration.
    const OUT_STEP: usize = 16;

    let capacity = output.len();
    let mut src = input.as_ptr();
    let mut dst = output.as_mut_ptr();
    let mut remaining = input.len();
    let mut produced = 0usize;

    // Loop-invariant vectors.
    //
    // Spreads each 3-byte group over one 32-bit lane as [b1, b2, b0, b1].
    let gather = _mm_set_epi8(10, 9, 11, 10, 7, 6, 8, 7, 4, 3, 5, 4, 1, 0, 2, 1);
    // Fields that are moved towards the high byte of their 16-bit lane by a multiply.
    let up_mask = _mm_set1_epi32(0x003f03f0);
    let up_factor = _mm_set1_epi32(0x01000010);
    // Fields that are moved towards the low byte of their 16-bit lane by a high multiply.
    let down_mask = _mm_set1_epi32(0x0fc0fc00);
    let down_factor = _mm_set1_epi32(0x04000040);
    // Reverses the four bytes inside every 32-bit lane.
    let reverse_lanes = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3);
    let fifty_one = _mm_set1_epi8(51);
    let twenty_six = _mm_set1_epi8(26);
    // Slot of the `offsets` table used for values below 26.
    let low_slot = _mm_set1_epi8(13);
    // Per-slot additive offsets: slot 0 -> +39, slots 1..=10 -> -4, slots 11 and 12 -> -22,
    // slot 13 -> +97.
    let offsets = _mm_setr_epi8(
        39, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -22, -22, 97, 0, 0,
    );

    while remaining >= LOAD_WIDTH {
        // SAFETY: `src` points `input.len() - remaining` bytes into `input`, and the loop
        // condition guarantees at least 16 readable bytes from there.
        let chunk = unsafe { _mm_loadu_si128(src.cast()) };
        let lanes = _mm_shuffle_epi8(chunk, gather);

        // Isolate the four 6-bit fields of every lane, one per byte.
        let moved_up = _mm_mullo_epi16(_mm_and_si128(lanes, up_mask), up_factor);
        let moved_down = _mm_mulhi_epu16(_mm_and_si128(lanes, down_mask), down_factor);
        let sextets = _mm_shuffle_epi8(_mm_or_si128(moved_up, moved_down), reverse_lanes);

        // Pick the offset-table slot: saturating `value - 51` is 0 for values up to 51 and
        // 1..=12 above that; values below 26 are redirected to slot 13.
        let over_51 = _mm_subs_epu8(sextets, fifty_one);
        let under_26 = _mm_cmpgt_epi8(twenty_six, sextets);
        let slot = _mm_or_si128(over_51, _mm_and_si128(under_26, low_slot));
        let encoded = _mm_add_epi8(_mm_shuffle_epi8(offsets, slot), sextets);

        // SAFETY: the caller guarantees room for this 16-byte store in `output`; `src`
        // advances by 12, which stays within the 16 bytes verified by the loop condition.
        unsafe {
            _mm_storeu_si128(dst.cast(), encoded);
            dst = dst.add(OUT_STEP);
            src = src.add(IN_STEP);
        }
        produced += OUT_STEP;
        remaining -= IN_STEP;
    }

    // SAFETY: `src`/`remaining` describe the not-yet-consumed suffix of `input`, and
    // `dst`/`capacity - produced` the not-yet-written suffix of `output`; the caller upholds
    // the scalar encoder's own requirements on those slices.
    let tail_written = unsafe {
        let rest_in = core::slice::from_raw_parts(src, remaining);
        let rest_out = core::slice::from_raw_parts_mut(dst, capacity - produced);
        scalar::encode_into_unchecked(rest_in, rest_out)
    };
    produced + tail_written
}