#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
use super::*;
/// Unpacks the first 12 bytes of `value` into sixteen 6-bit indices.
///
/// Every 3-byte group `(b0, b1, b2)` taken from bytes 0..=11 becomes four
/// output bytes, each in `0..=63`:
/// `b0 >> 2`, `((b0 & 0x03) << 4) | (b1 >> 4)`,
/// `((b1 & 0x0f) << 2) | (b2 >> 6)` and `b2 & 0x3f`.
/// Bytes 12..=15 of `value` are never selected by the shuffle and are ignored.
///
/// # Safety
///
/// The executing CPU must support SSSE3.
#[inline]
#[target_feature(enable = "ssse3")]
unsafe fn split_bytes(value: __m128i) -> __m128i {
    // Each 32-bit lane receives the bytes [b1, b0, b2, b1] of one 3-byte
    // group, so that every 6-bit field sits entirely inside one 16-bit word.
    let gather = _mm_setr_epi8(1, 0, 2, 1, 4, 3, 5, 4, 7, 6, 8, 7, 10, 9, 11, 10);
    let grouped = _mm_shuffle_epi8(value, gather);

    // Fields that must move towards the low bits: taking the high half of a
    // 16-bit multiply acts as a per-word right shift (by 10 and by 6).
    let moved_down = _mm_mulhi_epu16(
        _mm_and_si128(grouped, _mm_set1_epi32(0x0fc0fc00)),
        _mm_set1_epi32(0x04000040),
    );

    // Fields that must move towards the high bits: taking the low half of a
    // 16-bit multiply acts as a per-word left shift (by 4 and by 8).
    let moved_up = _mm_mullo_epi16(
        _mm_and_si128(grouped, _mm_set1_epi32(0x003f03f0)),
        _mm_set1_epi32(0x01000010),
    );

    // The two halves occupy disjoint bytes, so OR merges them.
    _mm_or_si128(moved_down, moved_up)
}
/// Converts sixteen 6-bit indices into their ASCII base64 characters.
///
/// Each byte of `input` is expected to be in `0..=63` (the form produced by
/// `split_bytes`). Indices 0..=25 map to `A`-`Z`, 26..=51 to `a`-`z` and
/// 52..=61 to `0`-`9`; indices 62 and 63 map to `base.charset[62]` and
/// `base.charset[63]`.
///
/// # Safety
///
/// The executing CPU must support SSSE3.
#[inline]
#[target_feature(enable = "ssse3")]
unsafe fn lookup(input: __m128i, base: &Base64) -> __m128i {
    // Per-range offsets which, added to an index, give its ASCII code.
    let upper = b'A' as i8;
    let lower = b'a' as i8 - 26;
    let digit = b'0' as i8 - 52;
    let sym62 = base.charset[62].wrapping_sub(62) as i8;
    let sym63 = base.charset[63].wrapping_sub(63) as i8;

    // Slot 0: a-z, slots 1..=10: digits, 11 and 12: the two alphabet-specific
    // symbols, 13: A-Z. Slots 14 and 15 are never selected for valid indices.
    let offsets = _mm_setr_epi8(
        lower, digit, digit, digit, digit, digit, digit, digit, digit, digit, digit, sym62, sym63,
        upper, 0, 0,
    );

    // The saturating subtract collapses 0..=51 to slot 0 and spreads 52..=63
    // over slots 1..=12.
    let reduced = _mm_subs_epu8(input, _mm_set1_epi8(51));
    // Indices below 26 also landed on slot 0; redirect them to slot 13.
    let is_upper = _mm_cmpgt_epi8(_mm_set1_epi8(26), input);
    let slot = _mm_or_si128(reduced, _mm_and_si128(is_upper, _mm_set1_epi8(13)));

    _mm_add_epi8(_mm_shuffle_epi8(offsets, slot), input)
}
/// Base64-encodes `bytes` into `dest`, using SSSE3 for the bulk of the input.
///
/// While at least 16 input bytes remain, one 16-byte vector is loaded, its
/// first 12 bytes are encoded into 16 output characters, and the input
/// advances by 12 bytes. The remaining input is passed to `scalar::encode`
/// together with `pad`, and that call's return value is returned unchanged.
///
/// # Safety
///
/// * The executing CPU must support SSSE3.
/// * `dest` must be valid for writes of 16 bytes for every vector iteration,
///   plus whatever `scalar::encode` writes for the remaining input.
#[target_feature(enable = "ssse3")]
pub unsafe fn encode(mut bytes: &[u8], base: &Base64, pad: Padding, mut dest: *mut u8) -> *mut u8 {
    /// Bytes read by each unaligned vector load, and written by each store.
    const VECTOR_WIDTH: usize = 16;
    /// Input bytes actually consumed per iteration.
    const CONSUMED: usize = 12;

    // A full vector must be readable even though only 12 bytes are used, so
    // the loop stops as soon as fewer than 16 bytes remain.
    while bytes.len() >= VECTOR_WIDTH {
        let chunk = _mm_loadu_si128(bytes.as_ptr() as *const __m128i);
        let encoded = lookup(split_bytes(chunk), base);
        _mm_storeu_si128(dest as *mut __m128i, encoded);

        dest = dest.add(VECTOR_WIDTH);
        bytes = &bytes[CONSUMED..];
    }

    scalar::encode(bytes, base, pad, dest)
}