use crate::encode::scalar;
use core::arch::wasm32::*;
use core::mem::MaybeUninit;
/// Encodes `input` into `output` using WebAssembly `simd128` instructions and
/// returns the number of bytes written.
///
/// The vector loop turns 12 input bytes into 16 output symbols per iteration.
/// Each 3-byte group is treated as a little-endian 24-bit value and split into
/// four 6-bit indices, lowest bits first. Indices are mapped to ASCII via the
/// offset table below: 0..=25 -> `a`-`z`, 26..=51 -> `A`-`Z`, 52..=61 ->
/// `0`-`9`, 62 -> `(`, 63 -> `)`. Whatever input is left once fewer than 16
/// bytes remain is handed to [`scalar::encode_into_unchecked`].
///
/// # Safety
///
/// * The `simd128` target feature must be available.
/// * No bounds checks are performed on `output`: every vector iteration stores
///   16 bytes, and the remainder of `output` is passed to the scalar encoder.
///   The caller must supply an output buffer large enough for the whole
///   encoding (the scalar encoder's own requirements apply to the tail —
///   NOTE(review): confirm against its contract).
#[target_feature(enable = "simd128")]
pub unsafe fn encode_into_unchecked(input: &[u8], output: &mut [MaybeUninit<u8>]) -> usize {
    // Loop-invariant vectors.
    //
    // Per 32-bit lane the swizzle gathers the bytes [in1, in2, in0, in1] of a
    // 3-byte group so that every 6-bit field is contiguous inside the lane.
    let gather = u8x16(1, 2, 0, 1, 4, 5, 3, 4, 7, 8, 6, 7, 10, 11, 9, 10);
    let byte0_mask = u32x4_splat(0x0000003f);
    let byte1_mask = u32x4_splat(0x00003f00);
    let byte2_mask = u32x4_splat(0x003f0000);
    let byte3_mask = u32x4_splat(0x3f000000);
    // Offset added to an index, selected by the index's class (see below).
    let class_offsets = i8x16(
        39, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -22, -22, 97, 0, 0,
    );
    let above_51 = u8x16_splat(51);
    let first_26 = u8x16_splat(26);
    let class_13 = u8x16_splat(13);

    let capacity = output.len();
    let mut src = input.as_ptr();
    let mut dst = output.as_mut_ptr();
    let mut remaining = input.len();
    let mut produced = 0;

    // A full 16-byte load is issued even though only 12 bytes are consumed,
    // hence the requirement of at least 16 readable bytes.
    while remaining >= 16 {
        // SAFETY: `remaining >= 16`, so 16 bytes starting at `src` lie inside `input`.
        let block = unsafe { v128_load(src.cast()) };
        let lanes = u8x16_swizzle(block, gather);

        // Move each 6-bit field into its own output byte of the lane.
        let field0 = v128_and(u32x4_shr(lanes, 16), byte0_mask);
        let field1 = v128_and(u32x4_shr(lanes, 14), byte1_mask);
        let field2 = v128_and(u32x4_shl(lanes, 12), byte2_mask);
        let field3 = v128_and(u32x4_shl(lanes, 14), byte3_mask);
        let indices = v128_or(v128_or(field0, field1), v128_or(field2, field3));

        // Classify every index: 13 for 0..=25, 0 for 26..=51, 1..=12 for 52..=63.
        let high_class = u8x16_sub_sat(indices, above_51);
        let low_class = v128_and(u8x16_gt(first_26, indices), class_13);
        let class = v128_or(high_class, low_class);

        // Look up the per-class offset and add it to the index to get ASCII.
        let ascii = i8x16_add(u8x16_swizzle(class_offsets, class), indices);

        // SAFETY: the caller guarantees `output` has room for these 16 bytes;
        // `src` advances by 12 <= `remaining`, so it stays within `input`.
        unsafe {
            v128_store(dst.cast(), ascii);
            dst = dst.add(16);
            src = src.add(12);
        }
        produced += 16;
        remaining -= 12;
    }

    // SAFETY: `src`/`remaining` describe the unread suffix of `input`, and
    // `dst`/`capacity - produced` the unwritten suffix of `output`; the scalar
    // encoder's requirements are forwarded to the caller.
    let tail = unsafe {
        scalar::encode_into_unchecked(
            core::slice::from_raw_parts(src, remaining),
            core::slice::from_raw_parts_mut(dst, capacity - produced),
        )
    };
    produced + tail
}