#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
use crate::num::{SignedVarIntTarget, VarIntTarget};
/// Encodes `num` as a variable-length integer.
///
/// Returns a 16-byte buffer together with the number of leading bytes of that
/// buffer that make up the encoding. This is the safe entry point; the work is
/// done by [`encode_unsafe`].
#[inline]
#[cfg(any(target_feature = "sse2", doc))]
#[cfg_attr(rustc_nightly, doc(cfg(target_feature = "sse2")))]
pub fn encode<T: VarIntTarget>(num: T) -> ([u8; 16], u8) {
    // SAFETY: `encode_unsafe` sits behind the same `sse2` cfg gate as this
    // function, so the SIMD instructions it relies on are enabled for every
    // build that contains this call.
    // NOTE(review): no further caller-side precondition is visible in this file.
    unsafe { encode_unsafe::<T>(num) }
}
/// Encodes a signed `num` as a variable-length integer.
///
/// The value is first mapped to its unsigned counterpart with
/// `T::Unsigned::zigzag`, and that unsigned value is then encoded by
/// [`encode_unsafe`].
///
/// Returns a 16-byte buffer together with the number of leading bytes of that
/// buffer that make up the encoding.
#[inline]
#[cfg(any(target_feature = "sse2", doc))]
#[cfg_attr(rustc_nightly, doc(cfg(target_feature = "sse2")))]
pub fn encode_zigzag<T: SignedVarIntTarget>(num: T) -> ([u8; 16], u8) {
    let unsigned = T::Unsigned::zigzag(num);

    // SAFETY: `encode_unsafe` sits behind the same `sse2` cfg gate as this
    // function, so the SIMD instructions it relies on are enabled for every
    // build that contains this call.
    // NOTE(review): no further caller-side precondition is visible in this file.
    unsafe { encode_unsafe(unsigned) }
}
/// Encodes `num` as a variable-length integer directly into `slice`.
///
/// Only the bytes that belong to the encoding are written, starting at
/// `slice[0]`; the rest of `slice` is left untouched.
///
/// Returns the number of bytes written.
///
/// # Panics
///
/// Panics if `slice` is shorter than the encoded length.
#[inline]
#[cfg(any(target_feature = "sse2", doc))]
#[cfg_attr(rustc_nightly, doc(cfg(target_feature = "sse2")))]
pub fn encode_to_slice<T: VarIntTarget>(num: T, slice: &mut [u8]) -> u8 {
    let (encoded, written) = encode(num);
    let len = usize::from(written);

    // Slicing the destination first means a too-short `slice` panics here,
    // before anything has been copied.
    slice[..len].copy_from_slice(&encoded[..len]);

    written
}
/// Encodes `num` as a variable-length integer.
///
/// Returns a 16-byte buffer together with the number of leading bytes of that
/// buffer that make up the encoding. Every encoded byte except the last one
/// has its top bit set as a continuation marker.
///
/// Types with `T::MAX_VARINT_BYTES <= 5` are handled in a single `u64`; wider
/// types are handled in a 128-bit SSE2 register.
///
/// # Safety
///
/// The body uses SSE2 intrinsics and `transmute`. The function is only
/// compiled when the `sse2` target feature is enabled, which covers the
/// intrinsics.
/// NOTE(review): no input-dependent precondition is visible in this file, but
/// the arithmetic below assumes the stage-1 helpers leave the top bit of every
/// byte clear — confirm against the `VarIntTarget` implementations.
#[inline]
#[cfg(any(target_feature = "sse2", doc))]
#[cfg_attr(rustc_nightly, doc(cfg(target_feature = "sse2")))]
pub unsafe fn encode_unsafe<T: VarIntTarget>(num: T) -> ([u8; 16], u8) {
    if T::MAX_VARINT_BYTES > 5 {
        // Wide path: the payload needs more than eight bytes, so work on a
        // full 128-bit register.
        // Presumably stage 1 places one 7-bit group in each lane — TODO confirm.
        let spread: __m128i = core::mem::transmute(num.num_to_vector_stage1());

        // A signed "> 0" test flags every lane holding 1..=127. Lane 0 is
        // forced on as well so that an all-zero input still yields one byte.
        let nonzero_lanes = _mm_cmpgt_epi8(spread, _mm_setzero_si128());
        let lane_zero = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffu8 as i8);
        let occupied = _mm_movemask_epi8(_mm_or_si128(nonzero_lanes, lane_zero));

        // One past the index of the highest occupied lane is the byte count.
        let len = 32 - occupied.leading_zeros() as u8;

        // Mark lanes 0..len, then shift the whole register down by one byte so
        // that only lanes 0..len-1 stay marked: the last byte gets no
        // continuation bit.
        let lane_index = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let in_use = _mm_cmplt_epi8(lane_index, _mm_set1_epi8(len as i8));
        let all_but_last = _mm_bsrli_si128(in_use, 1);
        let continuation = _mm_and_si128(all_but_last, _mm_set1_epi8(128u8 as i8));

        let encoded = _mm_or_si128(spread, continuation);
        return (core::mem::transmute::<__m128i, [u8; 16]>(encoded), len);
    }

    // Narrow path: everything fits in one 64-bit word.
    // Presumably stage 1 places one 7-bit group in each byte — TODO confirm.
    let spread = num.num_to_scalar_stage1();

    // Whole bytes above the highest set bit are unused. Subtracting one before
    // dividing caps the result at 7, so a zero input still reports one byte.
    let unused_bytes = (spread.leading_zeros() - 1) / 8;
    let len = 8 - unused_bytes;

    // Keep the low `8 * len - 7` bits of the all-ones word: that range covers
    // bit 7 of every used byte except the last one.
    let continuation = 0x8080_8080_8080_8080_u64 & (u64::MAX >> (unused_bytes * 8 + 7));
    let encoded = spread | continuation;

    // x86 is little-endian, so the low byte of `encoded` lands in buffer[0].
    (
        core::mem::transmute::<[u64; 2], [u8; 16]>([encoded, 0]),
        len as u8,
    )
}