varint_simd/encode/mod.rs

#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

use crate::num::{SignedVarIntTarget, VarIntTarget};

/// Encodes a single number to a varint. Requires SSE2 support.
///
/// Produces a tuple containing the encoded data followed by the number of bytes used to encode
/// the varint.
///
/// # Examples
/// ```
/// use varint_simd::encode;
///
/// let encoded = encode::<u32>(1337);
/// assert_eq!(encoded, ([185, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 2));
/// ```
#[inline]
#[cfg(any(target_feature = "sse2", doc))]
#[cfg_attr(rustc_nightly, doc(cfg(target_feature = "sse2")))]
pub fn encode<T: VarIntTarget>(num: T) -> ([u8; 16], u8) {
    unsafe { encode_unsafe(num) }
}

/// Convenience function for encoding a single signed integer in ZigZag format to a varint.
/// See also: [`encode`]
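///
/// ZigZag encoding maps signed integers to unsigned integers so that values of small magnitude
/// encode to short varints: 0 maps to 0, -1 to 1, 1 to 2, -2 to 3, and so on.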
///
/// # Examples
/// ```
/// use varint_simd::encode_zigzag;
///
/// let encoded = encode_zigzag::<i32>(-20);
/// assert_eq!(encoded, ([39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 1));
/// ```
#[inline]
#[cfg(any(target_feature = "sse2", doc))]
#[cfg_attr(rustc_nightly, doc(cfg(target_feature = "sse2")))]
pub fn encode_zigzag<T: SignedVarIntTarget>(num: T) -> ([u8; 16], u8) {
    unsafe { encode_unsafe(T::Unsigned::zigzag(num)) }
}

/// Encodes a single number to a varint, and writes the resulting data to the slice. Returns the
/// number of bytes written (maximum 10 bytes).
///
/// See also: [`encode`]
///
/// # Panics
/// Panics if the slice is too small to contain the varint.
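///
/// # Examples
/// A usage sketch, assuming `encode_to_slice` is re-exported at the crate root like [`encode`]:
/// ```
/// use varint_simd::encode_to_slice;
///
/// let mut buf = [0u8; 16];
/// let len = encode_to_slice::<u32>(1337, &mut buf);
/// assert_eq!(&buf[..len as usize], &[185, 10]);
/// ```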
#[inline]
#[cfg(any(target_feature = "sse2", doc))]
#[cfg_attr(rustc_nightly, doc(cfg(target_feature = "sse2")))]
pub fn encode_to_slice<T: VarIntTarget>(num: T, slice: &mut [u8]) -> u8 {
    let (data, size) = encode(num);
    slice[..size as usize].copy_from_slice(&data[..size as usize]);

    size
}

/// Encodes a single number to a varint. Requires SSE2 support.
///
/// Produces a tuple containing the encoded data followed by the number of bytes used to encode
/// the varint.
///
/// # Safety
/// This function should not exhibit unsafe behavior for any input. It is marked `unsafe` only
/// because it calls a large number of unsafe intrinsics.
#[inline]
#[cfg(any(target_feature = "sse2", doc))]
#[cfg_attr(rustc_nightly, doc(cfg(target_feature = "sse2")))]
pub unsafe fn encode_unsafe<T: VarIntTarget>(num: T) -> ([u8; 16], u8) {
    if T::MAX_VARINT_BYTES <= 5 {
        // We could kick off a lzcnt here on the original number but that makes the math complicated and slow

        let stage1 = num.num_to_scalar_stage1();

        // We could OR the data with 1 to avoid undefined behavior, but for some reason it's still faster to take the branch
        let leading = stage1.leading_zeros();

        let unused_bytes = (leading - 1) / 8;
        let bytes_needed = 8 - unused_bytes;
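        // A worked sketch (num = 1337, per the `encode` doc test above): stage1 spreads the
        // 7-bit groups 0x39 and 0x0A into 0x0A39, leading_zeros() == 52, so
        // unused_bytes == 51 / 8 == 6 and bytes_needed == 8 - 6 == 2.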

        // Set the MSB (continuation bit) on every byte except the last
        let msbs = 0x8080808080808080;
        let msbmask = 0xFFFFFFFFFFFFFFFF >> ((8 - bytes_needed + 1) * 8 - 1);

        let merged = stage1 | (msbs & msbmask);
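        // Continuing the sketch: msbmask == u64::MAX >> 55 == 0x1FF, msbs & msbmask == 0x80,
        // and merged == 0x0A39 | 0x80 == 0x0AB9, i.e. the encoded bytes [185, 10].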

        (
            core::mem::transmute::<[u64; 2], [u8; 16]>([merged, 0]),
            bytes_needed as u8,
        )
    } else {
        // Break the number into 7-bit parts and spread them out into a vector
        let stage1: __m128i = core::mem::transmute(num.num_to_vector_stage1());

        // Create a mask for where there exist values
        // This signed comparison works because all MSBs should be cleared at this point
        // Also handle the special case when num == 0
        let minimum = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffu8 as i8);
        let exists = _mm_or_si128(_mm_cmpgt_epi8(stage1, _mm_setzero_si128()), minimum);
        let bits = _mm_movemask_epi8(exists);

        // Count the number of bytes used
        let bytes = 32 - bits.leading_zeros() as u8; // lzcnt on supported CPUs
        // TODO: Compiler emits an unnecessary branch here when using bsr/bsl fallback
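        // Continuing the num = 1337 sketch on this path (as a u64): stage1 holds
        // [0x39, 0x0A, 0, ...], so bits == 0b11, leading_zeros() == 30, and bytes == 2.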

        // Fill that many bytes into a vector
        let ascend = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let mask = _mm_cmplt_epi8(ascend, _mm_set1_epi8(bytes as i8));

        // Shift the mask down by 1 byte so every byte except the last is selected, then keep
        // only the MSB of each selected byte
        let shift = _mm_bsrli_si128(mask, 1);
        let msbmask = _mm_and_si128(shift, _mm_set1_epi8(128u8 as i8));
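        // In the sketch (bytes == 2), `mask` selects lanes 0 and 1; after the shift only lane 0
        // remains, so only the first byte receives a continuation bit.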

        // Merge the MSB bits into the vector
        let merged = _mm_or_si128(stage1, msbmask);
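        // The sketch yields [0xB9, 0x0A, 0, ...] == [185, 10], matching the scalar path.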

        (core::mem::transmute::<__m128i, [u8; 16]>(merged), bytes)
    }
}