varint_simd/
num.rs

1#[cfg(target_arch = "x86")]
2use core::arch::x86::*;
3#[cfg(target_arch = "x86_64")]
4use core::arch::x86_64::*;
5
6use core::fmt::Debug;
7
8/// Represents an unsigned scalar value that can be encoded to and decoded from a varint.
9pub trait VarIntTarget: Debug + Eq + PartialEq + PartialOrd + Sized + Copy {
10    /// The signed version of this type
11    type Signed: SignedVarIntTarget;
12
13    /// The maximum length of varint that is necessary to represent this number
14    const MAX_VARINT_BYTES: u8;
15
16    /// The maximum value of the last byte if the varint is MAX_VARINT_BYTES long such that the
17    /// varint would not overflow the target
18    const MAX_LAST_VARINT_BYTE: u8;
19
20    /// Converts a 128-bit vector to this number
21    ///
22    /// Note: Despite operating on 128-bit SIMD vectors, these functions accept and return static
23    /// arrays due to a lack of optimization capability by the compiler when passing or returning
24    /// intrinsic vectors.
25    fn vector_to_num(res: [u8; 16]) -> Self;
26
27    fn scalar_to_num(x: u64) -> Self;
28
29    /// Cast from u32 to self
30    fn cast_u32(num: u32) -> Self;
31
32    /// Cast from u64 to self
33    fn cast_u64(num: u64) -> Self;
34
35    /// Splits this number into 7-bit segments for encoding
36    fn num_to_scalar_stage1(self) -> u64;
37
38    /// Splits this number into 7-bit segments for encoding
39    fn num_to_vector_stage1(self) -> [u8; 16];
40
41    /// ZigZag encodes this value
42    fn zigzag(from: Self::Signed) -> Self;
43
44    /// ZigZag decodes this value
45    fn unzigzag(self) -> Self::Signed;
46}
47
48impl VarIntTarget for u8 {
49    type Signed = i8;
50    const MAX_VARINT_BYTES: u8 = 2;
51    const MAX_LAST_VARINT_BYTE: u8 = 0b00000001;
52
53    #[inline(always)]
54    fn vector_to_num(res: [u8; 16]) -> Self {
55        let res: [u64; 2] = unsafe { core::mem::transmute(res) };
56        let x = res[0];
57
58        Self::scalar_to_num(x)
59    }
60
61    #[inline(always)]
62    #[cfg(all(target_arch = "x86_64", target_feature = "bmi2", fast_pdep))]
63    fn scalar_to_num(x: u64) -> Self {
64        unsafe { _pext_u64(x, 0x000000000000017f) as u8 }
65    }
66
67    #[inline(always)]
68    #[cfg(not(all(target_arch = "x86_64", target_feature = "bmi2", fast_pdep)))]
69    fn scalar_to_num(x: u64) -> Self {
70        ((x & 0x000000000000007f) | ((x & 0x0000000000000100) >> 1)) as u8
71    }
72
73    #[inline(always)]
74    fn cast_u32(num: u32) -> Self {
75        num as u8
76    }
77
78    #[inline(always)]
79    fn cast_u64(num: u64) -> Self {
80        num as u8
81    }
82
83    #[inline(always)]
84    #[cfg(all(target_arch = "x86_64", target_feature = "bmi2", fast_pdep))]
85    fn num_to_scalar_stage1(self) -> u64 {
86        let x = self as u64;
87        unsafe { _pdep_u64(x, 0x000000000000017f) }
88    }
89
90    #[inline(always)]
91    #[cfg(not(all(target_arch = "x86_64", target_feature = "bmi2", fast_pdep)))]
92    fn num_to_scalar_stage1(self) -> u64 {
93        let x = self as u64;
94        (x & 0x000000000000007f) | ((x & 0x0000000000000080) << 1)
95    }
96
97    #[inline(always)]
98    #[cfg(all(target_arch = "x86_64", target_feature = "bmi2", fast_pdep))]
99    fn num_to_vector_stage1(self) -> [u8; 16] {
100        let mut res = [0u64; 2];
101        let x = self as u64;
102
103        res[0] = self.num_to_scalar_stage1();
104
105        unsafe { core::mem::transmute(res) }
106    }
107
108    #[inline(always)]
109    #[cfg(not(all(target_arch = "x86_64", target_feature = "bmi2", fast_pdep)))]
110    fn num_to_vector_stage1(self) -> [u8; 16] {
111        let mut res = [0u64; 2];
112
113        res[0] = self.num_to_scalar_stage1();
114
115        unsafe { core::mem::transmute(res) }
116    }
117
118    #[inline(always)]
119    fn zigzag(from: Self::Signed) -> Self {
120        ((from << 1) ^ (from >> 7)) as Self
121    }
122
123    #[inline(always)]
124    fn unzigzag(self) -> Self::Signed {
125        ((self >> 1) ^ (-((self & 1) as i8)) as u8) as i8
126    }
127}
128
129impl VarIntTarget for u16 {
130    type Signed = i16;
131    const MAX_VARINT_BYTES: u8 = 3;
132    const MAX_LAST_VARINT_BYTE: u8 = 0b00000011;
133
134    #[inline(always)]
135    fn vector_to_num(res: [u8; 16]) -> Self {
136        let arr: [u64; 2] = unsafe { core::mem::transmute(res) };
137        let x = arr[0];
138
139        Self::scalar_to_num(x)
140    }
141
142    #[inline(always)]
143    #[cfg(all(target_arch = "x86_64", target_feature = "bmi2", fast_pdep))]
144    fn scalar_to_num(x: u64) -> Self {
145        unsafe { _pext_u64(x, 0x0000000000037f7f) as u16 }
146    }
147
148    #[inline(always)]
149    #[cfg(not(all(target_arch = "x86_64", target_feature = "bmi2", fast_pdep)))]
150    fn scalar_to_num(x: u64) -> Self {
151        ((x & 0x000000000000007f)
152            | ((x & 0x0000000000030000) >> 2)
153            | ((x & 0x0000000000007f00) >> 1)) as u16
154    }
155
156    #[inline(always)]
157    fn cast_u32(num: u32) -> Self {
158        num as u16
159    }
160
161    #[inline(always)]
162    fn cast_u64(num: u64) -> Self {
163        num as u16
164    }
165
166    #[inline(always)]
167    #[cfg(all(target_arch = "x86_64", target_feature = "bmi2", fast_pdep))]
168    fn num_to_scalar_stage1(self) -> u64 {
169        let x = self as u64;
170        unsafe { _pdep_u64(x, 0x0000000000037f7f) }
171    }
172
173    #[inline(always)]
174    #[cfg(not(all(target_arch = "x86_64", target_feature = "bmi2", fast_pdep)))]
175    fn num_to_scalar_stage1(self) -> u64 {
176        let x = self as u64;
177        (x & 0x000000000000007f) | ((x & 0x0000000000003f80) << 1) | ((x & 0x000000000000c000) << 2)
178    }
179
180    #[inline(always)]
181    #[cfg(all(target_arch = "x86_64", target_feature = "bmi2", fast_pdep))]
182    fn num_to_vector_stage1(self) -> [u8; 16] {
183        let mut res = [0u64; 2];
184        let x = self as u64;
185
186        res[0] = self.num_to_scalar_stage1();
187
188        unsafe { core::mem::transmute(res) }
189    }
190
191    #[inline(always)]
192    #[cfg(not(all(target_arch = "x86_64", target_feature = "bmi2", fast_pdep)))]
193    fn num_to_vector_stage1(self) -> [u8; 16] {
194        let mut res = [0u64; 2];
195        res[0] = self.num_to_scalar_stage1();
196
197        unsafe { core::mem::transmute(res) }
198    }
199
200    #[inline(always)]
201    fn zigzag(from: Self::Signed) -> Self {
202        ((from << 1) ^ (from >> 15)) as Self
203    }
204
205    #[inline(always)]
206    fn unzigzag(self) -> Self::Signed {
207        ((self >> 1) ^ (-((self & 1) as i16)) as u16) as i16
208    }
209}
210
211impl VarIntTarget for u32 {
212    type Signed = i32;
213    const MAX_VARINT_BYTES: u8 = 5;
214    const MAX_LAST_VARINT_BYTE: u8 = 0b00001111;
215
216    #[inline(always)]
217    fn vector_to_num(res: [u8; 16]) -> Self {
218        let arr: [u64; 2] = unsafe { core::mem::transmute(res) };
219        let x = arr[0];
220
221        Self::scalar_to_num(x)
222    }
223
224    #[inline(always)]
225    #[cfg(all(target_arch = "x86_64", target_feature = "bmi2", fast_pdep))]
226    fn scalar_to_num(x: u64) -> Self {
227        unsafe { _pext_u64(x, 0x0000000f7f7f7f7f) as u32 }
228    }
229
230    #[inline(always)]
231    #[cfg(not(all(target_arch = "x86_64", target_feature = "bmi2", fast_pdep)))]
232    fn scalar_to_num(x: u64) -> Self {
233        ((x & 0x000000000000007f)
234            | ((x & 0x0000000f00000000) >> 4)
235            | ((x & 0x000000007f000000) >> 3)
236            | ((x & 0x00000000007f0000) >> 2)
237            | ((x & 0x0000000000007f00) >> 1)) as u32
238    }
239
240    #[inline(always)]
241    fn cast_u32(num: u32) -> Self {
242        num
243    }
244
245    #[inline(always)]
246    fn cast_u64(num: u64) -> Self {
247        num as u32
248    }
249
250    #[inline(always)]
251    #[cfg(all(target_arch = "x86_64", target_feature = "bmi2", fast_pdep))]
252    fn num_to_scalar_stage1(self) -> u64 {
253        let x = self as u64;
254        unsafe { _pdep_u64(x, 0x0000000f7f7f7f7f) }
255    }
256
257    #[inline(always)]
258    #[cfg(not(all(target_arch = "x86_64", target_feature = "bmi2", fast_pdep)))]
259    fn num_to_scalar_stage1(self) -> u64 {
260        let x = self as u64;
261        (x & 0x000000000000007f)
262            | ((x & 0x0000000000003f80) << 1)
263            | ((x & 0x00000000001fc000) << 2)
264            | ((x & 0x000000000fe00000) << 3)
265            | ((x & 0x00000000f0000000) << 4)
266    }
267
268    #[inline(always)]
269    #[cfg(all(target_arch = "x86_64", target_feature = "bmi2", fast_pdep))]
270    fn num_to_vector_stage1(self) -> [u8; 16] {
271        let mut res = [0u64; 2];
272        let x = self as u64;
273
274        res[0] = self.num_to_scalar_stage1();
275
276        unsafe { core::mem::transmute(res) }
277    }
278
279    #[inline(always)]
280    #[cfg(not(all(target_arch = "x86_64", target_feature = "bmi2", fast_pdep)))]
281    fn num_to_vector_stage1(self) -> [u8; 16] {
282        let mut res = [0u64; 2];
283        res[0] = self.num_to_scalar_stage1();
284
285        unsafe { core::mem::transmute(res) }
286    }
287
288    #[inline(always)]
289    fn zigzag(from: Self::Signed) -> Self {
290        ((from << 1) ^ (from >> 31)) as Self
291    }
292
293    #[inline(always)]
294    fn unzigzag(self) -> Self::Signed {
295        ((self >> 1) ^ (-((self & 1) as i32)) as u32) as i32
296    }
297}
298
299impl VarIntTarget for u64 {
300    type Signed = i64;
301    const MAX_VARINT_BYTES: u8 = 10;
302    const MAX_LAST_VARINT_BYTE: u8 = 0b00000001;
303
304    fn scalar_to_num(_x: u64) -> Self {
305        unimplemented!("destination too wide")
306    }
307
308    #[inline(always)]
309    #[cfg(all(target_feature = "bmi2", fast_pdep))]
310    fn vector_to_num(res: [u8; 16]) -> Self {
311        let arr: [u64; 2] = unsafe { core::mem::transmute(res) };
312
313        let x = arr[0];
314        let y = arr[1];
315
316        let res = unsafe { _pext_u64(x, 0x7f7f7f7f7f7f7f7f) }
317            | (unsafe { _pext_u64(y, 0x000000000000017f) } << 56);
318
319        res
320    }
321
322    #[inline(always)]
323    #[cfg(all(target_feature = "bmi2", fast_pdep))]
324    fn num_to_vector_stage1(self) -> [u8; 16] {
325        let mut res = [0u64; 2];
326        let x = self;
327
328        res[0] = unsafe { _pdep_u64(x, 0x7f7f7f7f7f7f7f7f) };
329        res[1] = unsafe { _pdep_u64(x >> 56, 0x000000000000017f) };
330
331        unsafe { core::mem::transmute(res) }
332    }
333
334    #[inline(always)]
335    #[cfg(all(target_feature = "avx2", not(all(target_feature = "bmi2", fast_pdep))))]
336    fn vector_to_num(res: [u8; 16]) -> Self {
337        let pt1 = unsafe {
338            let b = core::mem::transmute::<[u8; 16], __m128i>(res);
339
340            let c = _mm_broadcastq_epi64(b);
341            let d = _mm_or_si128(
342                _mm_or_si128(
343                    _mm_srlv_epi64(
344                        _mm_and_si128(c, _mm_set_epi64x(0x000000000000007f, 0x7f00000000000000)),
345                        _mm_set_epi64x(0, 7),
346                    ),
347                    _mm_srlv_epi64(
348                        _mm_and_si128(c, _mm_set_epi64x(0x007f000000000000, 0x00007f0000000000)),
349                        _mm_set_epi64x(6, 5),
350                    ),
351                ),
352                _mm_or_si128(
353                    _mm_srlv_epi64(
354                        _mm_and_si128(c, _mm_set_epi64x(0x0000007f00000000, 0x000000007f000000)),
355                        _mm_set_epi64x(4, 3),
356                    ),
357                    _mm_srlv_epi64(
358                        _mm_and_si128(c, _mm_set_epi64x(0x00000000007f0000, 0x0000000000007f00)),
359                        _mm_set_epi64x(2, 1),
360                    ),
361                ),
362            );
363
364            let e = _mm_or_si128(d, _mm_bsrli_si128(d, 8));
365            _mm_extract_epi64(e, 0) as u64
366        };
367
368        let arr: [u64; 2] = unsafe { core::mem::transmute(res) };
369
370        let y = arr[1];
371
372        // This incantation was generated with calcperm
373        pt1
374            // don't forget about bytes spilling to the other word
375            | ((y & 0x0000000000000100) << 55)
376            | ((y & 0x000000000000007f) << 56)
377    }
378
379    fn num_to_scalar_stage1(self) -> u64 {
380        panic!("source too wide")
381    }
382
383    #[inline(always)]
384    #[cfg(all(target_feature = "avx2", not(all(target_feature = "bmi2", fast_pdep))))]
385    fn num_to_vector_stage1(self) -> [u8; 16] {
386        let mut res = [0u64; 2];
387        let x = self;
388
389        let b = unsafe { _mm_set1_epi64x(self as i64) };
390        let c = unsafe {
391            _mm_or_si128(
392                _mm_or_si128(
393                    _mm_sllv_epi64(
394                        _mm_and_si128(b, _mm_set_epi64x(0x00000007f0000000, 0x000003f800000000)),
395                        _mm_set_epi64x(4, 5),
396                    ),
397                    _mm_sllv_epi64(
398                        _mm_and_si128(b, _mm_set_epi64x(0x0001fc0000000000, 0x00fe000000000000)),
399                        _mm_set_epi64x(6, 7),
400                    ),
401                ),
402                _mm_or_si128(
403                    _mm_sllv_epi64(
404                        _mm_and_si128(b, _mm_set_epi64x(0x000000000000007f, 0x0000000000003f80)),
405                        _mm_set_epi64x(0, 1),
406                    ),
407                    _mm_sllv_epi64(
408                        _mm_and_si128(b, _mm_set_epi64x(0x00000000001fc000, 0x000000000fe00000)),
409                        _mm_set_epi64x(2, 3),
410                    ),
411                ),
412            )
413        };
414        let d = unsafe { _mm_or_si128(c, _mm_bsrli_si128(c, 8)) };
415
416        res[0] = unsafe { _mm_extract_epi64(d, 0) as u64 };
417        res[1] = ((x & 0x7f00000000000000) >> 56) | ((x & 0x8000000000000000) >> 55);
418
419        unsafe { core::mem::transmute(res) }
420    }
421
422    #[inline(always)]
423    #[cfg(not(any(target_feature = "avx2", all(target_feature = "bmi2", fast_pdep))))]
424    fn vector_to_num(res: [u8; 16]) -> Self {
425        let arr: [u64; 2] = unsafe { core::mem::transmute(res) };
426
427        let x = arr[0];
428        let y = arr[1];
429
430        // This incantation was generated with calcperm
431        (x & 0x000000000000007f)
432            | ((x & 0x7f00000000000000) >> 7)
433            | ((x & 0x007f000000000000) >> 6)
434            | ((x & 0x00007f0000000000) >> 5)
435            | ((x & 0x0000007f00000000) >> 4)
436            | ((x & 0x000000007f000000) >> 3)
437            | ((x & 0x00000000007f0000) >> 2)
438            | ((x & 0x0000000000007f00) >> 1)
439            // don't forget about bytes spilling to the other word
440            | ((y & 0x0000000000000100) << 55)
441            | ((y & 0x000000000000007f) << 56)
442    }
443
444    #[inline(always)]
445    #[cfg(not(any(target_feature = "avx2", all(target_feature = "bmi2", fast_pdep))))]
446    fn num_to_vector_stage1(self) -> [u8; 16] {
447        let mut res = [0u64; 2];
448        let x = self;
449
450        res[0] = (x & 0x000000000000007f)
451            | ((x & 0x0000000000003f80) << 1)
452            | ((x & 0x00000000001fc000) << 2)
453            | ((x & 0x000000000fe00000) << 3)
454            | ((x & 0x00000007f0000000) << 4)
455            | ((x & 0x000003f800000000) << 5)
456            | ((x & 0x0001fc0000000000) << 6)
457            | ((x & 0x00fe000000000000) << 7);
458        res[1] = ((x & 0x7f00000000000000) >> 56) | ((x & 0x8000000000000000) >> 55);
459
460        unsafe { core::mem::transmute(res) }
461    }
462
463    #[inline(always)]
464    fn cast_u32(num: u32) -> Self {
465        num as u64
466    }
467
468    #[inline(always)]
469    fn cast_u64(num: u64) -> Self {
470        num
471    }
472
473    #[inline(always)]
474    fn zigzag(from: Self::Signed) -> Self {
475        ((from << 1) ^ (from >> 63)) as Self
476    }
477
478    #[inline(always)]
479    fn unzigzag(self) -> Self::Signed {
480        ((self >> 1) ^ (-((self & 1) as i64)) as u64) as i64
481    }
482}
483
484/// Represents a signed scalar value that can be encoded to and decoded from a varint in ZigZag
485/// format.
486pub trait SignedVarIntTarget: Debug + Eq + PartialEq + Sized + Copy {
487    type Unsigned: VarIntTarget<Signed = Self>;
488
489    /// ZigZag encodes this value
490    #[inline(always)]
491    fn zigzag(from: Self) -> Self::Unsigned {
492        Self::Unsigned::zigzag(from)
493    }
494
495    /// ZigZag decodes this value
496    #[inline(always)]
497    fn unzigzag(from: Self::Unsigned) -> Self {
498        Self::Unsigned::unzigzag(from)
499    }
500}
501
502impl SignedVarIntTarget for i8 {
503    type Unsigned = u8;
504}
505
506impl SignedVarIntTarget for i16 {
507    type Unsigned = u16;
508}
509
510impl SignedVarIntTarget for i32 {
511    type Unsigned = u32;
512}
513
514impl SignedVarIntTarget for i64 {
515    type Unsigned = u64;
516}