#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

use core::fmt::Debug;

/// Functions and constants needed to decode and encode a varint into this
/// integer type.
pub trait VarIntTarget: Debug + Eq + PartialEq + PartialOrd + Sized + Copy {
    /// The signed version of this type, used for zigzag encoding
    type Signed: SignedVarIntTarget;

    /// The maximum number of bytes a varint of this type can occupy
    const MAX_VARINT_BYTES: u8;

    /// The largest value the last byte of a maximum-length varint of this
    /// type can take
    const MAX_LAST_VARINT_BYTE: u8;

    /// Converts a 128-bit vector of varint data (with irrelevant bytes
    /// zeroed) into a number of this type
    fn vector_to_num(res: [u8; 16]) -> Self;

    /// Gathers the 7-bit payload groups from the low bytes of `x` into a
    /// number of this type (unimplemented for u64, which needs both lanes)
    fn scalar_to_num(x: u64) -> Self;

    /// Casts a u32 to this type, truncating as necessary
    fn cast_u32(num: u32) -> Self;

    /// Casts a u64 to this type, truncating as necessary
    fn cast_u64(num: u64) -> Self;

    /// Spreads the bits of this number into 7-bit groups within a single
    /// 64-bit value, leaving the continuation bits unset
    fn num_to_scalar_stage1(self) -> u64;

    /// Spreads the bits of this number into 7-bit groups across a 128-bit
    /// vector, leaving the continuation bits unset
    fn num_to_vector_stage1(self) -> [u8; 16];

    /// Zigzag-encodes a signed value into this unsigned type
    fn zigzag(from: Self::Signed) -> Self;

    /// Reverses the zigzag encoding, recovering the original signed value
    fn unzigzag(self) -> Self::Signed;
}
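// A minimal sketch (hypothetical, not part of the trait) of how the stage-1
// encode helpers are meant to be composed: stage 1 only spreads the payload
// into 7-bit groups, and the caller is expected to OR in the continuation
// bits and compute the encoded length.
#[allow(dead_code)]
fn encode_u8_sketch(value: u8) -> ([u8; 2], usize) {
    let bytes = value.num_to_scalar_stage1().to_le_bytes();
    if bytes[1] != 0 {
        // Two-byte varint: set the continuation bit on the first byte
        ([bytes[0] | 0x80, bytes[1]], 2)
    } else {
        ([bytes[0], 0], 1)
    }
}
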
impl VarIntTarget for u8 {
    type Signed = i8;
    const MAX_VARINT_BYTES: u8 = 2;
    const MAX_LAST_VARINT_BYTE: u8 = 0b00000001;

    #[inline(always)]
    fn vector_to_num(res: [u8; 16]) -> Self {
        let arr: [u64; 2] = unsafe { core::mem::transmute(res) };
        let x = arr[0];

        Self::scalar_to_num(x)
    }

    #[inline(always)]
    #[cfg(all(target_arch = "x86_64", target_feature = "bmi2", fast_pdep))]
    fn scalar_to_num(x: u64) -> Self {
        unsafe { _pext_u64(x, 0x000000000000017f) as u8 }
    }

    #[inline(always)]
    #[cfg(not(all(target_arch = "x86_64", target_feature = "bmi2", fast_pdep)))]
    fn scalar_to_num(x: u64) -> Self {
        // Gather the low 7 bits of byte 0 and the single data bit of byte 1
        ((x & 0x000000000000007f) | ((x & 0x0000000000000100) >> 1)) as u8
    }

    #[inline(always)]
    fn cast_u32(num: u32) -> Self {
        num as u8
    }

    #[inline(always)]
    fn cast_u64(num: u64) -> Self {
        num as u8
    }

    #[inline(always)]
    #[cfg(all(target_arch = "x86_64", target_feature = "bmi2", fast_pdep))]
    fn num_to_scalar_stage1(self) -> u64 {
        let x = self as u64;
        unsafe { _pdep_u64(x, 0x000000000000017f) }
    }

    #[inline(always)]
    #[cfg(not(all(target_arch = "x86_64", target_feature = "bmi2", fast_pdep)))]
    fn num_to_scalar_stage1(self) -> u64 {
        // Scatter the low 7 bits into byte 0 and the high bit into byte 1
        let x = self as u64;
        (x & 0x000000000000007f) | ((x & 0x0000000000000080) << 1)
    }

    #[inline(always)]
    fn num_to_vector_stage1(self) -> [u8; 16] {
        let mut res = [0u64; 2];
        res[0] = self.num_to_scalar_stage1();

        unsafe { core::mem::transmute(res) }
    }

    #[inline(always)]
    fn zigzag(from: Self::Signed) -> Self {
        ((from << 1) ^ (from >> 7)) as Self
    }

    #[inline(always)]
    fn unzigzag(self) -> Self::Signed {
        ((self >> 1) ^ (-((self & 1) as i8)) as u8) as i8
    }
}
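// Worked zigzag examples for the impl above (a hypothetical helper, not in
// the original API): 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ..., i8::MIN -> 255,
// so values of small magnitude encode to short varints.
#[allow(dead_code)]
fn zigzag_i8_demo() {
    debug_assert_eq!(u8::zigzag(-1i8), 1);
    debug_assert_eq!(u8::zigzag(1i8), 2);
    debug_assert_eq!(u8::zigzag(i8::MIN), 255);
    debug_assert_eq!(255u8.unzigzag(), i8::MIN);
}
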
impl VarIntTarget for u16 {
    type Signed = i16;
    const MAX_VARINT_BYTES: u8 = 3;
    const MAX_LAST_VARINT_BYTE: u8 = 0b00000011;

    #[inline(always)]
    fn vector_to_num(res: [u8; 16]) -> Self {
        let arr: [u64; 2] = unsafe { core::mem::transmute(res) };
        let x = arr[0];

        Self::scalar_to_num(x)
    }

    #[inline(always)]
    #[cfg(all(target_arch = "x86_64", target_feature = "bmi2", fast_pdep))]
    fn scalar_to_num(x: u64) -> Self {
        unsafe { _pext_u64(x, 0x0000000000037f7f) as u16 }
    }

    #[inline(always)]
    #[cfg(not(all(target_arch = "x86_64", target_feature = "bmi2", fast_pdep)))]
    fn scalar_to_num(x: u64) -> Self {
        // Gather the 7-bit groups from bytes 0 and 1 and the two data bits
        // of byte 2
        ((x & 0x000000000000007f)
            | ((x & 0x0000000000030000) >> 2)
            | ((x & 0x0000000000007f00) >> 1)) as u16
    }

    #[inline(always)]
    fn cast_u32(num: u32) -> Self {
        num as u16
    }

    #[inline(always)]
    fn cast_u64(num: u64) -> Self {
        num as u16
    }

    #[inline(always)]
    #[cfg(all(target_arch = "x86_64", target_feature = "bmi2", fast_pdep))]
    fn num_to_scalar_stage1(self) -> u64 {
        let x = self as u64;
        unsafe { _pdep_u64(x, 0x0000000000037f7f) }
    }

    #[inline(always)]
    #[cfg(not(all(target_arch = "x86_64", target_feature = "bmi2", fast_pdep)))]
    fn num_to_scalar_stage1(self) -> u64 {
        let x = self as u64;
        (x & 0x000000000000007f) | ((x & 0x0000000000003f80) << 1) | ((x & 0x000000000000c000) << 2)
    }

    #[inline(always)]
    fn num_to_vector_stage1(self) -> [u8; 16] {
        let mut res = [0u64; 2];
        res[0] = self.num_to_scalar_stage1();

        unsafe { core::mem::transmute(res) }
    }

    #[inline(always)]
    fn zigzag(from: Self::Signed) -> Self {
        ((from << 1) ^ (from >> 15)) as Self
    }

    #[inline(always)]
    fn unzigzag(self) -> Self::Signed {
        ((self >> 1) ^ (-((self & 1) as i16)) as u16) as i16
    }
}
impl VarIntTarget for u32 {
    type Signed = i32;
    const MAX_VARINT_BYTES: u8 = 5;
    const MAX_LAST_VARINT_BYTE: u8 = 0b00001111;

    #[inline(always)]
    fn vector_to_num(res: [u8; 16]) -> Self {
        let arr: [u64; 2] = unsafe { core::mem::transmute(res) };
        let x = arr[0];

        Self::scalar_to_num(x)
    }

    #[inline(always)]
    #[cfg(all(target_arch = "x86_64", target_feature = "bmi2", fast_pdep))]
    fn scalar_to_num(x: u64) -> Self {
        unsafe { _pext_u64(x, 0x0000000f7f7f7f7f) as u32 }
    }

    #[inline(always)]
    #[cfg(not(all(target_arch = "x86_64", target_feature = "bmi2", fast_pdep)))]
    fn scalar_to_num(x: u64) -> Self {
        // Gather the 7-bit groups from bytes 0-3 and the four data bits of
        // byte 4
        ((x & 0x000000000000007f)
            | ((x & 0x0000000f00000000) >> 4)
            | ((x & 0x000000007f000000) >> 3)
            | ((x & 0x00000000007f0000) >> 2)
            | ((x & 0x0000000000007f00) >> 1)) as u32
    }

    #[inline(always)]
    fn cast_u32(num: u32) -> Self {
        num
    }

    #[inline(always)]
    fn cast_u64(num: u64) -> Self {
        num as u32
    }

    #[inline(always)]
    #[cfg(all(target_arch = "x86_64", target_feature = "bmi2", fast_pdep))]
    fn num_to_scalar_stage1(self) -> u64 {
        let x = self as u64;
        unsafe { _pdep_u64(x, 0x0000000f7f7f7f7f) }
    }

    #[inline(always)]
    #[cfg(not(all(target_arch = "x86_64", target_feature = "bmi2", fast_pdep)))]
    fn num_to_scalar_stage1(self) -> u64 {
        let x = self as u64;
        (x & 0x000000000000007f)
            | ((x & 0x0000000000003f80) << 1)
            | ((x & 0x00000000001fc000) << 2)
            | ((x & 0x000000000fe00000) << 3)
            | ((x & 0x00000000f0000000) << 4)
    }

    #[inline(always)]
    fn num_to_vector_stage1(self) -> [u8; 16] {
        let mut res = [0u64; 2];
        res[0] = self.num_to_scalar_stage1();

        unsafe { core::mem::transmute(res) }
    }

    #[inline(always)]
    fn zigzag(from: Self::Signed) -> Self {
        ((from << 1) ^ (from >> 31)) as Self
    }

    #[inline(always)]
    fn unzigzag(self) -> Self::Signed {
        ((self >> 1) ^ (-((self & 1) as i32)) as u32) as i32
    }
}
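// A hypothetical usage sketch: decoding the two-byte varint [0xAC, 0x02]
// (the canonical encoding of 300) by loading the raw bytes little-endian
// into a u64 and letting `scalar_to_num` gather the 7-bit payload groups.
#[allow(dead_code)]
fn decode_u32_sketch() -> u32 {
    let raw = u64::from_le_bytes([0xAC, 0x02, 0, 0, 0, 0, 0, 0]);
    let decoded = <u32 as VarIntTarget>::scalar_to_num(raw);
    debug_assert_eq!(decoded, 300);
    decoded
}
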
impl VarIntTarget for u64 {
    type Signed = i64;
    const MAX_VARINT_BYTES: u8 = 10;
    const MAX_LAST_VARINT_BYTE: u8 = 0b00000001;

    fn scalar_to_num(_x: u64) -> Self {
        // A u64 varint can span up to 10 bytes, which does not fit in a
        // single 64-bit lane
        unimplemented!("destination too wide")
    }

    #[inline(always)]
    #[cfg(all(target_feature = "bmi2", fast_pdep))]
    fn vector_to_num(res: [u8; 16]) -> Self {
        let arr: [u64; 2] = unsafe { core::mem::transmute(res) };

        let x = arr[0];
        let y = arr[1];

        // Gather the 7-bit groups from the low lane, then the final group
        // and the top bit from the high lane
        unsafe { _pext_u64(x, 0x7f7f7f7f7f7f7f7f) }
            | (unsafe { _pext_u64(y, 0x000000000000017f) } << 56)
    }

    #[inline(always)]
    #[cfg(all(target_feature = "bmi2", fast_pdep))]
    fn num_to_vector_stage1(self) -> [u8; 16] {
        let mut res = [0u64; 2];
        let x = self;

        res[0] = unsafe { _pdep_u64(x, 0x7f7f7f7f7f7f7f7f) };
        res[1] = unsafe { _pdep_u64(x >> 56, 0x000000000000017f) };

        unsafe { core::mem::transmute(res) }
    }

    #[inline(always)]
    #[cfg(all(target_feature = "avx2", not(all(target_feature = "bmi2", fast_pdep))))]
    fn vector_to_num(res: [u8; 16]) -> Self {
        // Emulate the pext gather for the low lane with vectorized variable
        // shifts
        let pt1 = unsafe {
            let b = core::mem::transmute::<[u8; 16], __m128i>(res);

            let c = _mm_broadcastq_epi64(b);
            let d = _mm_or_si128(
                _mm_or_si128(
                    _mm_srlv_epi64(
                        _mm_and_si128(c, _mm_set_epi64x(0x000000000000007f, 0x7f00000000000000)),
                        _mm_set_epi64x(0, 7),
                    ),
                    _mm_srlv_epi64(
                        _mm_and_si128(c, _mm_set_epi64x(0x007f000000000000, 0x00007f0000000000)),
                        _mm_set_epi64x(6, 5),
                    ),
                ),
                _mm_or_si128(
                    _mm_srlv_epi64(
                        _mm_and_si128(c, _mm_set_epi64x(0x0000007f00000000, 0x000000007f000000)),
                        _mm_set_epi64x(4, 3),
                    ),
                    _mm_srlv_epi64(
                        _mm_and_si128(c, _mm_set_epi64x(0x00000000007f0000, 0x0000000000007f00)),
                        _mm_set_epi64x(2, 1),
                    ),
                ),
            );

            let e = _mm_or_si128(d, _mm_bsrli_si128(d, 8));
            _mm_extract_epi64(e, 0) as u64
        };

        let arr: [u64; 2] = unsafe { core::mem::transmute(res) };
        let y = arr[1];

        pt1
            | ((y & 0x0000000000000100) << 55)
            | ((y & 0x000000000000007f) << 56)
    }

    fn num_to_scalar_stage1(self) -> u64 {
        // A u64 varint can span up to 10 bytes, which does not fit in a
        // single 64-bit lane
        panic!("source too wide")
    }

    #[inline(always)]
    #[cfg(all(target_feature = "avx2", not(all(target_feature = "bmi2", fast_pdep))))]
    fn num_to_vector_stage1(self) -> [u8; 16] {
        let mut res = [0u64; 2];
        let x = self;

        // Emulate the pdep scatter for the low lane with vectorized variable
        // shifts
        let b = unsafe { _mm_set1_epi64x(self as i64) };
        let c = unsafe {
            _mm_or_si128(
                _mm_or_si128(
                    _mm_sllv_epi64(
                        _mm_and_si128(b, _mm_set_epi64x(0x00000007f0000000, 0x000003f800000000)),
                        _mm_set_epi64x(4, 5),
                    ),
                    _mm_sllv_epi64(
                        _mm_and_si128(b, _mm_set_epi64x(0x0001fc0000000000, 0x00fe000000000000)),
                        _mm_set_epi64x(6, 7),
                    ),
                ),
                _mm_or_si128(
                    _mm_sllv_epi64(
                        _mm_and_si128(b, _mm_set_epi64x(0x000000000000007f, 0x0000000000003f80)),
                        _mm_set_epi64x(0, 1),
                    ),
                    _mm_sllv_epi64(
                        _mm_and_si128(b, _mm_set_epi64x(0x00000000001fc000, 0x000000000fe00000)),
                        _mm_set_epi64x(2, 3),
                    ),
                ),
            )
        };
        let d = unsafe { _mm_or_si128(c, _mm_bsrli_si128(c, 8)) };

        res[0] = unsafe { _mm_extract_epi64(d, 0) as u64 };
        res[1] = ((x & 0x7f00000000000000) >> 56) | ((x & 0x8000000000000000) >> 55);

        unsafe { core::mem::transmute(res) }
    }

    #[inline(always)]
    #[cfg(not(any(target_feature = "avx2", all(target_feature = "bmi2", fast_pdep))))]
    fn vector_to_num(res: [u8; 16]) -> Self {
        let arr: [u64; 2] = unsafe { core::mem::transmute(res) };

        let x = arr[0];
        let y = arr[1];

        (x & 0x000000000000007f)
            | ((x & 0x7f00000000000000) >> 7)
            | ((x & 0x007f000000000000) >> 6)
            | ((x & 0x00007f0000000000) >> 5)
            | ((x & 0x0000007f00000000) >> 4)
            | ((x & 0x000000007f000000) >> 3)
            | ((x & 0x00000000007f0000) >> 2)
            | ((x & 0x0000000000007f00) >> 1)
            | ((y & 0x0000000000000100) << 55)
            | ((y & 0x000000000000007f) << 56)
    }

    #[inline(always)]
    #[cfg(not(any(target_feature = "avx2", all(target_feature = "bmi2", fast_pdep))))]
    fn num_to_vector_stage1(self) -> [u8; 16] {
        let mut res = [0u64; 2];
        let x = self;

        res[0] = (x & 0x000000000000007f)
            | ((x & 0x0000000000003f80) << 1)
            | ((x & 0x00000000001fc000) << 2)
            | ((x & 0x000000000fe00000) << 3)
            | ((x & 0x00000007f0000000) << 4)
            | ((x & 0x000003f800000000) << 5)
            | ((x & 0x0001fc0000000000) << 6)
            | ((x & 0x00fe000000000000) << 7);
        res[1] = ((x & 0x7f00000000000000) >> 56) | ((x & 0x8000000000000000) >> 55);

        unsafe { core::mem::transmute(res) }
    }

    #[inline(always)]
    fn cast_u32(num: u32) -> Self {
        num as u64
    }

    #[inline(always)]
    fn cast_u64(num: u64) -> Self {
        num
    }

    #[inline(always)]
    fn zigzag(from: Self::Signed) -> Self {
        ((from << 1) ^ (from >> 63)) as Self
    }

    #[inline(always)]
    fn unzigzag(self) -> Self::Signed {
        ((self >> 1) ^ (-((self & 1) as i64)) as u64) as i64
    }
}
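// A hypothetical round-trip sketch for u64: `num_to_vector_stage1` spreads
// the payload into 7-bit groups across two 64-bit lanes (a maximum-length
// 10-byte varint does not fit in one u64), and `vector_to_num` gathers the
// same groups back, so the pair should be inverse for any input.
#[allow(dead_code)]
fn u64_stage1_round_trip(value: u64) -> u64 {
    let spread = value.num_to_vector_stage1();
    let recovered = <u64 as VarIntTarget>::vector_to_num(spread);
    debug_assert_eq!(recovered, value);
    recovered
}
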
/// The signed counterpart of [`VarIntTarget`], providing zigzag conversions
/// to and from the matching unsigned type.
pub trait SignedVarIntTarget: Debug + Eq + PartialEq + Sized + Copy {
    type Unsigned: VarIntTarget<Signed = Self>;

    /// Zigzag-encodes this signed value into the unsigned type
    #[inline(always)]
    fn zigzag(from: Self) -> Self::Unsigned {
        Self::Unsigned::zigzag(from)
    }

    /// Reverses the zigzag encoding, recovering the signed value
    #[inline(always)]
    fn unzigzag(from: Self::Unsigned) -> Self {
        Self::Unsigned::unzigzag(from)
    }
}

impl SignedVarIntTarget for i8 {
    type Unsigned = u8;
}

impl SignedVarIntTarget for i16 {
    type Unsigned = u16;
}

impl SignedVarIntTarget for i32 {
    type Unsigned = u32;
}

impl SignedVarIntTarget for i64 {
    type Unsigned = u64;
}
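
// A minimal test sketch (not from the original source) exercising the
// zigzag round-trip through the SignedVarIntTarget convenience wrappers.
#[cfg(test)]
mod zigzag_tests {
    use super::*;

    #[test]
    fn zigzag_round_trips() {
        // Small magnitudes map to small codes: -1 -> 1, 1 -> 2, -2 -> 3
        assert_eq!(i64::zigzag(-1), 1u64);
        assert_eq!(i64::zigzag(1), 2u64);
        assert_eq!(i64::zigzag(-2), 3u64);

        for v in [i32::MIN, -1, 0, 1, i32::MAX] {
            assert_eq!(i32::unzigzag(i32::zigzag(v)), v);
        }
    }
}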