1use arrayref::{array_mut_ref, array_ref};
17use core::mem::size_of;
18use paste::paste;
19
/// Ordering table consulted by the `index` helpers inside the `pack!` and `unpack!` macros.
///
/// The table is a permutation of `0..8`; entry `i` is the 3-bit bit-reversal of `i`
/// (`0b001 -> 0b100`, `0b011 -> 0b110`, ...). `index` looks it up with `row / 8` and
/// multiplies the result by 16.
pub const FL_ORDER: [usize; 8] = [0, 4, 2, 6, 1, 5, 3, 7];
21
/// Unsigned integer types that can be laid out in 1024-bit blocks.
///
/// Both associated constants are derived from the size of the implementing type, which is
/// why the impls below have empty bodies.
pub trait FastLanes: Sized + Copy {
    /// Width of `Self` in bits.
    const T: usize = 8 * size_of::<Self>();
    /// Number of `Self`-sized words that make up 1024 bits.
    const LANES: usize = 1024 / Self::T;
}

impl FastLanes for u8 {}
impl FastLanes for u16 {}
impl FastLanes for u32 {}
impl FastLanes for u64 {}
32
/// Packs one lane of values, `$W` bits each, into `$packed`.
///
/// Arguments:
/// - `$T`: the word type; its bit width is read from `<$T>::T`.
/// - `$W`: the packed bit width.
/// - `$packed`: an indexable destination that receives words of type `$T`.
/// - `$lane`: the lane to process.
/// - `| $idx | body`: a closure-like kernel. It is turned into the local macro `__kernel__!`,
///   which is invoked with an element index and must evaluate to the value to pack.
///
/// NOTE(review): `$_1` is spliced directly in front of `$idx:ident` in the nested matcher, so
/// callers presumably pass a literal `$` token there to form a metavariable — confirm against
/// the call sites.
macro_rules! pack {
    ($T:ty, $W:expr, $packed:expr, $lane:expr, | $_1:tt $idx:ident | $($body:tt)*) => {
        // Wrap the caller's body so it can be invoked as `__kernel__!(index)`.
        macro_rules! __kernel__ {( $_1 $idx:ident ) => ( $($body)* )}
        {
            use paste::paste;

            // Bit width of one word of `$T`.
            const T: usize = <$T>::T;

            // Maps a (row, lane) pair to an element position:
            // `FL_ORDER[row / 8] * 16 + (row % 8) * 128 + lane`.
            #[inline(always)]
            fn index(row: usize, lane: usize) -> usize {
                let o = row / 8;
                let s = row % 8;
                (FL_ORDER[o] * 16) + (s * 128) + lane
            }

            if $W == 0 {
                // Zero-width values: nothing is written to `$packed`.
            } else if $W == T {
                // Full-width values: every row is stored unchanged in its own word.
                paste!(seq_t!(row in $T {
                    let idx = index(row, $lane);
                    $packed[<$T>::LANES * row + $lane] = __kernel__!(idx);
                }));
            } else {
                // Keeps only the low `$W` bits of each source value.
                let mask: $T = (1 << $W) - 1;

                // Accumulates the output word currently being filled.
                let mut tmp: $T = 0;

                // `seq_t!` repeats this body for `row` in `0..T`.
                paste!(seq_t!(row in $T {
                    let idx = index(row, $lane);
                    let src = __kernel__!(idx);
                    let src = src & mask;

                    if row == 0 {
                        tmp = src;
                    } else {
                        // Place the value at its bit offset within the current word; bits
                        // shifted past the top of the word are re-added as the carry below.
                        tmp |= src << (row * $W) % T;
                    }

                    // Word holding the first bit of this row, and of the following row.
                    let curr_word: usize = (row * $W) / T;
                    let next_word: usize = ((row + 1) * $W) / T;

                    // The carry assigned after the final store is never read again.
                    #[allow(unused_assignments)]
                    if next_word > curr_word {
                        // The current word is complete: store it.
                        $packed[<$T>::LANES * curr_word + $lane] = tmp;
                        // Number of bits of this value that belong to the next word.
                        let remaining_bits: usize = ((row + 1) * $W) % T;
                        // Start the next word with those high bits; this is 0 when the
                        // value ended exactly on the word boundary.
                        tmp = src >> $W - remaining_bits;
                    }
                }));
            }
        }
    };
}
96
/// Unpacks one lane of `$W`-bit values from `$packed` and hands each one to a kernel.
///
/// Arguments:
/// - `$T`: the word type; its bit width is read from `<$T>::T`.
/// - `$W`: the packed bit width.
/// - `$packed`: an indexable source of words of type `$T`.
/// - `$lane`: the lane to process.
/// - `| $idx, $elem | body`: a closure-like kernel. It is turned into the local macro
///   `__kernel__!`, which is invoked once per row with the element index and the unpacked
///   value.
///
/// NOTE(review): `$_1` and `$_2` are spliced directly in front of the identifiers in the
/// nested matcher, so callers presumably pass literal `$` tokens there — confirm against the
/// call sites.
macro_rules! unpack {
    ($T:ty, $W:expr, $packed:expr, $lane:expr, | $_1:tt $idx:ident, $_2:tt $elem:ident | $($body:tt)*) => {
        // Wrap the caller's body so it can be invoked as `__kernel__!(index, value)`.
        macro_rules! __kernel__ {( $_1 $idx:ident, $_2 $elem:ident ) => ( $($body)* )}
        {
            use paste::paste;

            // Bit width of one word of `$T`.
            const T: usize = <$T>::T;

            // Maps a (row, lane) pair to an element position:
            // `FL_ORDER[row / 8] * 16 + (row % 8) * 128 + lane`.
            #[inline(always)]
            fn index(row: usize, lane: usize) -> usize {
                let o = row / 8;
                let s = row % 8;
                (FL_ORDER[o] * 16) + (s * 128) + lane
            }

            if $W == 0 {
                // Zero-width values: `$packed` is not read, every row yields 0.
                paste!(seq_t!(row in $T {
                    let idx = index(row, $lane);
                    let zero: $T = 0;
                    __kernel__!(idx, zero);
                }));
            } else if $W == T {
                // Full-width values: each row is read unchanged from its own word.
                paste!(seq_t!(row in $T {
                    let idx = index(row, $lane);
                    let src = $packed[<$T>::LANES * row + $lane];
                    __kernel__!(idx, src);
                }));
            } else {
                // Mask with the low `width` bits set; `width == T` is handled separately
                // because `1 << T` would shift by the full word width.
                #[inline]
                fn mask(width: usize) -> $T {
                    if width == T { <$T>::MAX } else { (1 << (width % T)) - 1 }
                }

                // Word currently being consumed, starting with the first word of the lane.
                let mut src: $T = $packed[$lane];
                // Value being assembled for the current row.
                let mut tmp: $T;

                // `seq_t!` repeats this body for `row` in `0..T`.
                paste!(seq_t!(row in $T {
                    // Word holding the first bit of this row, and of the following row.
                    let curr_word: usize = (row * $W) / T;
                    let next_word = ((row + 1) * $W) / T;

                    // Bit offset of this row's value within the current word.
                    let shift = (row * $W) % T;

                    if next_word > curr_word {
                        // This value is the last one to touch the current word.
                        let remaining_bits = ((row + 1) * $W) % T;
                        let current_bits = $W - remaining_bits;
                        // Low part: the bits that live in the current word.
                        tmp = (src >> shift) & mask(current_bits);

                        // The load is skipped once `next_word` reaches `$W`
                        // (presumably because a lane holds `$W` packed words).
                        if next_word < $W {
                            src = $packed[<$T>::LANES * next_word + $lane];
                            // High part: the bits that live in the next word.
                            tmp |= (src & mask(remaining_bits)) << current_bits;
                        }
                    } else {
                        // The value lies entirely inside the current word.
                        tmp = (src >> shift) & mask($W);
                    }

                    let idx = index(row, $lane);
                    __kernel__!(idx, tmp);
                }));
            }
        }
    };
}
170
/// Forwards `$body` to `seq_macro::seq!` with `$ident` ranging over `0..N`, where `N` is the
/// bit width of the named type: 8 for `u8`, 16 for `u16`, 32 for `u32` and 64 for `u64`.
///
/// The type is matched as a literal token, so only these four type names are accepted.
macro_rules! seq_t {
    ($ident:ident in u8 $body:tt) => {seq_macro::seq!($ident in 0..8 $body)};
    ($ident:ident in u16 $body:tt) => {seq_macro::seq!($ident in 0..16 $body)};
    ($ident:ident in u32 $body:tt) => {seq_macro::seq!($ident in 0..32 $body)};
    ($ident:ident in u64 $body:tt) => {seq_macro::seq!($ident in 0..64 $body)};
}
178
/// Packing and unpacking entry points that take the bit width as a runtime value.
pub trait BitPacking: FastLanes {
    /// Packs the elements of `input` into `output` using `width` bits per element.
    ///
    /// # Safety
    /// The implementations in this file expect `input.len() == 1024`,
    /// `output.len() == 1024 * width / T` (with `T` the bit width of `Self`) and
    /// `width <= T`. These conditions are verified with debug assertions only.
    unsafe fn unchecked_pack(width: usize, input: &[Self], output: &mut [Self]);

    /// Unpacks `width`-bit elements from `input` into `output`.
    ///
    /// # Safety
    /// The implementations in this file expect `input.len() == 1024 * width / T` (with `T`
    /// the bit width of `Self`), `output.len() == 1024` and `width <= T`. These conditions
    /// are verified with debug assertions only.
    unsafe fn unchecked_unpack(width: usize, input: &[Self], output: &mut [Self]);
}
199
200impl BitPacking for u8 {
201 unsafe fn unchecked_pack(width: usize, input: &[Self], output: &mut [Self]) {
202 let packed_len = 128 * width / size_of::<Self>();
203 debug_assert_eq!(
204 output.len(),
205 packed_len,
206 "Output buffer must be of size 1024 * W / T"
207 );
208 debug_assert_eq!(input.len(), 1024, "Input buffer must be of size 1024");
209 debug_assert!(
210 width <= Self::T,
211 "Width must be less than or equal to {}",
212 Self::T
213 );
214
215 match width {
216 0 => {
217 }
219 1 => pack_8_1(
220 array_ref![input, 0, 1024],
221 array_mut_ref![output, 0, 1024 / 8],
222 ),
223 2 => pack_8_2(
224 array_ref![input, 0, 1024],
225 array_mut_ref![output, 0, 1024 * 2 / 8],
226 ),
227 3 => pack_8_3(
228 array_ref![input, 0, 1024],
229 array_mut_ref![output, 0, 1024 * 3 / 8],
230 ),
231 4 => pack_8_4(
232 array_ref![input, 0, 1024],
233 array_mut_ref![output, 0, 1024 * 4 / 8],
234 ),
235 5 => pack_8_5(
236 array_ref![input, 0, 1024],
237 array_mut_ref![output, 0, 1024 * 5 / 8],
238 ),
239 6 => pack_8_6(
240 array_ref![input, 0, 1024],
241 array_mut_ref![output, 0, 1024 * 6 / 8],
242 ),
243 7 => pack_8_7(
244 array_ref![input, 0, 1024],
245 array_mut_ref![output, 0, 1024 * 7 / 8],
246 ),
247 8 => pack_8_8(
248 array_ref![input, 0, 1024],
249 array_mut_ref![output, 0, 1024 * 8 / 8],
250 ),
251
252 _ => unreachable!("Unsupported width: {}", width),
253 }
254 }
255
256 unsafe fn unchecked_unpack(width: usize, input: &[Self], output: &mut [Self]) {
257 let packed_len = 128 * width / size_of::<Self>();
258 debug_assert_eq!(
259 input.len(),
260 packed_len,
261 "Input buffer must be of size 1024 * W / T"
262 );
263 debug_assert_eq!(output.len(), 1024, "Output buffer must be of size 1024");
264 debug_assert!(
265 width <= Self::T,
266 "Width must be less than or equal to {}",
267 Self::T
268 );
269
270 match width {
271 0 => {
272 output.fill(0);
274 }
275 1 => unpack_8_1(
276 array_ref![input, 0, 1024 / 8],
277 array_mut_ref![output, 0, 1024],
278 ),
279 2 => unpack_8_2(
280 array_ref![input, 0, 1024 * 2 / 8],
281 array_mut_ref![output, 0, 1024],
282 ),
283 3 => unpack_8_3(
284 array_ref![input, 0, 1024 * 3 / 8],
285 array_mut_ref![output, 0, 1024],
286 ),
287 4 => unpack_8_4(
288 array_ref![input, 0, 1024 * 4 / 8],
289 array_mut_ref![output, 0, 1024],
290 ),
291 5 => unpack_8_5(
292 array_ref![input, 0, 1024 * 5 / 8],
293 array_mut_ref![output, 0, 1024],
294 ),
295 6 => unpack_8_6(
296 array_ref![input, 0, 1024 * 6 / 8],
297 array_mut_ref![output, 0, 1024],
298 ),
299 7 => unpack_8_7(
300 array_ref![input, 0, 1024 * 7 / 8],
301 array_mut_ref![output, 0, 1024],
302 ),
303 8 => unpack_8_8(
304 array_ref![input, 0, 1024 * 8 / 8],
305 array_mut_ref![output, 0, 1024],
306 ),
307
308 _ => unreachable!("Unsupported width: {}", width),
309 }
310 }
311}
312
313impl BitPacking for u16 {
314 unsafe fn unchecked_pack(width: usize, input: &[Self], output: &mut [Self]) {
315 let packed_len = 128 * width / size_of::<Self>();
316 debug_assert_eq!(
317 output.len(),
318 packed_len,
319 "Output buffer must be of size 1024 * W / T"
320 );
321 debug_assert_eq!(input.len(), 1024, "Input buffer must be of size 1024");
322 debug_assert!(
323 width <= Self::T,
324 "Width must be less than or equal to {}",
325 Self::T
326 );
327
328 match width {
329 0 => {
330 }
332 1 => pack_16_1(
333 array_ref![input, 0, 1024],
334 array_mut_ref![output, 0, 1024 / 16],
335 ),
336 2 => pack_16_2(
337 array_ref![input, 0, 1024],
338 array_mut_ref![output, 0, 1024 * 2 / 16],
339 ),
340 3 => pack_16_3(
341 array_ref![input, 0, 1024],
342 array_mut_ref![output, 0, 1024 * 3 / 16],
343 ),
344 4 => pack_16_4(
345 array_ref![input, 0, 1024],
346 array_mut_ref![output, 0, 1024 * 4 / 16],
347 ),
348 5 => pack_16_5(
349 array_ref![input, 0, 1024],
350 array_mut_ref![output, 0, 1024 * 5 / 16],
351 ),
352 6 => pack_16_6(
353 array_ref![input, 0, 1024],
354 array_mut_ref![output, 0, 1024 * 6 / 16],
355 ),
356 7 => pack_16_7(
357 array_ref![input, 0, 1024],
358 array_mut_ref![output, 0, 1024 * 7 / 16],
359 ),
360 8 => pack_16_8(
361 array_ref![input, 0, 1024],
362 array_mut_ref![output, 0, 1024 * 8 / 16],
363 ),
364 9 => pack_16_9(
365 array_ref![input, 0, 1024],
366 array_mut_ref![output, 0, 1024 * 9 / 16],
367 ),
368
369 10 => pack_16_10(
370 array_ref![input, 0, 1024],
371 array_mut_ref![output, 0, 1024 * 10 / 16],
372 ),
373 11 => pack_16_11(
374 array_ref![input, 0, 1024],
375 array_mut_ref![output, 0, 1024 * 11 / 16],
376 ),
377 12 => pack_16_12(
378 array_ref![input, 0, 1024],
379 array_mut_ref![output, 0, 1024 * 12 / 16],
380 ),
381 13 => pack_16_13(
382 array_ref![input, 0, 1024],
383 array_mut_ref![output, 0, 1024 * 13 / 16],
384 ),
385 14 => pack_16_14(
386 array_ref![input, 0, 1024],
387 array_mut_ref![output, 0, 1024 * 14 / 16],
388 ),
389 15 => pack_16_15(
390 array_ref![input, 0, 1024],
391 array_mut_ref![output, 0, 1024 * 15 / 16],
392 ),
393 16 => pack_16_16(
394 array_ref![input, 0, 1024],
395 array_mut_ref![output, 0, 1024 * 16 / 16],
396 ),
397
398 _ => unreachable!("Unsupported width: {}", width),
399 }
400 }
401
402 unsafe fn unchecked_unpack(width: usize, input: &[Self], output: &mut [Self]) {
403 let packed_len = 128 * width / size_of::<Self>();
404 debug_assert_eq!(
405 input.len(),
406 packed_len,
407 "Input buffer must be of size 1024 * W / T"
408 );
409 debug_assert_eq!(output.len(), 1024, "Output buffer must be of size 1024");
410 debug_assert!(
411 width <= Self::T,
412 "Width must be less than or equal to {}",
413 Self::T
414 );
415
416 match width {
417 0 => {
418 output.fill(0);
419 }
420 1 => unpack_16_1(
421 array_ref![input, 0, 1024 / 16],
422 array_mut_ref![output, 0, 1024],
423 ),
424 2 => unpack_16_2(
425 array_ref![input, 0, 1024 * 2 / 16],
426 array_mut_ref![output, 0, 1024],
427 ),
428 3 => unpack_16_3(
429 array_ref![input, 0, 1024 * 3 / 16],
430 array_mut_ref![output, 0, 1024],
431 ),
432 4 => unpack_16_4(
433 array_ref![input, 0, 1024 * 4 / 16],
434 array_mut_ref![output, 0, 1024],
435 ),
436 5 => unpack_16_5(
437 array_ref![input, 0, 1024 * 5 / 16],
438 array_mut_ref![output, 0, 1024],
439 ),
440 6 => unpack_16_6(
441 array_ref![input, 0, 1024 * 6 / 16],
442 array_mut_ref![output, 0, 1024],
443 ),
444 7 => unpack_16_7(
445 array_ref![input, 0, 1024 * 7 / 16],
446 array_mut_ref![output, 0, 1024],
447 ),
448 8 => unpack_16_8(
449 array_ref![input, 0, 1024 * 8 / 16],
450 array_mut_ref![output, 0, 1024],
451 ),
452 9 => unpack_16_9(
453 array_ref![input, 0, 1024 * 9 / 16],
454 array_mut_ref![output, 0, 1024],
455 ),
456
457 10 => unpack_16_10(
458 array_ref![input, 0, 1024 * 10 / 16],
459 array_mut_ref![output, 0, 1024],
460 ),
461 11 => unpack_16_11(
462 array_ref![input, 0, 1024 * 11 / 16],
463 array_mut_ref![output, 0, 1024],
464 ),
465 12 => unpack_16_12(
466 array_ref![input, 0, 1024 * 12 / 16],
467 array_mut_ref![output, 0, 1024],
468 ),
469 13 => unpack_16_13(
470 array_ref![input, 0, 1024 * 13 / 16],
471 array_mut_ref![output, 0, 1024],
472 ),
473 14 => unpack_16_14(
474 array_ref![input, 0, 1024 * 14 / 16],
475 array_mut_ref![output, 0, 1024],
476 ),
477 15 => unpack_16_15(
478 array_ref![input, 0, 1024 * 15 / 16],
479 array_mut_ref![output, 0, 1024],
480 ),
481 16 => unpack_16_16(
482 array_ref![input, 0, 1024 * 16 / 16],
483 array_mut_ref![output, 0, 1024],
484 ),
485
486 _ => unreachable!("Unsupported width: {}", width),
487 }
488 }
489}
490
491impl BitPacking for u32 {
492 unsafe fn unchecked_pack(width: usize, input: &[Self], output: &mut [Self]) {
493 let packed_len = 128 * width / size_of::<Self>();
494 debug_assert_eq!(
495 output.len(),
496 packed_len,
497 "Output buffer must be of size 1024 * W / T"
498 );
499 debug_assert_eq!(input.len(), 1024, "Input buffer must be of size 1024");
500 debug_assert!(
501 width <= Self::T,
502 "Width must be less than or equal to {}",
503 Self::T
504 );
505
506 match width {
507 0 => {
508 }
510 1 => pack_32_1(
511 array_ref![input, 0, 1024],
512 array_mut_ref![output, 0, 1024 / 32],
513 ),
514 2 => pack_32_2(
515 array_ref![input, 0, 1024],
516 array_mut_ref![output, 0, 1024 * 2 / 32],
517 ),
518 3 => pack_32_3(
519 array_ref![input, 0, 1024],
520 array_mut_ref![output, 0, 1024 * 3 / 32],
521 ),
522 4 => pack_32_4(
523 array_ref![input, 0, 1024],
524 array_mut_ref![output, 0, 1024 * 4 / 32],
525 ),
526 5 => pack_32_5(
527 array_ref![input, 0, 1024],
528 array_mut_ref![output, 0, 1024 * 5 / 32],
529 ),
530 6 => pack_32_6(
531 array_ref![input, 0, 1024],
532 array_mut_ref![output, 0, 1024 * 6 / 32],
533 ),
534 7 => pack_32_7(
535 array_ref![input, 0, 1024],
536 array_mut_ref![output, 0, 1024 * 7 / 32],
537 ),
538 8 => pack_32_8(
539 array_ref![input, 0, 1024],
540 array_mut_ref![output, 0, 1024 * 8 / 32],
541 ),
542 9 => pack_32_9(
543 array_ref![input, 0, 1024],
544 array_mut_ref![output, 0, 1024 * 9 / 32],
545 ),
546
547 10 => pack_32_10(
548 array_ref![input, 0, 1024],
549 array_mut_ref![output, 0, 1024 * 10 / 32],
550 ),
551 11 => pack_32_11(
552 array_ref![input, 0, 1024],
553 array_mut_ref![output, 0, 1024 * 11 / 32],
554 ),
555 12 => pack_32_12(
556 array_ref![input, 0, 1024],
557 array_mut_ref![output, 0, 1024 * 12 / 32],
558 ),
559 13 => pack_32_13(
560 array_ref![input, 0, 1024],
561 array_mut_ref![output, 0, 1024 * 13 / 32],
562 ),
563 14 => pack_32_14(
564 array_ref![input, 0, 1024],
565 array_mut_ref![output, 0, 1024 * 14 / 32],
566 ),
567 15 => pack_32_15(
568 array_ref![input, 0, 1024],
569 array_mut_ref![output, 0, 1024 * 15 / 32],
570 ),
571 16 => pack_32_16(
572 array_ref![input, 0, 1024],
573 array_mut_ref![output, 0, 1024 * 16 / 32],
574 ),
575 17 => pack_32_17(
576 array_ref![input, 0, 1024],
577 array_mut_ref![output, 0, 1024 * 17 / 32],
578 ),
579 18 => pack_32_18(
580 array_ref![input, 0, 1024],
581 array_mut_ref![output, 0, 1024 * 18 / 32],
582 ),
583 19 => pack_32_19(
584 array_ref![input, 0, 1024],
585 array_mut_ref![output, 0, 1024 * 19 / 32],
586 ),
587
588 20 => pack_32_20(
589 array_ref![input, 0, 1024],
590 array_mut_ref![output, 0, 1024 * 20 / 32],
591 ),
592 21 => pack_32_21(
593 array_ref![input, 0, 1024],
594 array_mut_ref![output, 0, 1024 * 21 / 32],
595 ),
596 22 => pack_32_22(
597 array_ref![input, 0, 1024],
598 array_mut_ref![output, 0, 1024 * 22 / 32],
599 ),
600 23 => pack_32_23(
601 array_ref![input, 0, 1024],
602 array_mut_ref![output, 0, 1024 * 23 / 32],
603 ),
604 24 => pack_32_24(
605 array_ref![input, 0, 1024],
606 array_mut_ref![output, 0, 1024 * 24 / 32],
607 ),
608 25 => pack_32_25(
609 array_ref![input, 0, 1024],
610 array_mut_ref![output, 0, 1024 * 25 / 32],
611 ),
612 26 => pack_32_26(
613 array_ref![input, 0, 1024],
614 array_mut_ref![output, 0, 1024 * 26 / 32],
615 ),
616 27 => pack_32_27(
617 array_ref![input, 0, 1024],
618 array_mut_ref![output, 0, 1024 * 27 / 32],
619 ),
620 28 => pack_32_28(
621 array_ref![input, 0, 1024],
622 array_mut_ref![output, 0, 1024 * 28 / 32],
623 ),
624 29 => pack_32_29(
625 array_ref![input, 0, 1024],
626 array_mut_ref![output, 0, 1024 * 29 / 32],
627 ),
628
629 30 => pack_32_30(
630 array_ref![input, 0, 1024],
631 array_mut_ref![output, 0, 1024 * 30 / 32],
632 ),
633 31 => pack_32_31(
634 array_ref![input, 0, 1024],
635 array_mut_ref![output, 0, 1024 * 31 / 32],
636 ),
637 32 => pack_32_32(
638 array_ref![input, 0, 1024],
639 array_mut_ref![output, 0, 1024 * 32 / 32],
640 ),
641
642 _ => unreachable!("Unsupported width: {}", width),
643 }
644 }
645
646 unsafe fn unchecked_unpack(width: usize, input: &[Self], output: &mut [Self]) {
647 let packed_len = 128 * width / size_of::<Self>();
648 debug_assert_eq!(
649 input.len(),
650 packed_len,
651 "Input buffer must be of size 1024 * W / T"
652 );
653 debug_assert_eq!(output.len(), 1024, "Output buffer must be of size 1024");
654 debug_assert!(
655 width <= Self::T,
656 "Width must be less than or equal to {}",
657 Self::T
658 );
659
660 match width {
661 0 => {
662 output.fill(0);
663 }
664 1 => unpack_32_1(
665 array_ref![input, 0, 1024 / 32],
666 array_mut_ref![output, 0, 1024],
667 ),
668 2 => unpack_32_2(
669 array_ref![input, 0, 1024 * 2 / 32],
670 array_mut_ref![output, 0, 1024],
671 ),
672 3 => unpack_32_3(
673 array_ref![input, 0, 1024 * 3 / 32],
674 array_mut_ref![output, 0, 1024],
675 ),
676 4 => unpack_32_4(
677 array_ref![input, 0, 1024 * 4 / 32],
678 array_mut_ref![output, 0, 1024],
679 ),
680 5 => unpack_32_5(
681 array_ref![input, 0, 1024 * 5 / 32],
682 array_mut_ref![output, 0, 1024],
683 ),
684 6 => unpack_32_6(
685 array_ref![input, 0, 1024 * 6 / 32],
686 array_mut_ref![output, 0, 1024],
687 ),
688 7 => unpack_32_7(
689 array_ref![input, 0, 1024 * 7 / 32],
690 array_mut_ref![output, 0, 1024],
691 ),
692 8 => unpack_32_8(
693 array_ref![input, 0, 1024 * 8 / 32],
694 array_mut_ref![output, 0, 1024],
695 ),
696 9 => unpack_32_9(
697 array_ref![input, 0, 1024 * 9 / 32],
698 array_mut_ref![output, 0, 1024],
699 ),
700
701 10 => unpack_32_10(
702 array_ref![input, 0, 1024 * 10 / 32],
703 array_mut_ref![output, 0, 1024],
704 ),
705 11 => unpack_32_11(
706 array_ref![input, 0, 1024 * 11 / 32],
707 array_mut_ref![output, 0, 1024],
708 ),
709 12 => unpack_32_12(
710 array_ref![input, 0, 1024 * 12 / 32],
711 array_mut_ref![output, 0, 1024],
712 ),
713 13 => unpack_32_13(
714 array_ref![input, 0, 1024 * 13 / 32],
715 array_mut_ref![output, 0, 1024],
716 ),
717 14 => unpack_32_14(
718 array_ref![input, 0, 1024 * 14 / 32],
719 array_mut_ref![output, 0, 1024],
720 ),
721 15 => unpack_32_15(
722 array_ref![input, 0, 1024 * 15 / 32],
723 array_mut_ref![output, 0, 1024],
724 ),
725 16 => unpack_32_16(
726 array_ref![input, 0, 1024 * 16 / 32],
727 array_mut_ref![output, 0, 1024],
728 ),
729 17 => unpack_32_17(
730 array_ref![input, 0, 1024 * 17 / 32],
731 array_mut_ref![output, 0, 1024],
732 ),
733 18 => unpack_32_18(
734 array_ref![input, 0, 1024 * 18 / 32],
735 array_mut_ref![output, 0, 1024],
736 ),
737 19 => unpack_32_19(
738 array_ref![input, 0, 1024 * 19 / 32],
739 array_mut_ref![output, 0, 1024],
740 ),
741
742 20 => unpack_32_20(
743 array_ref![input, 0, 1024 * 20 / 32],
744 array_mut_ref![output, 0, 1024],
745 ),
746 21 => unpack_32_21(
747 array_ref![input, 0, 1024 * 21 / 32],
748 array_mut_ref![output, 0, 1024],
749 ),
750 22 => unpack_32_22(
751 array_ref![input, 0, 1024 * 22 / 32],
752 array_mut_ref![output, 0, 1024],
753 ),
754 23 => unpack_32_23(
755 array_ref![input, 0, 1024 * 23 / 32],
756 array_mut_ref![output, 0, 1024],
757 ),
758 24 => unpack_32_24(
759 array_ref![input, 0, 1024 * 24 / 32],
760 array_mut_ref![output, 0, 1024],
761 ),
762 25 => unpack_32_25(
763 array_ref![input, 0, 1024 * 25 / 32],
764 array_mut_ref![output, 0, 1024],
765 ),
766 26 => unpack_32_26(
767 array_ref![input, 0, 1024 * 26 / 32],
768 array_mut_ref![output, 0, 1024],
769 ),
770 27 => unpack_32_27(
771 array_ref![input, 0, 1024 * 27 / 32],
772 array_mut_ref![output, 0, 1024],
773 ),
774 28 => unpack_32_28(
775 array_ref![input, 0, 1024 * 28 / 32],
776 array_mut_ref![output, 0, 1024],
777 ),
778 29 => unpack_32_29(
779 array_ref![input, 0, 1024 * 29 / 32],
780 array_mut_ref![output, 0, 1024],
781 ),
782
783 30 => unpack_32_30(
784 array_ref![input, 0, 1024 * 30 / 32],
785 array_mut_ref![output, 0, 1024],
786 ),
787 31 => unpack_32_31(
788 array_ref![input, 0, 1024 * 31 / 32],
789 array_mut_ref![output, 0, 1024],
790 ),
791 32 => unpack_32_32(
792 array_ref![input, 0, 1024 * 32 / 32],
793 array_mut_ref![output, 0, 1024],
794 ),
795
796 _ => unreachable!("Unsupported width: {}", width),
797 }
798 }
799}
800
801impl BitPacking for u64 {
802 unsafe fn unchecked_pack(width: usize, input: &[Self], output: &mut [Self]) {
803 let packed_len = 128 * width / size_of::<Self>();
804 debug_assert_eq!(
805 output.len(),
806 packed_len,
807 "Output buffer must be of size 1024 * W / T"
808 );
809 debug_assert_eq!(input.len(), 1024, "Input buffer must be of size 1024");
810 debug_assert!(
811 width <= Self::T,
812 "Width must be less than or equal to {}",
813 Self::T
814 );
815
816 match width {
817 0 => {
818 }
820 1 => pack_64_1(
821 array_ref![input, 0, 1024],
822 array_mut_ref![output, 0, 1024 / 64],
823 ),
824 2 => pack_64_2(
825 array_ref![input, 0, 1024],
826 array_mut_ref![output, 0, 1024 * 2 / 64],
827 ),
828 3 => pack_64_3(
829 array_ref![input, 0, 1024],
830 array_mut_ref![output, 0, 1024 * 3 / 64],
831 ),
832 4 => pack_64_4(
833 array_ref![input, 0, 1024],
834 array_mut_ref![output, 0, 1024 * 4 / 64],
835 ),
836 5 => pack_64_5(
837 array_ref![input, 0, 1024],
838 array_mut_ref![output, 0, 1024 * 5 / 64],
839 ),
840 6 => pack_64_6(
841 array_ref![input, 0, 1024],
842 array_mut_ref![output, 0, 1024 * 6 / 64],
843 ),
844 7 => pack_64_7(
845 array_ref![input, 0, 1024],
846 array_mut_ref![output, 0, 1024 * 7 / 64],
847 ),
848 8 => pack_64_8(
849 array_ref![input, 0, 1024],
850 array_mut_ref![output, 0, 1024 * 8 / 64],
851 ),
852 9 => pack_64_9(
853 array_ref![input, 0, 1024],
854 array_mut_ref![output, 0, 1024 * 9 / 64],
855 ),
856
857 10 => pack_64_10(
858 array_ref![input, 0, 1024],
859 array_mut_ref![output, 0, 1024 * 10 / 64],
860 ),
861 11 => pack_64_11(
862 array_ref![input, 0, 1024],
863 array_mut_ref![output, 0, 1024 * 11 / 64],
864 ),
865 12 => pack_64_12(
866 array_ref![input, 0, 1024],
867 array_mut_ref![output, 0, 1024 * 12 / 64],
868 ),
869 13 => pack_64_13(
870 array_ref![input, 0, 1024],
871 array_mut_ref![output, 0, 1024 * 13 / 64],
872 ),
873 14 => pack_64_14(
874 array_ref![input, 0, 1024],
875 array_mut_ref![output, 0, 1024 * 14 / 64],
876 ),
877 15 => pack_64_15(
878 array_ref![input, 0, 1024],
879 array_mut_ref![output, 0, 1024 * 15 / 64],
880 ),
881 16 => pack_64_16(
882 array_ref![input, 0, 1024],
883 array_mut_ref![output, 0, 1024 * 16 / 64],
884 ),
885 17 => pack_64_17(
886 array_ref![input, 0, 1024],
887 array_mut_ref![output, 0, 1024 * 17 / 64],
888 ),
889 18 => pack_64_18(
890 array_ref![input, 0, 1024],
891 array_mut_ref![output, 0, 1024 * 18 / 64],
892 ),
893 19 => pack_64_19(
894 array_ref![input, 0, 1024],
895 array_mut_ref![output, 0, 1024 * 19 / 64],
896 ),
897
898 20 => pack_64_20(
899 array_ref![input, 0, 1024],
900 array_mut_ref![output, 0, 1024 * 20 / 64],
901 ),
902 21 => pack_64_21(
903 array_ref![input, 0, 1024],
904 array_mut_ref![output, 0, 1024 * 21 / 64],
905 ),
906 22 => pack_64_22(
907 array_ref![input, 0, 1024],
908 array_mut_ref![output, 0, 1024 * 22 / 64],
909 ),
910 23 => pack_64_23(
911 array_ref![input, 0, 1024],
912 array_mut_ref![output, 0, 1024 * 23 / 64],
913 ),
914 24 => pack_64_24(
915 array_ref![input, 0, 1024],
916 array_mut_ref![output, 0, 1024 * 24 / 64],
917 ),
918 25 => pack_64_25(
919 array_ref![input, 0, 1024],
920 array_mut_ref![output, 0, 1024 * 25 / 64],
921 ),
922 26 => pack_64_26(
923 array_ref![input, 0, 1024],
924 array_mut_ref![output, 0, 1024 * 26 / 64],
925 ),
926 27 => pack_64_27(
927 array_ref![input, 0, 1024],
928 array_mut_ref![output, 0, 1024 * 27 / 64],
929 ),
930 28 => pack_64_28(
931 array_ref![input, 0, 1024],
932 array_mut_ref![output, 0, 1024 * 28 / 64],
933 ),
934 29 => pack_64_29(
935 array_ref![input, 0, 1024],
936 array_mut_ref![output, 0, 1024 * 29 / 64],
937 ),
938
939 30 => pack_64_30(
940 array_ref![input, 0, 1024],
941 array_mut_ref![output, 0, 1024 * 30 / 64],
942 ),
943 31 => pack_64_31(
944 array_ref![input, 0, 1024],
945 array_mut_ref![output, 0, 1024 * 31 / 64],
946 ),
947 32 => pack_64_32(
948 array_ref![input, 0, 1024],
949 array_mut_ref![output, 0, 1024 * 32 / 64],
950 ),
951 33 => pack_64_33(
952 array_ref![input, 0, 1024],
953 array_mut_ref![output, 0, 1024 * 33 / 64],
954 ),
955 34 => pack_64_34(
956 array_ref![input, 0, 1024],
957 array_mut_ref![output, 0, 1024 * 34 / 64],
958 ),
959 35 => pack_64_35(
960 array_ref![input, 0, 1024],
961 array_mut_ref![output, 0, 1024 * 35 / 64],
962 ),
963 36 => pack_64_36(
964 array_ref![input, 0, 1024],
965 array_mut_ref![output, 0, 1024 * 36 / 64],
966 ),
967 37 => pack_64_37(
968 array_ref![input, 0, 1024],
969 array_mut_ref![output, 0, 1024 * 37 / 64],
970 ),
971 38 => pack_64_38(
972 array_ref![input, 0, 1024],
973 array_mut_ref![output, 0, 1024 * 38 / 64],
974 ),
975 39 => pack_64_39(
976 array_ref![input, 0, 1024],
977 array_mut_ref![output, 0, 1024 * 39 / 64],
978 ),
979
980 40 => pack_64_40(
981 array_ref![input, 0, 1024],
982 array_mut_ref![output, 0, 1024 * 40 / 64],
983 ),
984 41 => pack_64_41(
985 array_ref![input, 0, 1024],
986 array_mut_ref![output, 0, 1024 * 41 / 64],
987 ),
988 42 => pack_64_42(
989 array_ref![input, 0, 1024],
990 array_mut_ref![output, 0, 1024 * 42 / 64],
991 ),
992 43 => pack_64_43(
993 array_ref![input, 0, 1024],
994 array_mut_ref![output, 0, 1024 * 43 / 64],
995 ),
996 44 => pack_64_44(
997 array_ref![input, 0, 1024],
998 array_mut_ref![output, 0, 1024 * 44 / 64],
999 ),
1000 45 => pack_64_45(
1001 array_ref![input, 0, 1024],
1002 array_mut_ref![output, 0, 1024 * 45 / 64],
1003 ),
1004 46 => pack_64_46(
1005 array_ref![input, 0, 1024],
1006 array_mut_ref![output, 0, 1024 * 46 / 64],
1007 ),
1008 47 => pack_64_47(
1009 array_ref![input, 0, 1024],
1010 array_mut_ref![output, 0, 1024 * 47 / 64],
1011 ),
1012 48 => pack_64_48(
1013 array_ref![input, 0, 1024],
1014 array_mut_ref![output, 0, 1024 * 48 / 64],
1015 ),
1016 49 => pack_64_49(
1017 array_ref![input, 0, 1024],
1018 array_mut_ref![output, 0, 1024 * 49 / 64],
1019 ),
1020
1021 50 => pack_64_50(
1022 array_ref![input, 0, 1024],
1023 array_mut_ref![output, 0, 1024 * 50 / 64],
1024 ),
1025 51 => pack_64_51(
1026 array_ref![input, 0, 1024],
1027 array_mut_ref![output, 0, 1024 * 51 / 64],
1028 ),
1029 52 => pack_64_52(
1030 array_ref![input, 0, 1024],
1031 array_mut_ref![output, 0, 1024 * 52 / 64],
1032 ),
1033 53 => pack_64_53(
1034 array_ref![input, 0, 1024],
1035 array_mut_ref![output, 0, 1024 * 53 / 64],
1036 ),
1037 54 => pack_64_54(
1038 array_ref![input, 0, 1024],
1039 array_mut_ref![output, 0, 1024 * 54 / 64],
1040 ),
1041 55 => pack_64_55(
1042 array_ref![input, 0, 1024],
1043 array_mut_ref![output, 0, 1024 * 55 / 64],
1044 ),
1045 56 => pack_64_56(
1046 array_ref![input, 0, 1024],
1047 array_mut_ref![output, 0, 1024 * 56 / 64],
1048 ),
1049 57 => pack_64_57(
1050 array_ref![input, 0, 1024],
1051 array_mut_ref![output, 0, 1024 * 57 / 64],
1052 ),
1053 58 => pack_64_58(
1054 array_ref![input, 0, 1024],
1055 array_mut_ref![output, 0, 1024 * 58 / 64],
1056 ),
1057 59 => pack_64_59(
1058 array_ref![input, 0, 1024],
1059 array_mut_ref![output, 0, 1024 * 59 / 64],
1060 ),
1061
1062 60 => pack_64_60(
1063 array_ref![input, 0, 1024],
1064 array_mut_ref![output, 0, 1024 * 60 / 64],
1065 ),
1066 61 => pack_64_61(
1067 array_ref![input, 0, 1024],
1068 array_mut_ref![output, 0, 1024 * 61 / 64],
1069 ),
1070 62 => pack_64_62(
1071 array_ref![input, 0, 1024],
1072 array_mut_ref![output, 0, 1024 * 62 / 64],
1073 ),
1074 63 => pack_64_63(
1075 array_ref![input, 0, 1024],
1076 array_mut_ref![output, 0, 1024 * 63 / 64],
1077 ),
1078 64 => pack_64_64(
1079 array_ref![input, 0, 1024],
1080 array_mut_ref![output, 0, 1024 * 64 / 64],
1081 ),
1082
1083 _ => unreachable!("Unsupported width: {}", width),
1084 }
1085 }
1086
1087 unsafe fn unchecked_unpack(width: usize, input: &[Self], output: &mut [Self]) {
1088 let packed_len = 128 * width / size_of::<Self>();
1089 debug_assert_eq!(
1090 input.len(),
1091 packed_len,
1092 "Input buffer must be of size 1024 * W / T"
1093 );
1094 debug_assert_eq!(output.len(), 1024, "Output buffer must be of size 1024");
1095 debug_assert!(
1096 width <= Self::T,
1097 "Width must be less than or equal to {}",
1098 Self::T
1099 );
1100
1101 match width {
1102 0 => {
1103 output.fill(0);
1104 }
1105 1 => unpack_64_1(
1106 array_ref![input, 0, 1024 / 64],
1107 array_mut_ref![output, 0, 1024],
1108 ),
1109 2 => unpack_64_2(
1110 array_ref![input, 0, 1024 * 2 / 64],
1111 array_mut_ref![output, 0, 1024],
1112 ),
1113 3 => unpack_64_3(
1114 array_ref![input, 0, 1024 * 3 / 64],
1115 array_mut_ref![output, 0, 1024],
1116 ),
1117 4 => unpack_64_4(
1118 array_ref![input, 0, 1024 * 4 / 64],
1119 array_mut_ref![output, 0, 1024],
1120 ),
1121 5 => unpack_64_5(
1122 array_ref![input, 0, 1024 * 5 / 64],
1123 array_mut_ref![output, 0, 1024],
1124 ),
1125 6 => unpack_64_6(
1126 array_ref![input, 0, 1024 * 6 / 64],
1127 array_mut_ref![output, 0, 1024],
1128 ),
1129 7 => unpack_64_7(
1130 array_ref![input, 0, 1024 * 7 / 64],
1131 array_mut_ref![output, 0, 1024],
1132 ),
1133 8 => unpack_64_8(
1134 array_ref![input, 0, 1024 * 8 / 64],
1135 array_mut_ref![output, 0, 1024],
1136 ),
1137 9 => unpack_64_9(
1138 array_ref![input, 0, 1024 * 9 / 64],
1139 array_mut_ref![output, 0, 1024],
1140 ),
1141
1142 10 => unpack_64_10(
1143 array_ref![input, 0, 1024 * 10 / 64],
1144 array_mut_ref![output, 0, 1024],
1145 ),
1146 11 => unpack_64_11(
1147 array_ref![input, 0, 1024 * 11 / 64],
1148 array_mut_ref![output, 0, 1024],
1149 ),
1150 12 => unpack_64_12(
1151 array_ref![input, 0, 1024 * 12 / 64],
1152 array_mut_ref![output, 0, 1024],
1153 ),
1154 13 => unpack_64_13(
1155 array_ref![input, 0, 1024 * 13 / 64],
1156 array_mut_ref![output, 0, 1024],
1157 ),
1158 14 => unpack_64_14(
1159 array_ref![input, 0, 1024 * 14 / 64],
1160 array_mut_ref![output, 0, 1024],
1161 ),
1162 15 => unpack_64_15(
1163 array_ref![input, 0, 1024 * 15 / 64],
1164 array_mut_ref![output, 0, 1024],
1165 ),
1166 16 => unpack_64_16(
1167 array_ref![input, 0, 1024 * 16 / 64],
1168 array_mut_ref![output, 0, 1024],
1169 ),
1170 17 => unpack_64_17(
1171 array_ref![input, 0, 1024 * 17 / 64],
1172 array_mut_ref![output, 0, 1024],
1173 ),
1174 18 => unpack_64_18(
1175 array_ref![input, 0, 1024 * 18 / 64],
1176 array_mut_ref![output, 0, 1024],
1177 ),
1178 19 => unpack_64_19(
1179 array_ref![input, 0, 1024 * 19 / 64],
1180 array_mut_ref![output, 0, 1024],
1181 ),
1182
1183 20 => unpack_64_20(
1184 array_ref![input, 0, 1024 * 20 / 64],
1185 array_mut_ref![output, 0, 1024],
1186 ),
1187 21 => unpack_64_21(
1188 array_ref![input, 0, 1024 * 21 / 64],
1189 array_mut_ref![output, 0, 1024],
1190 ),
1191 22 => unpack_64_22(
1192 array_ref![input, 0, 1024 * 22 / 64],
1193 array_mut_ref![output, 0, 1024],
1194 ),
1195 23 => unpack_64_23(
1196 array_ref![input, 0, 1024 * 23 / 64],
1197 array_mut_ref![output, 0, 1024],
1198 ),
1199 24 => unpack_64_24(
1200 array_ref![input, 0, 1024 * 24 / 64],
1201 array_mut_ref![output, 0, 1024],
1202 ),
1203 25 => unpack_64_25(
1204 array_ref![input, 0, 1024 * 25 / 64],
1205 array_mut_ref![output, 0, 1024],
1206 ),
1207 26 => unpack_64_26(
1208 array_ref![input, 0, 1024 * 26 / 64],
1209 array_mut_ref![output, 0, 1024],
1210 ),
1211 27 => unpack_64_27(
1212 array_ref![input, 0, 1024 * 27 / 64],
1213 array_mut_ref![output, 0, 1024],
1214 ),
1215 28 => unpack_64_28(
1216 array_ref![input, 0, 1024 * 28 / 64],
1217 array_mut_ref![output, 0, 1024],
1218 ),
1219 29 => unpack_64_29(
1220 array_ref![input, 0, 1024 * 29 / 64],
1221 array_mut_ref![output, 0, 1024],
1222 ),
1223
1224 30 => unpack_64_30(
1225 array_ref![input, 0, 1024 * 30 / 64],
1226 array_mut_ref![output, 0, 1024],
1227 ),
1228 31 => unpack_64_31(
1229 array_ref![input, 0, 1024 * 31 / 64],
1230 array_mut_ref![output, 0, 1024],
1231 ),
1232 32 => unpack_64_32(
1233 array_ref![input, 0, 1024 * 32 / 64],
1234 array_mut_ref![output, 0, 1024],
1235 ),
1236 33 => unpack_64_33(
1237 array_ref![input, 0, 1024 * 33 / 64],
1238 array_mut_ref![output, 0, 1024],
1239 ),
1240 34 => unpack_64_34(
1241 array_ref![input, 0, 1024 * 34 / 64],
1242 array_mut_ref![output, 0, 1024],
1243 ),
1244 35 => unpack_64_35(
1245 array_ref![input, 0, 1024 * 35 / 64],
1246 array_mut_ref![output, 0, 1024],
1247 ),
1248 36 => unpack_64_36(
1249 array_ref![input, 0, 1024 * 36 / 64],
1250 array_mut_ref![output, 0, 1024],
1251 ),
1252 37 => unpack_64_37(
1253 array_ref![input, 0, 1024 * 37 / 64],
1254 array_mut_ref![output, 0, 1024],
1255 ),
1256 38 => unpack_64_38(
1257 array_ref![input, 0, 1024 * 38 / 64],
1258 array_mut_ref![output, 0, 1024],
1259 ),
1260 39 => unpack_64_39(
1261 array_ref![input, 0, 1024 * 39 / 64],
1262 array_mut_ref![output, 0, 1024],
1263 ),
1264
1265 40 => unpack_64_40(
1266 array_ref![input, 0, 1024 * 40 / 64],
1267 array_mut_ref![output, 0, 1024],
1268 ),
1269 41 => unpack_64_41(
1270 array_ref![input, 0, 1024 * 41 / 64],
1271 array_mut_ref![output, 0, 1024],
1272 ),
1273 42 => unpack_64_42(
1274 array_ref![input, 0, 1024 * 42 / 64],
1275 array_mut_ref![output, 0, 1024],
1276 ),
1277 43 => unpack_64_43(
1278 array_ref![input, 0, 1024 * 43 / 64],
1279 array_mut_ref![output, 0, 1024],
1280 ),
1281 44 => unpack_64_44(
1282 array_ref![input, 0, 1024 * 44 / 64],
1283 array_mut_ref![output, 0, 1024],
1284 ),
1285 45 => unpack_64_45(
1286 array_ref![input, 0, 1024 * 45 / 64],
1287 array_mut_ref![output, 0, 1024],
1288 ),
1289 46 => unpack_64_46(
1290 array_ref![input, 0, 1024 * 46 / 64],
1291 array_mut_ref![output, 0, 1024],
1292 ),
1293 47 => unpack_64_47(
1294 array_ref![input, 0, 1024 * 47 / 64],
1295 array_mut_ref![output, 0, 1024],
1296 ),
1297 48 => unpack_64_48(
1298 array_ref![input, 0, 1024 * 48 / 64],
1299 array_mut_ref![output, 0, 1024],
1300 ),
1301 49 => unpack_64_49(
1302 array_ref![input, 0, 1024 * 49 / 64],
1303 array_mut_ref![output, 0, 1024],
1304 ),
1305
1306 50 => unpack_64_50(
1307 array_ref![input, 0, 1024 * 50 / 64],
1308 array_mut_ref![output, 0, 1024],
1309 ),
1310 51 => unpack_64_51(
1311 array_ref![input, 0, 1024 * 51 / 64],
1312 array_mut_ref![output, 0, 1024],
1313 ),
1314 52 => unpack_64_52(
1315 array_ref![input, 0, 1024 * 52 / 64],
1316 array_mut_ref![output, 0, 1024],
1317 ),
1318 53 => unpack_64_53(
1319 array_ref![input, 0, 1024 * 53 / 64],
1320 array_mut_ref![output, 0, 1024],
1321 ),
1322 54 => unpack_64_54(
1323 array_ref![input, 0, 1024 * 54 / 64],
1324 array_mut_ref![output, 0, 1024],
1325 ),
1326 55 => unpack_64_55(
1327 array_ref![input, 0, 1024 * 55 / 64],
1328 array_mut_ref![output, 0, 1024],
1329 ),
1330 56 => unpack_64_56(
1331 array_ref![input, 0, 1024 * 56 / 64],
1332 array_mut_ref![output, 0, 1024],
1333 ),
1334 57 => unpack_64_57(
1335 array_ref![input, 0, 1024 * 57 / 64],
1336 array_mut_ref![output, 0, 1024],
1337 ),
1338 58 => unpack_64_58(
1339 array_ref![input, 0, 1024 * 58 / 64],
1340 array_mut_ref![output, 0, 1024],
1341 ),
1342 59 => unpack_64_59(
1343 array_ref![input, 0, 1024 * 59 / 64],
1344 array_mut_ref![output, 0, 1024],
1345 ),
1346
1347 60 => unpack_64_60(
1348 array_ref![input, 0, 1024 * 60 / 64],
1349 array_mut_ref![output, 0, 1024],
1350 ),
1351 61 => unpack_64_61(
1352 array_ref![input, 0, 1024 * 61 / 64],
1353 array_mut_ref![output, 0, 1024],
1354 ),
1355 62 => unpack_64_62(
1356 array_ref![input, 0, 1024 * 62 / 64],
1357 array_mut_ref![output, 0, 1024],
1358 ),
1359 63 => unpack_64_63(
1360 array_ref![input, 0, 1024 * 63 / 64],
1361 array_mut_ref![output, 0, 1024],
1362 ),
1363 64 => unpack_64_64(
1364 array_ref![input, 0, 1024 * 64 / 64],
1365 array_mut_ref![output, 0, 1024],
1366 ),
1367
1368 _ => unreachable!("Unsupported width: {}", width),
1369 }
1370 }
1371}
1372
/// Generates a `u8` unpacking function named `$func` for the bit width `$width`.
///
/// The generated function takes `1024 * $width / u8::T` packed words and a
/// 1024-element output array. For every lane it invokes `unpack!` with a
/// kernel that stores each produced element at the index supplied with it.
macro_rules! unpack_8 {
    ($func:ident, $width:expr) => {
        fn $func(
            input: &[u8; 1024 * $width / <u8 as FastLanes>::T],
            output: &mut [u8; 1024],
        ) {
            for lane in 0..<u8 as FastLanes>::LANES {
                // `$slot` and `$value` are not bound by this macro; they are
                // forwarded as-is so `unpack!` can name its kernel arguments.
                unpack!(u8, $width, input, lane, |$slot, $value| {
                    output[$slot] = $value;
                });
            }
        }
    };
}
1384
// Instantiate one `u8` unpacking function per bit width from 1 through 8.
unpack_8!(unpack_8_1, 1);
unpack_8!(unpack_8_2, 2);
unpack_8!(unpack_8_3, 3);
unpack_8!(unpack_8_4, 4);
unpack_8!(unpack_8_5, 5);
unpack_8!(unpack_8_6, 6);
unpack_8!(unpack_8_7, 7);
unpack_8!(unpack_8_8, 8);
1393
/// Generates a `u8` packing function named `$func` for the bit width `$width`.
///
/// The generated function takes a 1024-element input array and an output
/// array of `1024 * $width / u8::T` words. For every lane it invokes `pack!`
/// with a kernel that reads `input` at the index it is given.
macro_rules! pack_8 {
    ($func:ident, $width:expr) => {
        fn $func(
            input: &[u8; 1024],
            output: &mut [u8; 1024 * $width / <u8 as FastLanes>::T],
        ) {
            for lane in 0..<u8 as FastLanes>::LANES {
                // `$slot` is not bound by this macro; it is forwarded as-is so
                // `pack!` can name its kernel argument.
                pack!(u8, $width, output, lane, |$slot| { input[$slot] });
            }
        }
    };
}
// Instantiate one `u8` packing function per bit width from 1 through 8.
pack_8!(pack_8_1, 1);
pack_8!(pack_8_2, 2);
pack_8!(pack_8_3, 3);
pack_8!(pack_8_4, 4);
pack_8!(pack_8_5, 5);
pack_8!(pack_8_6, 6);
pack_8!(pack_8_7, 7);
pack_8!(pack_8_8, 8);
1411
/// Generates a `u16` unpacking function named `$func` for the bit width `$width`.
///
/// The generated function takes `1024 * $width / u16::T` packed words and a
/// 1024-element output array. For every lane it invokes `unpack!` with a
/// kernel that stores each produced element at the index supplied with it.
macro_rules! unpack_16 {
    ($func:ident, $width:expr) => {
        fn $func(
            input: &[u16; 1024 * $width / <u16 as FastLanes>::T],
            output: &mut [u16; 1024],
        ) {
            for lane in 0..<u16 as FastLanes>::LANES {
                // `$slot` and `$value` are not bound by this macro; they are
                // forwarded as-is so `unpack!` can name its kernel arguments.
                unpack!(u16, $width, input, lane, |$slot, $value| {
                    output[$slot] = $value;
                });
            }
        }
    };
}
1423
// Instantiate one `u16` unpacking function per bit width from 1 through 16.
unpack_16!(unpack_16_1, 1);
unpack_16!(unpack_16_2, 2);
unpack_16!(unpack_16_3, 3);
unpack_16!(unpack_16_4, 4);
unpack_16!(unpack_16_5, 5);
unpack_16!(unpack_16_6, 6);
unpack_16!(unpack_16_7, 7);
unpack_16!(unpack_16_8, 8);
unpack_16!(unpack_16_9, 9);
unpack_16!(unpack_16_10, 10);
unpack_16!(unpack_16_11, 11);
unpack_16!(unpack_16_12, 12);
unpack_16!(unpack_16_13, 13);
unpack_16!(unpack_16_14, 14);
unpack_16!(unpack_16_15, 15);
unpack_16!(unpack_16_16, 16);
1440
/// Generates a `u16` packing function named `$func` for the bit width `$width`.
///
/// The generated function takes a 1024-element input array and an output
/// array of `1024 * $width / u16::T` words. For every lane it invokes `pack!`
/// with a kernel that reads `input` at the index it is given.
macro_rules! pack_16 {
    ($func:ident, $width:expr) => {
        fn $func(
            input: &[u16; 1024],
            output: &mut [u16; 1024 * $width / <u16 as FastLanes>::T],
        ) {
            for lane in 0..<u16 as FastLanes>::LANES {
                // `$slot` is not bound by this macro; it is forwarded as-is so
                // `pack!` can name its kernel argument.
                pack!(u16, $width, output, lane, |$slot| { input[$slot] });
            }
        }
    };
}
1450
// Instantiate one `u16` packing function per bit width from 1 through 16.
pack_16!(pack_16_1, 1);
pack_16!(pack_16_2, 2);
pack_16!(pack_16_3, 3);
pack_16!(pack_16_4, 4);
pack_16!(pack_16_5, 5);
pack_16!(pack_16_6, 6);
pack_16!(pack_16_7, 7);
pack_16!(pack_16_8, 8);
pack_16!(pack_16_9, 9);
pack_16!(pack_16_10, 10);
pack_16!(pack_16_11, 11);
pack_16!(pack_16_12, 12);
pack_16!(pack_16_13, 13);
pack_16!(pack_16_14, 14);
pack_16!(pack_16_15, 15);
pack_16!(pack_16_16, 16);
1467
/// Generates a `u32` unpacking function named `$func` for the bit width `$width`.
///
/// The generated function takes `1024 * $width / u32::T` packed words and a
/// 1024-element output array. For every lane it invokes `unpack!` with a
/// kernel that stores each produced element at the index supplied with it.
macro_rules! unpack_32 {
    ($func:ident, $width:expr) => {
        fn $func(
            input: &[u32; 1024 * $width / <u32 as FastLanes>::T],
            output: &mut [u32; 1024],
        ) {
            for lane in 0..<u32 as FastLanes>::LANES {
                // `$slot` and `$value` are not bound by this macro; they are
                // forwarded as-is so `unpack!` can name its kernel arguments.
                unpack!(u32, $width, input, lane, |$slot, $value| {
                    output[$slot] = $value
                });
            }
        }
    };
}
1479
// Instantiate one `u32` unpacking function per bit width from 1 through 32.
unpack_32!(unpack_32_1, 1);
unpack_32!(unpack_32_2, 2);
unpack_32!(unpack_32_3, 3);
unpack_32!(unpack_32_4, 4);
unpack_32!(unpack_32_5, 5);
unpack_32!(unpack_32_6, 6);
unpack_32!(unpack_32_7, 7);
unpack_32!(unpack_32_8, 8);
unpack_32!(unpack_32_9, 9);
unpack_32!(unpack_32_10, 10);
unpack_32!(unpack_32_11, 11);
unpack_32!(unpack_32_12, 12);
unpack_32!(unpack_32_13, 13);
unpack_32!(unpack_32_14, 14);
unpack_32!(unpack_32_15, 15);
unpack_32!(unpack_32_16, 16);
unpack_32!(unpack_32_17, 17);
unpack_32!(unpack_32_18, 18);
unpack_32!(unpack_32_19, 19);
unpack_32!(unpack_32_20, 20);
unpack_32!(unpack_32_21, 21);
unpack_32!(unpack_32_22, 22);
unpack_32!(unpack_32_23, 23);
unpack_32!(unpack_32_24, 24);
unpack_32!(unpack_32_25, 25);
unpack_32!(unpack_32_26, 26);
unpack_32!(unpack_32_27, 27);
unpack_32!(unpack_32_28, 28);
unpack_32!(unpack_32_29, 29);
unpack_32!(unpack_32_30, 30);
unpack_32!(unpack_32_31, 31);
unpack_32!(unpack_32_32, 32);
1512
/// Generates a `u32` packing function named `$func` for the bit width `$width`.
///
/// The generated function takes a 1024-element input array and an output
/// array of `1024 * $width / 32` words. For every lane it invokes `pack!`
/// with a kernel that reads `input` at the index it is given.
macro_rules! pack_32 {
    ($func:ident, $width:expr) => {
        fn $func(
            input: &[u32; 1024],
            // `FastLanes::T` is the bit size of `u32`, the same value as `u32::BITS`.
            output: &mut [u32; 1024 * $width / <u32 as FastLanes>::T],
        ) {
            for lane in 0..<u32 as FastLanes>::LANES {
                // `$slot` is not bound by this macro; it is forwarded as-is so
                // `pack!` can name its kernel argument.
                pack!(u32, $width, output, lane, |$slot| { input[$slot] });
            }
        }
    };
}
1522
// Instantiate one `u32` packing function per bit width from 1 through 32.
pack_32!(pack_32_1, 1);
pack_32!(pack_32_2, 2);
pack_32!(pack_32_3, 3);
pack_32!(pack_32_4, 4);
pack_32!(pack_32_5, 5);
pack_32!(pack_32_6, 6);
pack_32!(pack_32_7, 7);
pack_32!(pack_32_8, 8);
pack_32!(pack_32_9, 9);
pack_32!(pack_32_10, 10);
pack_32!(pack_32_11, 11);
pack_32!(pack_32_12, 12);
pack_32!(pack_32_13, 13);
pack_32!(pack_32_14, 14);
pack_32!(pack_32_15, 15);
pack_32!(pack_32_16, 16);
pack_32!(pack_32_17, 17);
pack_32!(pack_32_18, 18);
pack_32!(pack_32_19, 19);
pack_32!(pack_32_20, 20);
pack_32!(pack_32_21, 21);
pack_32!(pack_32_22, 22);
pack_32!(pack_32_23, 23);
pack_32!(pack_32_24, 24);
pack_32!(pack_32_25, 25);
pack_32!(pack_32_26, 26);
pack_32!(pack_32_27, 27);
pack_32!(pack_32_28, 28);
pack_32!(pack_32_29, 29);
pack_32!(pack_32_30, 30);
pack_32!(pack_32_31, 31);
pack_32!(pack_32_32, 32);
1555
/// Generates a `u64` unpacking function named `$func` for the bit width `$width`.
///
/// The generated function takes `1024 * $width / u64::T` packed words and a
/// 1024-element output array. For every lane it invokes `unpack!` with a
/// kernel that stores each produced element at the index supplied with it.
macro_rules! unpack_64 {
    ($func:ident, $width:expr) => {
        fn $func(
            input: &[u64; 1024 * $width / <u64 as FastLanes>::T],
            output: &mut [u64; 1024],
        ) {
            for lane in 0..<u64 as FastLanes>::LANES {
                // `$slot` and `$value` are not bound by this macro; they are
                // forwarded as-is so `unpack!` can name its kernel arguments.
                unpack!(u64, $width, input, lane, |$slot, $value| {
                    output[$slot] = $value
                });
            }
        }
    };
}
1567
// Instantiate one `u64` unpacking function per bit width from 1 through 64.
unpack_64!(unpack_64_1, 1);
unpack_64!(unpack_64_2, 2);
unpack_64!(unpack_64_3, 3);
unpack_64!(unpack_64_4, 4);
unpack_64!(unpack_64_5, 5);
unpack_64!(unpack_64_6, 6);
unpack_64!(unpack_64_7, 7);
unpack_64!(unpack_64_8, 8);
unpack_64!(unpack_64_9, 9);
unpack_64!(unpack_64_10, 10);
unpack_64!(unpack_64_11, 11);
unpack_64!(unpack_64_12, 12);
unpack_64!(unpack_64_13, 13);
unpack_64!(unpack_64_14, 14);
unpack_64!(unpack_64_15, 15);
unpack_64!(unpack_64_16, 16);
unpack_64!(unpack_64_17, 17);
unpack_64!(unpack_64_18, 18);
unpack_64!(unpack_64_19, 19);
unpack_64!(unpack_64_20, 20);
unpack_64!(unpack_64_21, 21);
unpack_64!(unpack_64_22, 22);
unpack_64!(unpack_64_23, 23);
unpack_64!(unpack_64_24, 24);
unpack_64!(unpack_64_25, 25);
unpack_64!(unpack_64_26, 26);
unpack_64!(unpack_64_27, 27);
unpack_64!(unpack_64_28, 28);
unpack_64!(unpack_64_29, 29);
unpack_64!(unpack_64_30, 30);
unpack_64!(unpack_64_31, 31);
unpack_64!(unpack_64_32, 32);

unpack_64!(unpack_64_33, 33);
unpack_64!(unpack_64_34, 34);
unpack_64!(unpack_64_35, 35);
unpack_64!(unpack_64_36, 36);
unpack_64!(unpack_64_37, 37);
unpack_64!(unpack_64_38, 38);
unpack_64!(unpack_64_39, 39);
unpack_64!(unpack_64_40, 40);
unpack_64!(unpack_64_41, 41);
unpack_64!(unpack_64_42, 42);
unpack_64!(unpack_64_43, 43);
unpack_64!(unpack_64_44, 44);
unpack_64!(unpack_64_45, 45);
unpack_64!(unpack_64_46, 46);
unpack_64!(unpack_64_47, 47);
unpack_64!(unpack_64_48, 48);
unpack_64!(unpack_64_49, 49);
unpack_64!(unpack_64_50, 50);
unpack_64!(unpack_64_51, 51);
unpack_64!(unpack_64_52, 52);
unpack_64!(unpack_64_53, 53);
unpack_64!(unpack_64_54, 54);
unpack_64!(unpack_64_55, 55);
unpack_64!(unpack_64_56, 56);
unpack_64!(unpack_64_57, 57);
unpack_64!(unpack_64_58, 58);
unpack_64!(unpack_64_59, 59);
unpack_64!(unpack_64_60, 60);
unpack_64!(unpack_64_61, 61);
unpack_64!(unpack_64_62, 62);
unpack_64!(unpack_64_63, 63);
unpack_64!(unpack_64_64, 64);
1633
/// Generates a `u64` packing function named `$func` for the bit width `$width`.
///
/// The generated function takes a 1024-element input array and an output
/// array of `1024 * $width / 64` words. For every lane it invokes `pack!`
/// with a kernel that reads `input` at the index it is given.
macro_rules! pack_64 {
    ($func:ident, $width:expr) => {
        fn $func(
            input: &[u64; 1024],
            // `FastLanes::T` is the bit size of `u64`, the same value as `u64::BITS`.
            output: &mut [u64; 1024 * $width / <u64 as FastLanes>::T],
        ) {
            for lane in 0..<u64 as FastLanes>::LANES {
                // `$slot` is not bound by this macro; it is forwarded as-is so
                // `pack!` can name its kernel argument.
                pack!(u64, $width, output, lane, |$slot| { input[$slot] });
            }
        }
    };
}
1643
// Instantiate one `u64` packing function per bit width from 1 through 64.
pack_64!(pack_64_1, 1);
pack_64!(pack_64_2, 2);
pack_64!(pack_64_3, 3);
pack_64!(pack_64_4, 4);
pack_64!(pack_64_5, 5);
pack_64!(pack_64_6, 6);
pack_64!(pack_64_7, 7);
pack_64!(pack_64_8, 8);
pack_64!(pack_64_9, 9);
pack_64!(pack_64_10, 10);
pack_64!(pack_64_11, 11);
pack_64!(pack_64_12, 12);
pack_64!(pack_64_13, 13);
pack_64!(pack_64_14, 14);
pack_64!(pack_64_15, 15);
pack_64!(pack_64_16, 16);
pack_64!(pack_64_17, 17);
pack_64!(pack_64_18, 18);
pack_64!(pack_64_19, 19);
pack_64!(pack_64_20, 20);
pack_64!(pack_64_21, 21);
pack_64!(pack_64_22, 22);
pack_64!(pack_64_23, 23);
pack_64!(pack_64_24, 24);
pack_64!(pack_64_25, 25);
pack_64!(pack_64_26, 26);
pack_64!(pack_64_27, 27);
pack_64!(pack_64_28, 28);
pack_64!(pack_64_29, 29);
pack_64!(pack_64_30, 30);
pack_64!(pack_64_31, 31);
pack_64!(pack_64_32, 32);

pack_64!(pack_64_33, 33);
pack_64!(pack_64_34, 34);
pack_64!(pack_64_35, 35);
pack_64!(pack_64_36, 36);
pack_64!(pack_64_37, 37);
pack_64!(pack_64_38, 38);
pack_64!(pack_64_39, 39);
pack_64!(pack_64_40, 40);
pack_64!(pack_64_41, 41);
pack_64!(pack_64_42, 42);
pack_64!(pack_64_43, 43);
pack_64!(pack_64_44, 44);
pack_64!(pack_64_45, 45);
pack_64!(pack_64_46, 46);
pack_64!(pack_64_47, 47);
pack_64!(pack_64_48, 48);
pack_64!(pack_64_49, 49);
pack_64!(pack_64_50, 50);
pack_64!(pack_64_51, 51);
pack_64!(pack_64_52, 52);
pack_64!(pack_64_53, 53);
pack_64!(pack_64_54, 54);
pack_64!(pack_64_55, 55);
pack_64!(pack_64_56, 56);
pack_64!(pack_64_57, 57);
pack_64!(pack_64_58, 58);
pack_64!(pack_64_59, 59);
pack_64!(pack_64_60, 60);
pack_64!(pack_64_61, 61);
pack_64!(pack_64_62, 62);
pack_64!(pack_64_63, 63);
pack_64!(pack_64_64, 64);
1709
1710#[cfg(test)]
1711mod test {
1712 use super::*;
1713 use core::array;
1714 pub struct XorShift {
1716 state: u64,
1717 }
1718
1719 impl XorShift {
1720 pub fn new(seed: u64) -> Self {
1721 Self { state: seed }
1722 }
1723
1724 pub fn next(&mut self) -> u64 {
1725 let mut x = self.state;
1726 x ^= x << 13;
1727 x ^= x >> 7;
1728 x ^= x << 17;
1729 self.state = x;
1730 x
1731 }
1732 }
1733
1734 fn pack_unpack_u8(bit_width: usize) {
1737 let mut values: [u8; 1024] = [0; 1024];
1738 let mut rng = XorShift::new(123456789);
1739 for value in &mut values {
1740 *value = (rng.next() % (1 << bit_width)) as u8;
1741 }
1742
1743 let mut packed = vec![0; 1024 * bit_width / 8];
1744 for lane in 0..u8::LANES {
1745 pack!(u8, bit_width, packed, lane, |$pos| {
1747 values[$pos]
1748 });
1749 }
1750
1751 let mut unpacked: [u8; 1024] = [0; 1024];
1752 for lane in 0..u8::LANES {
1753 unpack!(u8, bit_width, packed, lane, |$idx, $elem| {
1755 unpacked[$idx] = $elem;
1756 });
1757 }
1758
1759 assert_eq!(values, unpacked);
1760 }
1761
1762 fn pack_unpack_u16(bit_width: usize) {
1763 let mut values: [u16; 1024] = [0; 1024];
1764 let mut rng = XorShift::new(123456789);
1765 for value in &mut values {
1766 *value = (rng.next() % (1 << bit_width)) as u16;
1767 }
1768
1769 let mut packed = vec![0; 1024 * bit_width / 16];
1770 for lane in 0..u16::LANES {
1771 pack!(u16, bit_width, packed, lane, |$pos| {
1773 values[$pos]
1774 });
1775 }
1776
1777 let mut unpacked: [u16; 1024] = [0; 1024];
1778 for lane in 0..u16::LANES {
1779 unpack!(u16, bit_width, packed, lane, |$idx, $elem| {
1781 unpacked[$idx] = $elem;
1782 });
1783 }
1784
1785 assert_eq!(values, unpacked);
1786 }
1787
1788 fn pack_unpack_u32(bit_width: usize) {
1789 let mut values: [u32; 1024] = [0; 1024];
1790 let mut rng = XorShift::new(123456789);
1791 for value in &mut values {
1792 *value = (rng.next() % (1 << bit_width)) as u32;
1793 }
1794
1795 let mut packed = vec![0; 1024 * bit_width / 32];
1796 for lane in 0..u32::LANES {
1797 pack!(u32, bit_width, packed, lane, |$pos| {
1799 values[$pos]
1800 });
1801 }
1802
1803 let mut unpacked: [u32; 1024] = [0; 1024];
1804 for lane in 0..u32::LANES {
1805 unpack!(u32, bit_width, packed, lane, |$idx, $elem| {
1807 unpacked[$idx] = $elem;
1808 });
1809 }
1810
1811 assert_eq!(values, unpacked);
1812 }
1813
1814 fn pack_unpack_u64(bit_width: usize) {
1815 let mut values: [u64; 1024] = [0; 1024];
1816 let mut rng = XorShift::new(123456789);
1817 if bit_width == 64 {
1818 for value in &mut values {
1819 *value = rng.next();
1820 }
1821 } else {
1822 for value in &mut values {
1823 *value = rng.next() % (1 << bit_width);
1824 }
1825 }
1826
1827 let mut packed = vec![0; 1024 * bit_width / 64];
1828 for lane in 0..u64::LANES {
1829 pack!(u64, bit_width, packed, lane, |$pos| {
1831 values[$pos]
1832 });
1833 }
1834
1835 let mut unpacked: [u64; 1024] = [0; 1024];
1836 for lane in 0..u64::LANES {
1837 unpack!(u64, bit_width, packed, lane, |$idx, $elem| {
1839 unpacked[$idx] = $elem;
1840 });
1841 }
1842
1843 assert_eq!(values, unpacked);
1844 }
1845
1846 #[test]
1847 fn test_pack() {
1848 pack_unpack_u8(0);
1849 pack_unpack_u8(1);
1850 pack_unpack_u8(2);
1851 pack_unpack_u8(3);
1852 pack_unpack_u8(4);
1853 pack_unpack_u8(5);
1854 pack_unpack_u8(6);
1855 pack_unpack_u8(7);
1856 pack_unpack_u8(8);
1857
1858 pack_unpack_u16(0);
1859 pack_unpack_u16(1);
1860 pack_unpack_u16(2);
1861 pack_unpack_u16(3);
1862 pack_unpack_u16(4);
1863 pack_unpack_u16(5);
1864 pack_unpack_u16(6);
1865 pack_unpack_u16(7);
1866 pack_unpack_u16(8);
1867 pack_unpack_u16(9);
1868 pack_unpack_u16(10);
1869 pack_unpack_u16(11);
1870 pack_unpack_u16(12);
1871 pack_unpack_u16(13);
1872 pack_unpack_u16(14);
1873 pack_unpack_u16(15);
1874 pack_unpack_u16(16);
1875
1876 pack_unpack_u32(0);
1877 pack_unpack_u32(1);
1878 pack_unpack_u32(2);
1879 pack_unpack_u32(3);
1880 pack_unpack_u32(4);
1881 pack_unpack_u32(5);
1882 pack_unpack_u32(6);
1883 pack_unpack_u32(7);
1884 pack_unpack_u32(8);
1885 pack_unpack_u32(9);
1886 pack_unpack_u32(10);
1887 pack_unpack_u32(11);
1888 pack_unpack_u32(12);
1889 pack_unpack_u32(13);
1890 pack_unpack_u32(14);
1891 pack_unpack_u32(15);
1892 pack_unpack_u32(16);
1893 pack_unpack_u32(17);
1894 pack_unpack_u32(18);
1895 pack_unpack_u32(19);
1896 pack_unpack_u32(20);
1897 pack_unpack_u32(21);
1898 pack_unpack_u32(22);
1899 pack_unpack_u32(23);
1900 pack_unpack_u32(24);
1901 pack_unpack_u32(25);
1902 pack_unpack_u32(26);
1903 pack_unpack_u32(27);
1904 pack_unpack_u32(28);
1905 pack_unpack_u32(29);
1906 pack_unpack_u32(30);
1907 pack_unpack_u32(31);
1908 pack_unpack_u32(32);
1909
1910 pack_unpack_u64(0);
1911 pack_unpack_u64(1);
1912 pack_unpack_u64(2);
1913 pack_unpack_u64(3);
1914 pack_unpack_u64(4);
1915 pack_unpack_u64(5);
1916 pack_unpack_u64(6);
1917 pack_unpack_u64(7);
1918 pack_unpack_u64(8);
1919 pack_unpack_u64(9);
1920 pack_unpack_u64(10);
1921 pack_unpack_u64(11);
1922 pack_unpack_u64(12);
1923 pack_unpack_u64(13);
1924 pack_unpack_u64(14);
1925 pack_unpack_u64(15);
1926 pack_unpack_u64(16);
1927 pack_unpack_u64(17);
1928 pack_unpack_u64(18);
1929 pack_unpack_u64(19);
1930 pack_unpack_u64(20);
1931 pack_unpack_u64(21);
1932 pack_unpack_u64(22);
1933 pack_unpack_u64(23);
1934 pack_unpack_u64(24);
1935 pack_unpack_u64(25);
1936 pack_unpack_u64(26);
1937 pack_unpack_u64(27);
1938 pack_unpack_u64(28);
1939 pack_unpack_u64(29);
1940 pack_unpack_u64(30);
1941 pack_unpack_u64(31);
1942 pack_unpack_u64(32);
1943 pack_unpack_u64(33);
1944 pack_unpack_u64(34);
1945 pack_unpack_u64(35);
1946 pack_unpack_u64(36);
1947 pack_unpack_u64(37);
1948 pack_unpack_u64(38);
1949 pack_unpack_u64(39);
1950 pack_unpack_u64(40);
1951 pack_unpack_u64(41);
1952 pack_unpack_u64(42);
1953 pack_unpack_u64(43);
1954 pack_unpack_u64(44);
1955 pack_unpack_u64(45);
1956 pack_unpack_u64(46);
1957 pack_unpack_u64(47);
1958 pack_unpack_u64(48);
1959 pack_unpack_u64(49);
1960 pack_unpack_u64(50);
1961 pack_unpack_u64(51);
1962 pack_unpack_u64(52);
1963 pack_unpack_u64(53);
1964 pack_unpack_u64(54);
1965 pack_unpack_u64(55);
1966 pack_unpack_u64(56);
1967 pack_unpack_u64(57);
1968 pack_unpack_u64(58);
1969 pack_unpack_u64(59);
1970 pack_unpack_u64(60);
1971 pack_unpack_u64(61);
1972 pack_unpack_u64(62);
1973 pack_unpack_u64(63);
1974 pack_unpack_u64(64);
1975 }
1976
1977 fn unchecked_pack_unpack_u8(bit_width: usize) {
1978 let mut values = [0u8; 1024];
1979 let mut rng = XorShift::new(123456789);
1980 for value in &mut values {
1981 *value = (rng.next() % (1 << bit_width)) as u8;
1982 }
1983 let mut packed = vec![0; 1024 * bit_width / 8];
1984 unsafe {
1985 BitPacking::unchecked_pack(bit_width, &values, &mut packed);
1986 }
1987 let mut output = [0; 1024];
1988 unsafe { BitPacking::unchecked_unpack(bit_width, &packed, &mut output) };
1989 assert_eq!(values, output);
1990 }
1991
1992 fn unchecked_pack_unpack_u16(bit_width: usize) {
1993 let mut values = [0u16; 1024];
1994 let mut rng = XorShift::new(123456789);
1995 for value in &mut values {
1996 *value = (rng.next() % (1 << bit_width)) as u16;
1997 }
1998 let mut packed = vec![0; 1024 * bit_width / u16::T];
1999 unsafe {
2000 BitPacking::unchecked_pack(bit_width, &values, &mut packed);
2001 }
2002 let mut output = [0; 1024];
2003 unsafe { BitPacking::unchecked_unpack(bit_width, &packed, &mut output) };
2004 assert_eq!(values, output);
2005 }
2006
2007 fn unchecked_pack_unpack_u32(bit_width: usize) {
2008 let mut values = [0u32; 1024];
2009 let mut rng = XorShift::new(123456789);
2010 for value in &mut values {
2011 *value = (rng.next() % (1 << bit_width)) as u32;
2012 }
2013 let mut packed = vec![0; 1024 * bit_width / u32::T];
2014 unsafe {
2015 BitPacking::unchecked_pack(bit_width, &values, &mut packed);
2016 }
2017 let mut output = [0; 1024];
2018 unsafe { BitPacking::unchecked_unpack(bit_width, &packed, &mut output) };
2019 assert_eq!(values, output);
2020 }
2021
2022 fn unchecked_pack_unpack_u64(bit_width: usize) {
2023 let mut values = [0u64; 1024];
2024 let mut rng = XorShift::new(123456789);
2025 if bit_width == 64 {
2026 for value in &mut values {
2027 *value = rng.next();
2028 }
2029 }
2030 let mut packed = vec![0; 1024 * bit_width / u64::T];
2031 unsafe {
2032 BitPacking::unchecked_pack(bit_width, &values, &mut packed);
2033 }
2034 let mut output = [0; 1024];
2035 unsafe { BitPacking::unchecked_unpack(bit_width, &packed, &mut output) };
2036 assert_eq!(values, output);
2037 }
2038
2039 #[test]
2040 fn test_unchecked_pack() {
2041 let input = array::from_fn(|i| i as u32);
2042 let mut packed = [0; 320];
2043 unsafe { BitPacking::unchecked_pack(10, &input, &mut packed) };
2044 let mut output = [0; 1024];
2045 unsafe { BitPacking::unchecked_unpack(10, &packed, &mut output) };
2046 assert_eq!(input, output);
2047
2048 unchecked_pack_unpack_u8(1);
2049 unchecked_pack_unpack_u8(2);
2050 unchecked_pack_unpack_u8(3);
2051 unchecked_pack_unpack_u8(4);
2052 unchecked_pack_unpack_u8(5);
2053 unchecked_pack_unpack_u8(6);
2054 unchecked_pack_unpack_u8(7);
2055 unchecked_pack_unpack_u8(8);
2056
2057 unchecked_pack_unpack_u16(1);
2058 unchecked_pack_unpack_u16(2);
2059 unchecked_pack_unpack_u16(3);
2060 unchecked_pack_unpack_u16(4);
2061 unchecked_pack_unpack_u16(5);
2062 unchecked_pack_unpack_u16(6);
2063 unchecked_pack_unpack_u16(7);
2064 unchecked_pack_unpack_u16(8);
2065 unchecked_pack_unpack_u16(9);
2066 unchecked_pack_unpack_u16(10);
2067 unchecked_pack_unpack_u16(11);
2068 unchecked_pack_unpack_u16(12);
2069 unchecked_pack_unpack_u16(13);
2070 unchecked_pack_unpack_u16(14);
2071 unchecked_pack_unpack_u16(15);
2072 unchecked_pack_unpack_u16(16);
2073
2074 unchecked_pack_unpack_u32(1);
2075 unchecked_pack_unpack_u32(2);
2076 unchecked_pack_unpack_u32(3);
2077 unchecked_pack_unpack_u32(4);
2078 unchecked_pack_unpack_u32(5);
2079 unchecked_pack_unpack_u32(6);
2080 unchecked_pack_unpack_u32(7);
2081 unchecked_pack_unpack_u32(8);
2082 unchecked_pack_unpack_u32(9);
2083 unchecked_pack_unpack_u32(10);
2084 unchecked_pack_unpack_u32(11);
2085 unchecked_pack_unpack_u32(12);
2086 unchecked_pack_unpack_u32(13);
2087 unchecked_pack_unpack_u32(14);
2088 unchecked_pack_unpack_u32(15);
2089 unchecked_pack_unpack_u32(16);
2090 unchecked_pack_unpack_u32(17);
2091 unchecked_pack_unpack_u32(18);
2092 unchecked_pack_unpack_u32(19);
2093 unchecked_pack_unpack_u32(20);
2094 unchecked_pack_unpack_u32(21);
2095 unchecked_pack_unpack_u32(22);
2096 unchecked_pack_unpack_u32(23);
2097 unchecked_pack_unpack_u32(24);
2098 unchecked_pack_unpack_u32(25);
2099 unchecked_pack_unpack_u32(26);
2100 unchecked_pack_unpack_u32(27);
2101 unchecked_pack_unpack_u32(28);
2102 unchecked_pack_unpack_u32(29);
2103 unchecked_pack_unpack_u32(30);
2104 unchecked_pack_unpack_u32(31);
2105 unchecked_pack_unpack_u32(32);
2106
2107 unchecked_pack_unpack_u64(1);
2108 unchecked_pack_unpack_u64(2);
2109 unchecked_pack_unpack_u64(3);
2110 unchecked_pack_unpack_u64(4);
2111 unchecked_pack_unpack_u64(5);
2112 unchecked_pack_unpack_u64(6);
2113 unchecked_pack_unpack_u64(7);
2114 unchecked_pack_unpack_u64(8);
2115 unchecked_pack_unpack_u64(9);
2116 unchecked_pack_unpack_u64(10);
2117 unchecked_pack_unpack_u64(11);
2118 unchecked_pack_unpack_u64(12);
2119 unchecked_pack_unpack_u64(13);
2120 unchecked_pack_unpack_u64(14);
2121 unchecked_pack_unpack_u64(15);
2122 unchecked_pack_unpack_u64(16);
2123 unchecked_pack_unpack_u64(17);
2124 unchecked_pack_unpack_u64(18);
2125 unchecked_pack_unpack_u64(19);
2126 unchecked_pack_unpack_u64(20);
2127 unchecked_pack_unpack_u64(21);
2128 unchecked_pack_unpack_u64(22);
2129 unchecked_pack_unpack_u64(23);
2130 unchecked_pack_unpack_u64(24);
2131 unchecked_pack_unpack_u64(25);
2132 unchecked_pack_unpack_u64(26);
2133 unchecked_pack_unpack_u64(27);
2134 unchecked_pack_unpack_u64(28);
2135 unchecked_pack_unpack_u64(29);
2136 unchecked_pack_unpack_u64(30);
2137 unchecked_pack_unpack_u64(31);
2138 unchecked_pack_unpack_u64(32);
2139 unchecked_pack_unpack_u64(33);
2140 unchecked_pack_unpack_u64(34);
2141 unchecked_pack_unpack_u64(35);
2142 unchecked_pack_unpack_u64(36);
2143 unchecked_pack_unpack_u64(37);
2144 unchecked_pack_unpack_u64(38);
2145 unchecked_pack_unpack_u64(39);
2146 unchecked_pack_unpack_u64(40);
2147 unchecked_pack_unpack_u64(41);
2148 unchecked_pack_unpack_u64(42);
2149 unchecked_pack_unpack_u64(43);
2150 unchecked_pack_unpack_u64(44);
2151 unchecked_pack_unpack_u64(45);
2152 unchecked_pack_unpack_u64(46);
2153 unchecked_pack_unpack_u64(47);
2154 unchecked_pack_unpack_u64(48);
2155 unchecked_pack_unpack_u64(49);
2156 unchecked_pack_unpack_u64(50);
2157 unchecked_pack_unpack_u64(51);
2158 unchecked_pack_unpack_u64(52);
2159 unchecked_pack_unpack_u64(53);
2160 unchecked_pack_unpack_u64(54);
2161 unchecked_pack_unpack_u64(55);
2162 unchecked_pack_unpack_u64(56);
2163 unchecked_pack_unpack_u64(57);
2164 unchecked_pack_unpack_u64(58);
2165 unchecked_pack_unpack_u64(59);
2166 unchecked_pack_unpack_u64(60);
2167 unchecked_pack_unpack_u64(61);
2168 unchecked_pack_unpack_u64(62);
2169 unchecked_pack_unpack_u64(63);
2170 unchecked_pack_unpack_u64(64);
2171 }
2172}