1use arrayref::{array_mut_ref, array_ref};
17use core::mem::size_of;
18
/// Permutation applied to groups of eight rows when locating an element.
///
/// The `index` helpers inside `pack!` and `unpack!` look up
/// `FL_ORDER[row / 8]` and multiply the result by 16 to obtain the base offset
/// of that row group within a 1024-element block.
pub const FL_ORDER: [usize; 8] = [0, 4, 2, 6, 1, 5, 3, 7];
20
/// Size constants shared by every element type that can be bit-packed.
///
/// Both constants are derived from the in-memory size of the implementing
/// type, so an implementation normally has an empty body.
pub trait FastLanes: Sized + Copy {
    /// Width of `Self` in bits.
    const T: usize = 8 * size_of::<Self>();

    /// How many `Self`-wide lanes fit into 1024 bits.
    const LANES: usize = 1024 / Self::T;
}

// The unsigned integer widths supported by the packing kernels; each one
// relies entirely on the default constants above.
impl FastLanes for u8 {}
impl FastLanes for u16 {}
impl FastLanes for u32 {}
impl FastLanes for u64 {}
31
/// Bit-packs every row of one lane into `$packed`, fully unrolled.
///
/// Arguments:
/// * `$T` - element type; `<$T>::T` and `<$T>::LANES` are read from it and
///   `seq_t!` must have an arm for it.
/// * `$W` - number of bits kept from each element.
/// * `$packed` - indexable destination for the packed words.
/// * `$lane` - lane being packed.
/// * `| $idx | body` - closure-like kernel: `body` is evaluated with `$idx`
///   bound to the position of the element to read and must yield that element.
macro_rules! pack {
    ($T:ty, $W:expr, $packed:expr, $lane:expr, | $_1:tt $idx:ident | $($body:tt)*) => {
        // `$_1` is spliced in front of the captured identifier so that the inner
        // macro declares its own metavariable (presumably callers pass a literal
        // `$` there - confirm against call sites).
        macro_rules! __kernel__ {( $_1 $idx:ident ) => ( $($body)* )}
        {
            use paste::paste;

            // Bit width of the element type.
            const T: usize = <$T>::T;

            // Position of `(row, lane)` inside the 1024-element block.
            #[inline(always)]
            fn index(row: usize, lane: usize) -> usize {
                let group = row / 8;
                let within = row % 8;
                FL_ORDER[group] * 16 + within * 128 + lane
            }

            if $W == 0 {
                // Zero-width elements produce no packed output.
            } else if $W == T {
                // Full-width elements are stored one per packed word, unmodified.
                paste!(seq_t!(row in $T {
                    let pos = index(row, $lane);
                    $packed[<$T>::LANES * row + $lane] = __kernel__!(pos);
                }));
            } else {
                // Keeps only the low `$W` bits of an element.
                let low_bits: $T = (1 << $W) - 1;

                // Packed word currently being assembled for this lane.
                let mut acc: $T = 0;

                paste!(seq_t!(row in $T {
                    let pos = index(row, $lane);
                    let raw = __kernel__!(pos);
                    let bits = raw & low_bits;

                    // Bit offsets of this element and of the one that follows it.
                    let bit_start: usize = row * $W;
                    let bit_end: usize = (row + 1) * $W;

                    if row == 0 {
                        acc = bits;
                    } else {
                        acc |= bits << (bit_start % T);
                    }

                    let curr_word: usize = bit_start / T;
                    let next_word: usize = bit_end / T;

                    // The carry computed for the final row is never read again.
                    #[allow(unused_assignments)]
                    if next_word > curr_word {
                        // The current word is complete: flush it, then seed the next
                        // word with whatever part of this element did not fit.
                        $packed[<$T>::LANES * curr_word + $lane] = acc;
                        let remaining_bits: usize = bit_end % T;
                        acc = bits >> ($W - remaining_bits);
                    }
                }));
            }
        }
    };
}
95
/// Unpacks every row of one lane from `$packed`, fully unrolled.
///
/// Arguments:
/// * `$T` - element type; `<$T>::T` and `<$T>::LANES` are read from it and
///   `seq_t!` must have an arm for it.
/// * `$W` - number of bits each packed element occupies.
/// * `$packed` - indexable source of packed words.
/// * `$lane` - lane being unpacked.
/// * `| $idx, $elem | body` - closure-like kernel: `body` is expanded once per
///   row with `$idx` bound to the element position and `$elem` to its value.
macro_rules! unpack {
    ($T:ty, $W:expr, $packed:expr, $lane:expr, | $_1:tt $idx:ident, $_2:tt $elem:ident | $($body:tt)*) => {
        // `$_1` / `$_2` are spliced in front of the captured identifiers so that
        // the inner macro declares its own metavariables (presumably callers pass
        // literal `$` tokens there - confirm against call sites).
        macro_rules! __kernel__ {( $_1 $idx:ident, $_2 $elem:ident ) => ( $($body)* )}
        {
            use paste::paste;

            // Bit width of the element type.
            const T: usize = <$T>::T;

            // Position of `(row, lane)` inside the 1024-element block.
            #[inline(always)]
            fn index(row: usize, lane: usize) -> usize {
                let group = row / 8;
                let within = row % 8;
                FL_ORDER[group] * 16 + within * 128 + lane
            }

            if $W == 0 {
                // Nothing is stored for zero-width elements; the kernel still runs
                // for every row and receives zero.
                paste!(seq_t!(row in $T {
                    let pos = index(row, $lane);
                    let nil: $T = 0;
                    __kernel__!(pos, nil);
                }));
            } else if $W == T {
                // Full-width elements are read back one per packed word.
                paste!(seq_t!(row in $T {
                    let pos = index(row, $lane);
                    let word = $packed[<$T>::LANES * row + $lane];
                    __kernel__!(pos, word);
                }));
            } else {
                // All-ones in the low `width` bits; a full-width mask is
                // special-cased because the shift would otherwise overflow.
                #[inline]
                fn mask(width: usize) -> $T {
                    if width != T { (1 << (width % T)) - 1 } else { <$T>::MAX }
                }

                // Packed word currently being consumed for this lane.
                let mut word: $T = $packed[$lane];

                paste!(seq_t!(row in $T {
                    // Bit offsets of this element and of the one that follows it.
                    let bit_start: usize = row * $W;
                    let bit_end: usize = (row + 1) * $W;

                    let curr_word: usize = bit_start / T;
                    let next_word = bit_end / T;
                    let shift = bit_start % T;

                    let value: $T = if next_word > curr_word {
                        // The element finishes off the current word and may spill
                        // into the following one.
                        let remaining_bits = bit_end % T;
                        let current_bits = $W - remaining_bits;
                        let mut low = (word >> shift) & mask(current_bits);

                        if next_word < $W {
                            // Fetch the following word and take the spilled bits.
                            word = $packed[<$T>::LANES * next_word + $lane];
                            low |= (word & mask(remaining_bits)) << current_bits;
                        }
                        low
                    } else {
                        // The element lies entirely inside the current word.
                        (word >> shift) & mask($W)
                    };

                    let pos = index(row, $lane);
                    __kernel__!(pos, value);
                }));
            }
        }
    };
}
169
/// Repeats a block once per bit of the named unsigned integer type.
///
/// `seq_t!(row in u16 { ... })` forwards to `seq_macro::seq!` with the range
/// `0..16`; the other arms do the same for 8, 32 and 64 bits.
macro_rules! seq_t {
    ($row:ident in u8 $block:tt) => {
        seq_macro::seq!($row in 0..8 $block)
    };
    ($row:ident in u16 $block:tt) => {
        seq_macro::seq!($row in 0..16 $block)
    };
    ($row:ident in u32 $block:tt) => {
        seq_macro::seq!($row in 0..32 $block)
    };
    ($row:ident in u64 $block:tt) => {
        seq_macro::seq!($row in 0..64 $block)
    };
}
177
/// Bit-packing and unpacking of fixed blocks of 1024 elements.
pub trait BitPacking: FastLanes {
    /// Packs the elements of `input` into `output` using `width` bits each.
    ///
    /// # Safety
    ///
    /// The implementations in this file expect `input.len() == 1024`,
    /// `output.len() == 1024 * width / T` and `width <= T`, and verify those
    /// conditions with `debug_assert!` only.
    unsafe fn unchecked_pack(width: usize, input: &[Self], output: &mut [Self]);

    /// Unpacks `width`-bit elements from `input` into `output`.
    ///
    /// # Safety
    ///
    /// The implementations in this file expect
    /// `input.len() == 1024 * width / T`, `output.len() == 1024` and
    /// `width <= T`, and verify those conditions with `debug_assert!` only.
    unsafe fn unchecked_unpack(width: usize, input: &[Self], output: &mut [Self]);
}
198
199impl BitPacking for u8 {
200 unsafe fn unchecked_pack(width: usize, input: &[Self], output: &mut [Self]) {
201 let packed_len = 128 * width / size_of::<Self>();
202 debug_assert_eq!(
203 output.len(),
204 packed_len,
205 "Output buffer must be of size 1024 * W / T"
206 );
207 debug_assert_eq!(input.len(), 1024, "Input buffer must be of size 1024");
208 debug_assert!(
209 width <= Self::T,
210 "Width must be less than or equal to {}",
211 Self::T
212 );
213
214 match width {
215 0 => {
216 }
218 1 => pack_8_1(
219 array_ref![input, 0, 1024],
220 array_mut_ref![output, 0, 1024 / 8],
221 ),
222 2 => pack_8_2(
223 array_ref![input, 0, 1024],
224 array_mut_ref![output, 0, 1024 * 2 / 8],
225 ),
226 3 => pack_8_3(
227 array_ref![input, 0, 1024],
228 array_mut_ref![output, 0, 1024 * 3 / 8],
229 ),
230 4 => pack_8_4(
231 array_ref![input, 0, 1024],
232 array_mut_ref![output, 0, 1024 * 4 / 8],
233 ),
234 5 => pack_8_5(
235 array_ref![input, 0, 1024],
236 array_mut_ref![output, 0, 1024 * 5 / 8],
237 ),
238 6 => pack_8_6(
239 array_ref![input, 0, 1024],
240 array_mut_ref![output, 0, 1024 * 6 / 8],
241 ),
242 7 => pack_8_7(
243 array_ref![input, 0, 1024],
244 array_mut_ref![output, 0, 1024 * 7 / 8],
245 ),
246 8 => pack_8_8(
247 array_ref![input, 0, 1024],
248 array_mut_ref![output, 0, 1024 * 8 / 8],
249 ),
250
251 _ => unreachable!("Unsupported width: {}", width),
252 }
253 }
254
255 unsafe fn unchecked_unpack(width: usize, input: &[Self], output: &mut [Self]) {
256 let packed_len = 128 * width / size_of::<Self>();
257 debug_assert_eq!(
258 input.len(),
259 packed_len,
260 "Input buffer must be of size 1024 * W / T"
261 );
262 debug_assert_eq!(output.len(), 1024, "Output buffer must be of size 1024");
263 debug_assert!(
264 width <= Self::T,
265 "Width must be less than or equal to {}",
266 Self::T
267 );
268
269 match width {
270 0 => {
271 output.fill(0);
273 }
274 1 => unpack_8_1(
275 array_ref![input, 0, 1024 / 8],
276 array_mut_ref![output, 0, 1024],
277 ),
278 2 => unpack_8_2(
279 array_ref![input, 0, 1024 * 2 / 8],
280 array_mut_ref![output, 0, 1024],
281 ),
282 3 => unpack_8_3(
283 array_ref![input, 0, 1024 * 3 / 8],
284 array_mut_ref![output, 0, 1024],
285 ),
286 4 => unpack_8_4(
287 array_ref![input, 0, 1024 * 4 / 8],
288 array_mut_ref![output, 0, 1024],
289 ),
290 5 => unpack_8_5(
291 array_ref![input, 0, 1024 * 5 / 8],
292 array_mut_ref![output, 0, 1024],
293 ),
294 6 => unpack_8_6(
295 array_ref![input, 0, 1024 * 6 / 8],
296 array_mut_ref![output, 0, 1024],
297 ),
298 7 => unpack_8_7(
299 array_ref![input, 0, 1024 * 7 / 8],
300 array_mut_ref![output, 0, 1024],
301 ),
302 8 => unpack_8_8(
303 array_ref![input, 0, 1024 * 8 / 8],
304 array_mut_ref![output, 0, 1024],
305 ),
306
307 _ => unreachable!("Unsupported width: {}", width),
308 }
309 }
310}
311
312impl BitPacking for u16 {
313 unsafe fn unchecked_pack(width: usize, input: &[Self], output: &mut [Self]) {
314 let packed_len = 128 * width / size_of::<Self>();
315 debug_assert_eq!(
316 output.len(),
317 packed_len,
318 "Output buffer must be of size 1024 * W / T"
319 );
320 debug_assert_eq!(input.len(), 1024, "Input buffer must be of size 1024");
321 debug_assert!(
322 width <= Self::T,
323 "Width must be less than or equal to {}",
324 Self::T
325 );
326
327 match width {
328 0 => {
329 }
331 1 => pack_16_1(
332 array_ref![input, 0, 1024],
333 array_mut_ref![output, 0, 1024 / 16],
334 ),
335 2 => pack_16_2(
336 array_ref![input, 0, 1024],
337 array_mut_ref![output, 0, 1024 * 2 / 16],
338 ),
339 3 => pack_16_3(
340 array_ref![input, 0, 1024],
341 array_mut_ref![output, 0, 1024 * 3 / 16],
342 ),
343 4 => pack_16_4(
344 array_ref![input, 0, 1024],
345 array_mut_ref![output, 0, 1024 * 4 / 16],
346 ),
347 5 => pack_16_5(
348 array_ref![input, 0, 1024],
349 array_mut_ref![output, 0, 1024 * 5 / 16],
350 ),
351 6 => pack_16_6(
352 array_ref![input, 0, 1024],
353 array_mut_ref![output, 0, 1024 * 6 / 16],
354 ),
355 7 => pack_16_7(
356 array_ref![input, 0, 1024],
357 array_mut_ref![output, 0, 1024 * 7 / 16],
358 ),
359 8 => pack_16_8(
360 array_ref![input, 0, 1024],
361 array_mut_ref![output, 0, 1024 * 8 / 16],
362 ),
363 9 => pack_16_9(
364 array_ref![input, 0, 1024],
365 array_mut_ref![output, 0, 1024 * 9 / 16],
366 ),
367
368 10 => pack_16_10(
369 array_ref![input, 0, 1024],
370 array_mut_ref![output, 0, 1024 * 10 / 16],
371 ),
372 11 => pack_16_11(
373 array_ref![input, 0, 1024],
374 array_mut_ref![output, 0, 1024 * 11 / 16],
375 ),
376 12 => pack_16_12(
377 array_ref![input, 0, 1024],
378 array_mut_ref![output, 0, 1024 * 12 / 16],
379 ),
380 13 => pack_16_13(
381 array_ref![input, 0, 1024],
382 array_mut_ref![output, 0, 1024 * 13 / 16],
383 ),
384 14 => pack_16_14(
385 array_ref![input, 0, 1024],
386 array_mut_ref![output, 0, 1024 * 14 / 16],
387 ),
388 15 => pack_16_15(
389 array_ref![input, 0, 1024],
390 array_mut_ref![output, 0, 1024 * 15 / 16],
391 ),
392 16 => pack_16_16(
393 array_ref![input, 0, 1024],
394 array_mut_ref![output, 0, 1024 * 16 / 16],
395 ),
396
397 _ => unreachable!("Unsupported width: {}", width),
398 }
399 }
400
401 unsafe fn unchecked_unpack(width: usize, input: &[Self], output: &mut [Self]) {
402 let packed_len = 128 * width / size_of::<Self>();
403 debug_assert_eq!(
404 input.len(),
405 packed_len,
406 "Input buffer must be of size 1024 * W / T"
407 );
408 debug_assert_eq!(output.len(), 1024, "Output buffer must be of size 1024");
409 debug_assert!(
410 width <= Self::T,
411 "Width must be less than or equal to {}",
412 Self::T
413 );
414
415 match width {
416 0 => {
417 output.fill(0);
418 }
419 1 => unpack_16_1(
420 array_ref![input, 0, 1024 / 16],
421 array_mut_ref![output, 0, 1024],
422 ),
423 2 => unpack_16_2(
424 array_ref![input, 0, 1024 * 2 / 16],
425 array_mut_ref![output, 0, 1024],
426 ),
427 3 => unpack_16_3(
428 array_ref![input, 0, 1024 * 3 / 16],
429 array_mut_ref![output, 0, 1024],
430 ),
431 4 => unpack_16_4(
432 array_ref![input, 0, 1024 * 4 / 16],
433 array_mut_ref![output, 0, 1024],
434 ),
435 5 => unpack_16_5(
436 array_ref![input, 0, 1024 * 5 / 16],
437 array_mut_ref![output, 0, 1024],
438 ),
439 6 => unpack_16_6(
440 array_ref![input, 0, 1024 * 6 / 16],
441 array_mut_ref![output, 0, 1024],
442 ),
443 7 => unpack_16_7(
444 array_ref![input, 0, 1024 * 7 / 16],
445 array_mut_ref![output, 0, 1024],
446 ),
447 8 => unpack_16_8(
448 array_ref![input, 0, 1024 * 8 / 16],
449 array_mut_ref![output, 0, 1024],
450 ),
451 9 => unpack_16_9(
452 array_ref![input, 0, 1024 * 9 / 16],
453 array_mut_ref![output, 0, 1024],
454 ),
455
456 10 => unpack_16_10(
457 array_ref![input, 0, 1024 * 10 / 16],
458 array_mut_ref![output, 0, 1024],
459 ),
460 11 => unpack_16_11(
461 array_ref![input, 0, 1024 * 11 / 16],
462 array_mut_ref![output, 0, 1024],
463 ),
464 12 => unpack_16_12(
465 array_ref![input, 0, 1024 * 12 / 16],
466 array_mut_ref![output, 0, 1024],
467 ),
468 13 => unpack_16_13(
469 array_ref![input, 0, 1024 * 13 / 16],
470 array_mut_ref![output, 0, 1024],
471 ),
472 14 => unpack_16_14(
473 array_ref![input, 0, 1024 * 14 / 16],
474 array_mut_ref![output, 0, 1024],
475 ),
476 15 => unpack_16_15(
477 array_ref![input, 0, 1024 * 15 / 16],
478 array_mut_ref![output, 0, 1024],
479 ),
480 16 => unpack_16_16(
481 array_ref![input, 0, 1024 * 16 / 16],
482 array_mut_ref![output, 0, 1024],
483 ),
484
485 _ => unreachable!("Unsupported width: {}", width),
486 }
487 }
488}
489
490impl BitPacking for u32 {
491 unsafe fn unchecked_pack(width: usize, input: &[Self], output: &mut [Self]) {
492 let packed_len = 128 * width / size_of::<Self>();
493 debug_assert_eq!(
494 output.len(),
495 packed_len,
496 "Output buffer must be of size 1024 * W / T"
497 );
498 debug_assert_eq!(input.len(), 1024, "Input buffer must be of size 1024");
499 debug_assert!(
500 width <= Self::T,
501 "Width must be less than or equal to {}",
502 Self::T
503 );
504
505 match width {
506 0 => {
507 }
509 1 => pack_32_1(
510 array_ref![input, 0, 1024],
511 array_mut_ref![output, 0, 1024 / 32],
512 ),
513 2 => pack_32_2(
514 array_ref![input, 0, 1024],
515 array_mut_ref![output, 0, 1024 * 2 / 32],
516 ),
517 3 => pack_32_3(
518 array_ref![input, 0, 1024],
519 array_mut_ref![output, 0, 1024 * 3 / 32],
520 ),
521 4 => pack_32_4(
522 array_ref![input, 0, 1024],
523 array_mut_ref![output, 0, 1024 * 4 / 32],
524 ),
525 5 => pack_32_5(
526 array_ref![input, 0, 1024],
527 array_mut_ref![output, 0, 1024 * 5 / 32],
528 ),
529 6 => pack_32_6(
530 array_ref![input, 0, 1024],
531 array_mut_ref![output, 0, 1024 * 6 / 32],
532 ),
533 7 => pack_32_7(
534 array_ref![input, 0, 1024],
535 array_mut_ref![output, 0, 1024 * 7 / 32],
536 ),
537 8 => pack_32_8(
538 array_ref![input, 0, 1024],
539 array_mut_ref![output, 0, 1024 * 8 / 32],
540 ),
541 9 => pack_32_9(
542 array_ref![input, 0, 1024],
543 array_mut_ref![output, 0, 1024 * 9 / 32],
544 ),
545
546 10 => pack_32_10(
547 array_ref![input, 0, 1024],
548 array_mut_ref![output, 0, 1024 * 10 / 32],
549 ),
550 11 => pack_32_11(
551 array_ref![input, 0, 1024],
552 array_mut_ref![output, 0, 1024 * 11 / 32],
553 ),
554 12 => pack_32_12(
555 array_ref![input, 0, 1024],
556 array_mut_ref![output, 0, 1024 * 12 / 32],
557 ),
558 13 => pack_32_13(
559 array_ref![input, 0, 1024],
560 array_mut_ref![output, 0, 1024 * 13 / 32],
561 ),
562 14 => pack_32_14(
563 array_ref![input, 0, 1024],
564 array_mut_ref![output, 0, 1024 * 14 / 32],
565 ),
566 15 => pack_32_15(
567 array_ref![input, 0, 1024],
568 array_mut_ref![output, 0, 1024 * 15 / 32],
569 ),
570 16 => pack_32_16(
571 array_ref![input, 0, 1024],
572 array_mut_ref![output, 0, 1024 * 16 / 32],
573 ),
574 17 => pack_32_17(
575 array_ref![input, 0, 1024],
576 array_mut_ref![output, 0, 1024 * 17 / 32],
577 ),
578 18 => pack_32_18(
579 array_ref![input, 0, 1024],
580 array_mut_ref![output, 0, 1024 * 18 / 32],
581 ),
582 19 => pack_32_19(
583 array_ref![input, 0, 1024],
584 array_mut_ref![output, 0, 1024 * 19 / 32],
585 ),
586
587 20 => pack_32_20(
588 array_ref![input, 0, 1024],
589 array_mut_ref![output, 0, 1024 * 20 / 32],
590 ),
591 21 => pack_32_21(
592 array_ref![input, 0, 1024],
593 array_mut_ref![output, 0, 1024 * 21 / 32],
594 ),
595 22 => pack_32_22(
596 array_ref![input, 0, 1024],
597 array_mut_ref![output, 0, 1024 * 22 / 32],
598 ),
599 23 => pack_32_23(
600 array_ref![input, 0, 1024],
601 array_mut_ref![output, 0, 1024 * 23 / 32],
602 ),
603 24 => pack_32_24(
604 array_ref![input, 0, 1024],
605 array_mut_ref![output, 0, 1024 * 24 / 32],
606 ),
607 25 => pack_32_25(
608 array_ref![input, 0, 1024],
609 array_mut_ref![output, 0, 1024 * 25 / 32],
610 ),
611 26 => pack_32_26(
612 array_ref![input, 0, 1024],
613 array_mut_ref![output, 0, 1024 * 26 / 32],
614 ),
615 27 => pack_32_27(
616 array_ref![input, 0, 1024],
617 array_mut_ref![output, 0, 1024 * 27 / 32],
618 ),
619 28 => pack_32_28(
620 array_ref![input, 0, 1024],
621 array_mut_ref![output, 0, 1024 * 28 / 32],
622 ),
623 29 => pack_32_29(
624 array_ref![input, 0, 1024],
625 array_mut_ref![output, 0, 1024 * 29 / 32],
626 ),
627
628 30 => pack_32_30(
629 array_ref![input, 0, 1024],
630 array_mut_ref![output, 0, 1024 * 30 / 32],
631 ),
632 31 => pack_32_31(
633 array_ref![input, 0, 1024],
634 array_mut_ref![output, 0, 1024 * 31 / 32],
635 ),
636 32 => pack_32_32(
637 array_ref![input, 0, 1024],
638 array_mut_ref![output, 0, 1024 * 32 / 32],
639 ),
640
641 _ => unreachable!("Unsupported width: {}", width),
642 }
643 }
644
645 unsafe fn unchecked_unpack(width: usize, input: &[Self], output: &mut [Self]) {
646 let packed_len = 128 * width / size_of::<Self>();
647 debug_assert_eq!(
648 input.len(),
649 packed_len,
650 "Input buffer must be of size 1024 * W / T"
651 );
652 debug_assert_eq!(output.len(), 1024, "Output buffer must be of size 1024");
653 debug_assert!(
654 width <= Self::T,
655 "Width must be less than or equal to {}",
656 Self::T
657 );
658
659 match width {
660 0 => {
661 output.fill(0);
662 }
663 1 => unpack_32_1(
664 array_ref![input, 0, 1024 / 32],
665 array_mut_ref![output, 0, 1024],
666 ),
667 2 => unpack_32_2(
668 array_ref![input, 0, 1024 * 2 / 32],
669 array_mut_ref![output, 0, 1024],
670 ),
671 3 => unpack_32_3(
672 array_ref![input, 0, 1024 * 3 / 32],
673 array_mut_ref![output, 0, 1024],
674 ),
675 4 => unpack_32_4(
676 array_ref![input, 0, 1024 * 4 / 32],
677 array_mut_ref![output, 0, 1024],
678 ),
679 5 => unpack_32_5(
680 array_ref![input, 0, 1024 * 5 / 32],
681 array_mut_ref![output, 0, 1024],
682 ),
683 6 => unpack_32_6(
684 array_ref![input, 0, 1024 * 6 / 32],
685 array_mut_ref![output, 0, 1024],
686 ),
687 7 => unpack_32_7(
688 array_ref![input, 0, 1024 * 7 / 32],
689 array_mut_ref![output, 0, 1024],
690 ),
691 8 => unpack_32_8(
692 array_ref![input, 0, 1024 * 8 / 32],
693 array_mut_ref![output, 0, 1024],
694 ),
695 9 => unpack_32_9(
696 array_ref![input, 0, 1024 * 9 / 32],
697 array_mut_ref![output, 0, 1024],
698 ),
699
700 10 => unpack_32_10(
701 array_ref![input, 0, 1024 * 10 / 32],
702 array_mut_ref![output, 0, 1024],
703 ),
704 11 => unpack_32_11(
705 array_ref![input, 0, 1024 * 11 / 32],
706 array_mut_ref![output, 0, 1024],
707 ),
708 12 => unpack_32_12(
709 array_ref![input, 0, 1024 * 12 / 32],
710 array_mut_ref![output, 0, 1024],
711 ),
712 13 => unpack_32_13(
713 array_ref![input, 0, 1024 * 13 / 32],
714 array_mut_ref![output, 0, 1024],
715 ),
716 14 => unpack_32_14(
717 array_ref![input, 0, 1024 * 14 / 32],
718 array_mut_ref![output, 0, 1024],
719 ),
720 15 => unpack_32_15(
721 array_ref![input, 0, 1024 * 15 / 32],
722 array_mut_ref![output, 0, 1024],
723 ),
724 16 => unpack_32_16(
725 array_ref![input, 0, 1024 * 16 / 32],
726 array_mut_ref![output, 0, 1024],
727 ),
728 17 => unpack_32_17(
729 array_ref![input, 0, 1024 * 17 / 32],
730 array_mut_ref![output, 0, 1024],
731 ),
732 18 => unpack_32_18(
733 array_ref![input, 0, 1024 * 18 / 32],
734 array_mut_ref![output, 0, 1024],
735 ),
736 19 => unpack_32_19(
737 array_ref![input, 0, 1024 * 19 / 32],
738 array_mut_ref![output, 0, 1024],
739 ),
740
741 20 => unpack_32_20(
742 array_ref![input, 0, 1024 * 20 / 32],
743 array_mut_ref![output, 0, 1024],
744 ),
745 21 => unpack_32_21(
746 array_ref![input, 0, 1024 * 21 / 32],
747 array_mut_ref![output, 0, 1024],
748 ),
749 22 => unpack_32_22(
750 array_ref![input, 0, 1024 * 22 / 32],
751 array_mut_ref![output, 0, 1024],
752 ),
753 23 => unpack_32_23(
754 array_ref![input, 0, 1024 * 23 / 32],
755 array_mut_ref![output, 0, 1024],
756 ),
757 24 => unpack_32_24(
758 array_ref![input, 0, 1024 * 24 / 32],
759 array_mut_ref![output, 0, 1024],
760 ),
761 25 => unpack_32_25(
762 array_ref![input, 0, 1024 * 25 / 32],
763 array_mut_ref![output, 0, 1024],
764 ),
765 26 => unpack_32_26(
766 array_ref![input, 0, 1024 * 26 / 32],
767 array_mut_ref![output, 0, 1024],
768 ),
769 27 => unpack_32_27(
770 array_ref![input, 0, 1024 * 27 / 32],
771 array_mut_ref![output, 0, 1024],
772 ),
773 28 => unpack_32_28(
774 array_ref![input, 0, 1024 * 28 / 32],
775 array_mut_ref![output, 0, 1024],
776 ),
777 29 => unpack_32_29(
778 array_ref![input, 0, 1024 * 29 / 32],
779 array_mut_ref![output, 0, 1024],
780 ),
781
782 30 => unpack_32_30(
783 array_ref![input, 0, 1024 * 30 / 32],
784 array_mut_ref![output, 0, 1024],
785 ),
786 31 => unpack_32_31(
787 array_ref![input, 0, 1024 * 31 / 32],
788 array_mut_ref![output, 0, 1024],
789 ),
790 32 => unpack_32_32(
791 array_ref![input, 0, 1024 * 32 / 32],
792 array_mut_ref![output, 0, 1024],
793 ),
794
795 _ => unreachable!("Unsupported width: {}", width),
796 }
797 }
798}
799
800impl BitPacking for u64 {
801 unsafe fn unchecked_pack(width: usize, input: &[Self], output: &mut [Self]) {
802 let packed_len = 128 * width / size_of::<Self>();
803 debug_assert_eq!(
804 output.len(),
805 packed_len,
806 "Output buffer must be of size 1024 * W / T"
807 );
808 debug_assert_eq!(input.len(), 1024, "Input buffer must be of size 1024");
809 debug_assert!(
810 width <= Self::T,
811 "Width must be less than or equal to {}",
812 Self::T
813 );
814
815 match width {
816 0 => {
817 }
819 1 => pack_64_1(
820 array_ref![input, 0, 1024],
821 array_mut_ref![output, 0, 1024 / 64],
822 ),
823 2 => pack_64_2(
824 array_ref![input, 0, 1024],
825 array_mut_ref![output, 0, 1024 * 2 / 64],
826 ),
827 3 => pack_64_3(
828 array_ref![input, 0, 1024],
829 array_mut_ref![output, 0, 1024 * 3 / 64],
830 ),
831 4 => pack_64_4(
832 array_ref![input, 0, 1024],
833 array_mut_ref![output, 0, 1024 * 4 / 64],
834 ),
835 5 => pack_64_5(
836 array_ref![input, 0, 1024],
837 array_mut_ref![output, 0, 1024 * 5 / 64],
838 ),
839 6 => pack_64_6(
840 array_ref![input, 0, 1024],
841 array_mut_ref![output, 0, 1024 * 6 / 64],
842 ),
843 7 => pack_64_7(
844 array_ref![input, 0, 1024],
845 array_mut_ref![output, 0, 1024 * 7 / 64],
846 ),
847 8 => pack_64_8(
848 array_ref![input, 0, 1024],
849 array_mut_ref![output, 0, 1024 * 8 / 64],
850 ),
851 9 => pack_64_9(
852 array_ref![input, 0, 1024],
853 array_mut_ref![output, 0, 1024 * 9 / 64],
854 ),
855
856 10 => pack_64_10(
857 array_ref![input, 0, 1024],
858 array_mut_ref![output, 0, 1024 * 10 / 64],
859 ),
860 11 => pack_64_11(
861 array_ref![input, 0, 1024],
862 array_mut_ref![output, 0, 1024 * 11 / 64],
863 ),
864 12 => pack_64_12(
865 array_ref![input, 0, 1024],
866 array_mut_ref![output, 0, 1024 * 12 / 64],
867 ),
868 13 => pack_64_13(
869 array_ref![input, 0, 1024],
870 array_mut_ref![output, 0, 1024 * 13 / 64],
871 ),
872 14 => pack_64_14(
873 array_ref![input, 0, 1024],
874 array_mut_ref![output, 0, 1024 * 14 / 64],
875 ),
876 15 => pack_64_15(
877 array_ref![input, 0, 1024],
878 array_mut_ref![output, 0, 1024 * 15 / 64],
879 ),
880 16 => pack_64_16(
881 array_ref![input, 0, 1024],
882 array_mut_ref![output, 0, 1024 * 16 / 64],
883 ),
884 17 => pack_64_17(
885 array_ref![input, 0, 1024],
886 array_mut_ref![output, 0, 1024 * 17 / 64],
887 ),
888 18 => pack_64_18(
889 array_ref![input, 0, 1024],
890 array_mut_ref![output, 0, 1024 * 18 / 64],
891 ),
892 19 => pack_64_19(
893 array_ref![input, 0, 1024],
894 array_mut_ref![output, 0, 1024 * 19 / 64],
895 ),
896
897 20 => pack_64_20(
898 array_ref![input, 0, 1024],
899 array_mut_ref![output, 0, 1024 * 20 / 64],
900 ),
901 21 => pack_64_21(
902 array_ref![input, 0, 1024],
903 array_mut_ref![output, 0, 1024 * 21 / 64],
904 ),
905 22 => pack_64_22(
906 array_ref![input, 0, 1024],
907 array_mut_ref![output, 0, 1024 * 22 / 64],
908 ),
909 23 => pack_64_23(
910 array_ref![input, 0, 1024],
911 array_mut_ref![output, 0, 1024 * 23 / 64],
912 ),
913 24 => pack_64_24(
914 array_ref![input, 0, 1024],
915 array_mut_ref![output, 0, 1024 * 24 / 64],
916 ),
917 25 => pack_64_25(
918 array_ref![input, 0, 1024],
919 array_mut_ref![output, 0, 1024 * 25 / 64],
920 ),
921 26 => pack_64_26(
922 array_ref![input, 0, 1024],
923 array_mut_ref![output, 0, 1024 * 26 / 64],
924 ),
925 27 => pack_64_27(
926 array_ref![input, 0, 1024],
927 array_mut_ref![output, 0, 1024 * 27 / 64],
928 ),
929 28 => pack_64_28(
930 array_ref![input, 0, 1024],
931 array_mut_ref![output, 0, 1024 * 28 / 64],
932 ),
933 29 => pack_64_29(
934 array_ref![input, 0, 1024],
935 array_mut_ref![output, 0, 1024 * 29 / 64],
936 ),
937
938 30 => pack_64_30(
939 array_ref![input, 0, 1024],
940 array_mut_ref![output, 0, 1024 * 30 / 64],
941 ),
942 31 => pack_64_31(
943 array_ref![input, 0, 1024],
944 array_mut_ref![output, 0, 1024 * 31 / 64],
945 ),
946 32 => pack_64_32(
947 array_ref![input, 0, 1024],
948 array_mut_ref![output, 0, 1024 * 32 / 64],
949 ),
950 33 => pack_64_33(
951 array_ref![input, 0, 1024],
952 array_mut_ref![output, 0, 1024 * 33 / 64],
953 ),
954 34 => pack_64_34(
955 array_ref![input, 0, 1024],
956 array_mut_ref![output, 0, 1024 * 34 / 64],
957 ),
958 35 => pack_64_35(
959 array_ref![input, 0, 1024],
960 array_mut_ref![output, 0, 1024 * 35 / 64],
961 ),
962 36 => pack_64_36(
963 array_ref![input, 0, 1024],
964 array_mut_ref![output, 0, 1024 * 36 / 64],
965 ),
966 37 => pack_64_37(
967 array_ref![input, 0, 1024],
968 array_mut_ref![output, 0, 1024 * 37 / 64],
969 ),
970 38 => pack_64_38(
971 array_ref![input, 0, 1024],
972 array_mut_ref![output, 0, 1024 * 38 / 64],
973 ),
974 39 => pack_64_39(
975 array_ref![input, 0, 1024],
976 array_mut_ref![output, 0, 1024 * 39 / 64],
977 ),
978
979 40 => pack_64_40(
980 array_ref![input, 0, 1024],
981 array_mut_ref![output, 0, 1024 * 40 / 64],
982 ),
983 41 => pack_64_41(
984 array_ref![input, 0, 1024],
985 array_mut_ref![output, 0, 1024 * 41 / 64],
986 ),
987 42 => pack_64_42(
988 array_ref![input, 0, 1024],
989 array_mut_ref![output, 0, 1024 * 42 / 64],
990 ),
991 43 => pack_64_43(
992 array_ref![input, 0, 1024],
993 array_mut_ref![output, 0, 1024 * 43 / 64],
994 ),
995 44 => pack_64_44(
996 array_ref![input, 0, 1024],
997 array_mut_ref![output, 0, 1024 * 44 / 64],
998 ),
999 45 => pack_64_45(
1000 array_ref![input, 0, 1024],
1001 array_mut_ref![output, 0, 1024 * 45 / 64],
1002 ),
1003 46 => pack_64_46(
1004 array_ref![input, 0, 1024],
1005 array_mut_ref![output, 0, 1024 * 46 / 64],
1006 ),
1007 47 => pack_64_47(
1008 array_ref![input, 0, 1024],
1009 array_mut_ref![output, 0, 1024 * 47 / 64],
1010 ),
1011 48 => pack_64_48(
1012 array_ref![input, 0, 1024],
1013 array_mut_ref![output, 0, 1024 * 48 / 64],
1014 ),
1015 49 => pack_64_49(
1016 array_ref![input, 0, 1024],
1017 array_mut_ref![output, 0, 1024 * 49 / 64],
1018 ),
1019
1020 50 => pack_64_50(
1021 array_ref![input, 0, 1024],
1022 array_mut_ref![output, 0, 1024 * 50 / 64],
1023 ),
1024 51 => pack_64_51(
1025 array_ref![input, 0, 1024],
1026 array_mut_ref![output, 0, 1024 * 51 / 64],
1027 ),
1028 52 => pack_64_52(
1029 array_ref![input, 0, 1024],
1030 array_mut_ref![output, 0, 1024 * 52 / 64],
1031 ),
1032 53 => pack_64_53(
1033 array_ref![input, 0, 1024],
1034 array_mut_ref![output, 0, 1024 * 53 / 64],
1035 ),
1036 54 => pack_64_54(
1037 array_ref![input, 0, 1024],
1038 array_mut_ref![output, 0, 1024 * 54 / 64],
1039 ),
1040 55 => pack_64_55(
1041 array_ref![input, 0, 1024],
1042 array_mut_ref![output, 0, 1024 * 55 / 64],
1043 ),
1044 56 => pack_64_56(
1045 array_ref![input, 0, 1024],
1046 array_mut_ref![output, 0, 1024 * 56 / 64],
1047 ),
1048 57 => pack_64_57(
1049 array_ref![input, 0, 1024],
1050 array_mut_ref![output, 0, 1024 * 57 / 64],
1051 ),
1052 58 => pack_64_58(
1053 array_ref![input, 0, 1024],
1054 array_mut_ref![output, 0, 1024 * 58 / 64],
1055 ),
1056 59 => pack_64_59(
1057 array_ref![input, 0, 1024],
1058 array_mut_ref![output, 0, 1024 * 59 / 64],
1059 ),
1060
1061 60 => pack_64_60(
1062 array_ref![input, 0, 1024],
1063 array_mut_ref![output, 0, 1024 * 60 / 64],
1064 ),
1065 61 => pack_64_61(
1066 array_ref![input, 0, 1024],
1067 array_mut_ref![output, 0, 1024 * 61 / 64],
1068 ),
1069 62 => pack_64_62(
1070 array_ref![input, 0, 1024],
1071 array_mut_ref![output, 0, 1024 * 62 / 64],
1072 ),
1073 63 => pack_64_63(
1074 array_ref![input, 0, 1024],
1075 array_mut_ref![output, 0, 1024 * 63 / 64],
1076 ),
1077 64 => pack_64_64(
1078 array_ref![input, 0, 1024],
1079 array_mut_ref![output, 0, 1024 * 64 / 64],
1080 ),
1081
1082 _ => unreachable!("Unsupported width: {}", width),
1083 }
1084 }
1085
1086 unsafe fn unchecked_unpack(width: usize, input: &[Self], output: &mut [Self]) {
1087 let packed_len = 128 * width / size_of::<Self>();
1088 debug_assert_eq!(
1089 input.len(),
1090 packed_len,
1091 "Input buffer must be of size 1024 * W / T"
1092 );
1093 debug_assert_eq!(output.len(), 1024, "Output buffer must be of size 1024");
1094 debug_assert!(
1095 width <= Self::T,
1096 "Width must be less than or equal to {}",
1097 Self::T
1098 );
1099
1100 match width {
1101 0 => {
1102 output.fill(0);
1103 }
1104 1 => unpack_64_1(
1105 array_ref![input, 0, 1024 / 64],
1106 array_mut_ref![output, 0, 1024],
1107 ),
1108 2 => unpack_64_2(
1109 array_ref![input, 0, 1024 * 2 / 64],
1110 array_mut_ref![output, 0, 1024],
1111 ),
1112 3 => unpack_64_3(
1113 array_ref![input, 0, 1024 * 3 / 64],
1114 array_mut_ref![output, 0, 1024],
1115 ),
1116 4 => unpack_64_4(
1117 array_ref![input, 0, 1024 * 4 / 64],
1118 array_mut_ref![output, 0, 1024],
1119 ),
1120 5 => unpack_64_5(
1121 array_ref![input, 0, 1024 * 5 / 64],
1122 array_mut_ref![output, 0, 1024],
1123 ),
1124 6 => unpack_64_6(
1125 array_ref![input, 0, 1024 * 6 / 64],
1126 array_mut_ref![output, 0, 1024],
1127 ),
1128 7 => unpack_64_7(
1129 array_ref![input, 0, 1024 * 7 / 64],
1130 array_mut_ref![output, 0, 1024],
1131 ),
1132 8 => unpack_64_8(
1133 array_ref![input, 0, 1024 * 8 / 64],
1134 array_mut_ref![output, 0, 1024],
1135 ),
1136 9 => unpack_64_9(
1137 array_ref![input, 0, 1024 * 9 / 64],
1138 array_mut_ref![output, 0, 1024],
1139 ),
1140
1141 10 => unpack_64_10(
1142 array_ref![input, 0, 1024 * 10 / 64],
1143 array_mut_ref![output, 0, 1024],
1144 ),
1145 11 => unpack_64_11(
1146 array_ref![input, 0, 1024 * 11 / 64],
1147 array_mut_ref![output, 0, 1024],
1148 ),
1149 12 => unpack_64_12(
1150 array_ref![input, 0, 1024 * 12 / 64],
1151 array_mut_ref![output, 0, 1024],
1152 ),
1153 13 => unpack_64_13(
1154 array_ref![input, 0, 1024 * 13 / 64],
1155 array_mut_ref![output, 0, 1024],
1156 ),
1157 14 => unpack_64_14(
1158 array_ref![input, 0, 1024 * 14 / 64],
1159 array_mut_ref![output, 0, 1024],
1160 ),
1161 15 => unpack_64_15(
1162 array_ref![input, 0, 1024 * 15 / 64],
1163 array_mut_ref![output, 0, 1024],
1164 ),
1165 16 => unpack_64_16(
1166 array_ref![input, 0, 1024 * 16 / 64],
1167 array_mut_ref![output, 0, 1024],
1168 ),
1169 17 => unpack_64_17(
1170 array_ref![input, 0, 1024 * 17 / 64],
1171 array_mut_ref![output, 0, 1024],
1172 ),
1173 18 => unpack_64_18(
1174 array_ref![input, 0, 1024 * 18 / 64],
1175 array_mut_ref![output, 0, 1024],
1176 ),
1177 19 => unpack_64_19(
1178 array_ref![input, 0, 1024 * 19 / 64],
1179 array_mut_ref![output, 0, 1024],
1180 ),
1181
1182 20 => unpack_64_20(
1183 array_ref![input, 0, 1024 * 20 / 64],
1184 array_mut_ref![output, 0, 1024],
1185 ),
1186 21 => unpack_64_21(
1187 array_ref![input, 0, 1024 * 21 / 64],
1188 array_mut_ref![output, 0, 1024],
1189 ),
1190 22 => unpack_64_22(
1191 array_ref![input, 0, 1024 * 22 / 64],
1192 array_mut_ref![output, 0, 1024],
1193 ),
1194 23 => unpack_64_23(
1195 array_ref![input, 0, 1024 * 23 / 64],
1196 array_mut_ref![output, 0, 1024],
1197 ),
1198 24 => unpack_64_24(
1199 array_ref![input, 0, 1024 * 24 / 64],
1200 array_mut_ref![output, 0, 1024],
1201 ),
1202 25 => unpack_64_25(
1203 array_ref![input, 0, 1024 * 25 / 64],
1204 array_mut_ref![output, 0, 1024],
1205 ),
1206 26 => unpack_64_26(
1207 array_ref![input, 0, 1024 * 26 / 64],
1208 array_mut_ref![output, 0, 1024],
1209 ),
1210 27 => unpack_64_27(
1211 array_ref![input, 0, 1024 * 27 / 64],
1212 array_mut_ref![output, 0, 1024],
1213 ),
1214 28 => unpack_64_28(
1215 array_ref![input, 0, 1024 * 28 / 64],
1216 array_mut_ref![output, 0, 1024],
1217 ),
1218 29 => unpack_64_29(
1219 array_ref![input, 0, 1024 * 29 / 64],
1220 array_mut_ref![output, 0, 1024],
1221 ),
1222
1223 30 => unpack_64_30(
1224 array_ref![input, 0, 1024 * 30 / 64],
1225 array_mut_ref![output, 0, 1024],
1226 ),
1227 31 => unpack_64_31(
1228 array_ref![input, 0, 1024 * 31 / 64],
1229 array_mut_ref![output, 0, 1024],
1230 ),
1231 32 => unpack_64_32(
1232 array_ref![input, 0, 1024 * 32 / 64],
1233 array_mut_ref![output, 0, 1024],
1234 ),
1235 33 => unpack_64_33(
1236 array_ref![input, 0, 1024 * 33 / 64],
1237 array_mut_ref![output, 0, 1024],
1238 ),
1239 34 => unpack_64_34(
1240 array_ref![input, 0, 1024 * 34 / 64],
1241 array_mut_ref![output, 0, 1024],
1242 ),
1243 35 => unpack_64_35(
1244 array_ref![input, 0, 1024 * 35 / 64],
1245 array_mut_ref![output, 0, 1024],
1246 ),
1247 36 => unpack_64_36(
1248 array_ref![input, 0, 1024 * 36 / 64],
1249 array_mut_ref![output, 0, 1024],
1250 ),
1251 37 => unpack_64_37(
1252 array_ref![input, 0, 1024 * 37 / 64],
1253 array_mut_ref![output, 0, 1024],
1254 ),
1255 38 => unpack_64_38(
1256 array_ref![input, 0, 1024 * 38 / 64],
1257 array_mut_ref![output, 0, 1024],
1258 ),
1259 39 => unpack_64_39(
1260 array_ref![input, 0, 1024 * 39 / 64],
1261 array_mut_ref![output, 0, 1024],
1262 ),
1263
1264 40 => unpack_64_40(
1265 array_ref![input, 0, 1024 * 40 / 64],
1266 array_mut_ref![output, 0, 1024],
1267 ),
1268 41 => unpack_64_41(
1269 array_ref![input, 0, 1024 * 41 / 64],
1270 array_mut_ref![output, 0, 1024],
1271 ),
1272 42 => unpack_64_42(
1273 array_ref![input, 0, 1024 * 42 / 64],
1274 array_mut_ref![output, 0, 1024],
1275 ),
1276 43 => unpack_64_43(
1277 array_ref![input, 0, 1024 * 43 / 64],
1278 array_mut_ref![output, 0, 1024],
1279 ),
1280 44 => unpack_64_44(
1281 array_ref![input, 0, 1024 * 44 / 64],
1282 array_mut_ref![output, 0, 1024],
1283 ),
1284 45 => unpack_64_45(
1285 array_ref![input, 0, 1024 * 45 / 64],
1286 array_mut_ref![output, 0, 1024],
1287 ),
1288 46 => unpack_64_46(
1289 array_ref![input, 0, 1024 * 46 / 64],
1290 array_mut_ref![output, 0, 1024],
1291 ),
1292 47 => unpack_64_47(
1293 array_ref![input, 0, 1024 * 47 / 64],
1294 array_mut_ref![output, 0, 1024],
1295 ),
1296 48 => unpack_64_48(
1297 array_ref![input, 0, 1024 * 48 / 64],
1298 array_mut_ref![output, 0, 1024],
1299 ),
1300 49 => unpack_64_49(
1301 array_ref![input, 0, 1024 * 49 / 64],
1302 array_mut_ref![output, 0, 1024],
1303 ),
1304
1305 50 => unpack_64_50(
1306 array_ref![input, 0, 1024 * 50 / 64],
1307 array_mut_ref![output, 0, 1024],
1308 ),
1309 51 => unpack_64_51(
1310 array_ref![input, 0, 1024 * 51 / 64],
1311 array_mut_ref![output, 0, 1024],
1312 ),
1313 52 => unpack_64_52(
1314 array_ref![input, 0, 1024 * 52 / 64],
1315 array_mut_ref![output, 0, 1024],
1316 ),
1317 53 => unpack_64_53(
1318 array_ref![input, 0, 1024 * 53 / 64],
1319 array_mut_ref![output, 0, 1024],
1320 ),
1321 54 => unpack_64_54(
1322 array_ref![input, 0, 1024 * 54 / 64],
1323 array_mut_ref![output, 0, 1024],
1324 ),
1325 55 => unpack_64_55(
1326 array_ref![input, 0, 1024 * 55 / 64],
1327 array_mut_ref![output, 0, 1024],
1328 ),
1329 56 => unpack_64_56(
1330 array_ref![input, 0, 1024 * 56 / 64],
1331 array_mut_ref![output, 0, 1024],
1332 ),
1333 57 => unpack_64_57(
1334 array_ref![input, 0, 1024 * 57 / 64],
1335 array_mut_ref![output, 0, 1024],
1336 ),
1337 58 => unpack_64_58(
1338 array_ref![input, 0, 1024 * 58 / 64],
1339 array_mut_ref![output, 0, 1024],
1340 ),
1341 59 => unpack_64_59(
1342 array_ref![input, 0, 1024 * 59 / 64],
1343 array_mut_ref![output, 0, 1024],
1344 ),
1345
1346 60 => unpack_64_60(
1347 array_ref![input, 0, 1024 * 60 / 64],
1348 array_mut_ref![output, 0, 1024],
1349 ),
1350 61 => unpack_64_61(
1351 array_ref![input, 0, 1024 * 61 / 64],
1352 array_mut_ref![output, 0, 1024],
1353 ),
1354 62 => unpack_64_62(
1355 array_ref![input, 0, 1024 * 62 / 64],
1356 array_mut_ref![output, 0, 1024],
1357 ),
1358 63 => unpack_64_63(
1359 array_ref![input, 0, 1024 * 63 / 64],
1360 array_mut_ref![output, 0, 1024],
1361 ),
1362 64 => unpack_64_64(
1363 array_ref![input, 0, 1024 * 64 / 64],
1364 array_mut_ref![output, 0, 1024],
1365 ),
1366
1367 _ => unreachable!("Unsupported width: {}", width),
1368 }
1369 }
1370}
1371
1372macro_rules! unpack_8 {
1373 ($name:ident, $bits:expr) => {
1374 fn $name(input: &[u8; 1024 * $bits / u8::T], output: &mut [u8; 1024]) {
1375 for lane in 0..u8::LANES {
1376 unpack!(u8, $bits, input, lane, |$idx, $elem| {
1377 output[$idx] = $elem;
1378 });
1379 }
1380 }
1381 };
1382}
1383
1384unpack_8!(unpack_8_1, 1);
1385unpack_8!(unpack_8_2, 2);
1386unpack_8!(unpack_8_3, 3);
1387unpack_8!(unpack_8_4, 4);
1388unpack_8!(unpack_8_5, 5);
1389unpack_8!(unpack_8_6, 6);
1390unpack_8!(unpack_8_7, 7);
1391unpack_8!(unpack_8_8, 8);
1392
1393macro_rules! pack_8 {
1394 ($name:ident, $bits:expr) => {
1395 fn $name(input: &[u8; 1024], output: &mut [u8; 1024 * $bits / u8::T]) {
1396 for lane in 0..u8::LANES {
1397 pack!(u8, $bits, output, lane, |$idx| { input[$idx] });
1398 }
1399 }
1400 };
1401}
1402pack_8!(pack_8_1, 1);
1403pack_8!(pack_8_2, 2);
1404pack_8!(pack_8_3, 3);
1405pack_8!(pack_8_4, 4);
1406pack_8!(pack_8_5, 5);
1407pack_8!(pack_8_6, 6);
1408pack_8!(pack_8_7, 7);
1409pack_8!(pack_8_8, 8);
1410
1411macro_rules! unpack_16 {
1412 ($name:ident, $bits:expr) => {
1413 fn $name(input: &[u16; 1024 * $bits / u16::T], output: &mut [u16; 1024]) {
1414 for lane in 0..u16::LANES {
1415 unpack!(u16, $bits, input, lane, |$idx, $elem| {
1416 output[$idx] = $elem;
1417 });
1418 }
1419 }
1420 };
1421}
1422
1423unpack_16!(unpack_16_1, 1);
1424unpack_16!(unpack_16_2, 2);
1425unpack_16!(unpack_16_3, 3);
1426unpack_16!(unpack_16_4, 4);
1427unpack_16!(unpack_16_5, 5);
1428unpack_16!(unpack_16_6, 6);
1429unpack_16!(unpack_16_7, 7);
1430unpack_16!(unpack_16_8, 8);
1431unpack_16!(unpack_16_9, 9);
1432unpack_16!(unpack_16_10, 10);
1433unpack_16!(unpack_16_11, 11);
1434unpack_16!(unpack_16_12, 12);
1435unpack_16!(unpack_16_13, 13);
1436unpack_16!(unpack_16_14, 14);
1437unpack_16!(unpack_16_15, 15);
1438unpack_16!(unpack_16_16, 16);
1439
1440macro_rules! pack_16 {
1441 ($name:ident, $bits:expr) => {
1442 fn $name(input: &[u16; 1024], output: &mut [u16; 1024 * $bits / u16::T]) {
1443 for lane in 0..u16::LANES {
1444 pack!(u16, $bits, output, lane, |$idx| { input[$idx] });
1445 }
1446 }
1447 };
1448}
1449
1450pack_16!(pack_16_1, 1);
1451pack_16!(pack_16_2, 2);
1452pack_16!(pack_16_3, 3);
1453pack_16!(pack_16_4, 4);
1454pack_16!(pack_16_5, 5);
1455pack_16!(pack_16_6, 6);
1456pack_16!(pack_16_7, 7);
1457pack_16!(pack_16_8, 8);
1458pack_16!(pack_16_9, 9);
1459pack_16!(pack_16_10, 10);
1460pack_16!(pack_16_11, 11);
1461pack_16!(pack_16_12, 12);
1462pack_16!(pack_16_13, 13);
1463pack_16!(pack_16_14, 14);
1464pack_16!(pack_16_15, 15);
1465pack_16!(pack_16_16, 16);
1466
1467macro_rules! unpack_32 {
1468 ($name:ident, $bit_width:expr) => {
1469 fn $name(input: &[u32; 1024 * $bit_width / u32::T], output: &mut [u32; 1024]) {
1470 for lane in 0..u32::LANES {
1471 unpack!(u32, $bit_width, input, lane, |$idx, $elem| {
1472 output[$idx] = $elem
1473 });
1474 }
1475 }
1476 };
1477}
1478
1479unpack_32!(unpack_32_1, 1);
1480unpack_32!(unpack_32_2, 2);
1481unpack_32!(unpack_32_3, 3);
1482unpack_32!(unpack_32_4, 4);
1483unpack_32!(unpack_32_5, 5);
1484unpack_32!(unpack_32_6, 6);
1485unpack_32!(unpack_32_7, 7);
1486unpack_32!(unpack_32_8, 8);
1487unpack_32!(unpack_32_9, 9);
1488unpack_32!(unpack_32_10, 10);
1489unpack_32!(unpack_32_11, 11);
1490unpack_32!(unpack_32_12, 12);
1491unpack_32!(unpack_32_13, 13);
1492unpack_32!(unpack_32_14, 14);
1493unpack_32!(unpack_32_15, 15);
1494unpack_32!(unpack_32_16, 16);
1495unpack_32!(unpack_32_17, 17);
1496unpack_32!(unpack_32_18, 18);
1497unpack_32!(unpack_32_19, 19);
1498unpack_32!(unpack_32_20, 20);
1499unpack_32!(unpack_32_21, 21);
1500unpack_32!(unpack_32_22, 22);
1501unpack_32!(unpack_32_23, 23);
1502unpack_32!(unpack_32_24, 24);
1503unpack_32!(unpack_32_25, 25);
1504unpack_32!(unpack_32_26, 26);
1505unpack_32!(unpack_32_27, 27);
1506unpack_32!(unpack_32_28, 28);
1507unpack_32!(unpack_32_29, 29);
1508unpack_32!(unpack_32_30, 30);
1509unpack_32!(unpack_32_31, 31);
1510unpack_32!(unpack_32_32, 32);
1511
1512macro_rules! pack_32 {
1513 ($name:ident, $bits:expr) => {
1514 fn $name(input: &[u32; 1024], output: &mut [u32; 1024 * $bits / u32::BITS as usize]) {
1515 for lane in 0..u32::LANES {
1516 pack!(u32, $bits, output, lane, |$idx| { input[$idx] });
1517 }
1518 }
1519 };
1520}
1521
1522pack_32!(pack_32_1, 1);
1523pack_32!(pack_32_2, 2);
1524pack_32!(pack_32_3, 3);
1525pack_32!(pack_32_4, 4);
1526pack_32!(pack_32_5, 5);
1527pack_32!(pack_32_6, 6);
1528pack_32!(pack_32_7, 7);
1529pack_32!(pack_32_8, 8);
1530pack_32!(pack_32_9, 9);
1531pack_32!(pack_32_10, 10);
1532pack_32!(pack_32_11, 11);
1533pack_32!(pack_32_12, 12);
1534pack_32!(pack_32_13, 13);
1535pack_32!(pack_32_14, 14);
1536pack_32!(pack_32_15, 15);
1537pack_32!(pack_32_16, 16);
1538pack_32!(pack_32_17, 17);
1539pack_32!(pack_32_18, 18);
1540pack_32!(pack_32_19, 19);
1541pack_32!(pack_32_20, 20);
1542pack_32!(pack_32_21, 21);
1543pack_32!(pack_32_22, 22);
1544pack_32!(pack_32_23, 23);
1545pack_32!(pack_32_24, 24);
1546pack_32!(pack_32_25, 25);
1547pack_32!(pack_32_26, 26);
1548pack_32!(pack_32_27, 27);
1549pack_32!(pack_32_28, 28);
1550pack_32!(pack_32_29, 29);
1551pack_32!(pack_32_30, 30);
1552pack_32!(pack_32_31, 31);
1553pack_32!(pack_32_32, 32);
1554
1555macro_rules! unpack_64 {
1556 ($name:ident, $bit_width:expr) => {
1557 fn $name(input: &[u64; 1024 * $bit_width / u64::T], output: &mut [u64; 1024]) {
1558 for lane in 0..u64::LANES {
1559 unpack!(u64, $bit_width, input, lane, |$idx, $elem| {
1560 output[$idx] = $elem
1561 });
1562 }
1563 }
1564 };
1565}
1566
1567unpack_64!(unpack_64_1, 1);
1568unpack_64!(unpack_64_2, 2);
1569unpack_64!(unpack_64_3, 3);
1570unpack_64!(unpack_64_4, 4);
1571unpack_64!(unpack_64_5, 5);
1572unpack_64!(unpack_64_6, 6);
1573unpack_64!(unpack_64_7, 7);
1574unpack_64!(unpack_64_8, 8);
1575unpack_64!(unpack_64_9, 9);
1576unpack_64!(unpack_64_10, 10);
1577unpack_64!(unpack_64_11, 11);
1578unpack_64!(unpack_64_12, 12);
1579unpack_64!(unpack_64_13, 13);
1580unpack_64!(unpack_64_14, 14);
1581unpack_64!(unpack_64_15, 15);
1582unpack_64!(unpack_64_16, 16);
1583unpack_64!(unpack_64_17, 17);
1584unpack_64!(unpack_64_18, 18);
1585unpack_64!(unpack_64_19, 19);
1586unpack_64!(unpack_64_20, 20);
1587unpack_64!(unpack_64_21, 21);
1588unpack_64!(unpack_64_22, 22);
1589unpack_64!(unpack_64_23, 23);
1590unpack_64!(unpack_64_24, 24);
1591unpack_64!(unpack_64_25, 25);
1592unpack_64!(unpack_64_26, 26);
1593unpack_64!(unpack_64_27, 27);
1594unpack_64!(unpack_64_28, 28);
1595unpack_64!(unpack_64_29, 29);
1596unpack_64!(unpack_64_30, 30);
1597unpack_64!(unpack_64_31, 31);
1598unpack_64!(unpack_64_32, 32);
1599
1600unpack_64!(unpack_64_33, 33);
1601unpack_64!(unpack_64_34, 34);
1602unpack_64!(unpack_64_35, 35);
1603unpack_64!(unpack_64_36, 36);
1604unpack_64!(unpack_64_37, 37);
1605unpack_64!(unpack_64_38, 38);
1606unpack_64!(unpack_64_39, 39);
1607unpack_64!(unpack_64_40, 40);
1608unpack_64!(unpack_64_41, 41);
1609unpack_64!(unpack_64_42, 42);
1610unpack_64!(unpack_64_43, 43);
1611unpack_64!(unpack_64_44, 44);
1612unpack_64!(unpack_64_45, 45);
1613unpack_64!(unpack_64_46, 46);
1614unpack_64!(unpack_64_47, 47);
1615unpack_64!(unpack_64_48, 48);
1616unpack_64!(unpack_64_49, 49);
1617unpack_64!(unpack_64_50, 50);
1618unpack_64!(unpack_64_51, 51);
1619unpack_64!(unpack_64_52, 52);
1620unpack_64!(unpack_64_53, 53);
1621unpack_64!(unpack_64_54, 54);
1622unpack_64!(unpack_64_55, 55);
1623unpack_64!(unpack_64_56, 56);
1624unpack_64!(unpack_64_57, 57);
1625unpack_64!(unpack_64_58, 58);
1626unpack_64!(unpack_64_59, 59);
1627unpack_64!(unpack_64_60, 60);
1628unpack_64!(unpack_64_61, 61);
1629unpack_64!(unpack_64_62, 62);
1630unpack_64!(unpack_64_63, 63);
1631unpack_64!(unpack_64_64, 64);
1632
1633macro_rules! pack_64 {
1634 ($name:ident, $bits:expr) => {
1635 fn $name(input: &[u64; 1024], output: &mut [u64; 1024 * $bits / u64::BITS as usize]) {
1636 for lane in 0..u64::LANES {
1637 pack!(u64, $bits, output, lane, |$idx| { input[$idx] });
1638 }
1639 }
1640 };
1641}
1642
1643pack_64!(pack_64_1, 1);
1644pack_64!(pack_64_2, 2);
1645pack_64!(pack_64_3, 3);
1646pack_64!(pack_64_4, 4);
1647pack_64!(pack_64_5, 5);
1648pack_64!(pack_64_6, 6);
1649pack_64!(pack_64_7, 7);
1650pack_64!(pack_64_8, 8);
1651pack_64!(pack_64_9, 9);
1652pack_64!(pack_64_10, 10);
1653pack_64!(pack_64_11, 11);
1654pack_64!(pack_64_12, 12);
1655pack_64!(pack_64_13, 13);
1656pack_64!(pack_64_14, 14);
1657pack_64!(pack_64_15, 15);
1658pack_64!(pack_64_16, 16);
1659pack_64!(pack_64_17, 17);
1660pack_64!(pack_64_18, 18);
1661pack_64!(pack_64_19, 19);
1662pack_64!(pack_64_20, 20);
1663pack_64!(pack_64_21, 21);
1664pack_64!(pack_64_22, 22);
1665pack_64!(pack_64_23, 23);
1666pack_64!(pack_64_24, 24);
1667pack_64!(pack_64_25, 25);
1668pack_64!(pack_64_26, 26);
1669pack_64!(pack_64_27, 27);
1670pack_64!(pack_64_28, 28);
1671pack_64!(pack_64_29, 29);
1672pack_64!(pack_64_30, 30);
1673pack_64!(pack_64_31, 31);
1674pack_64!(pack_64_32, 32);
1675
1676pack_64!(pack_64_33, 33);
1677pack_64!(pack_64_34, 34);
1678pack_64!(pack_64_35, 35);
1679pack_64!(pack_64_36, 36);
1680pack_64!(pack_64_37, 37);
1681pack_64!(pack_64_38, 38);
1682pack_64!(pack_64_39, 39);
1683pack_64!(pack_64_40, 40);
1684pack_64!(pack_64_41, 41);
1685pack_64!(pack_64_42, 42);
1686pack_64!(pack_64_43, 43);
1687pack_64!(pack_64_44, 44);
1688pack_64!(pack_64_45, 45);
1689pack_64!(pack_64_46, 46);
1690pack_64!(pack_64_47, 47);
1691pack_64!(pack_64_48, 48);
1692pack_64!(pack_64_49, 49);
1693pack_64!(pack_64_50, 50);
1694pack_64!(pack_64_51, 51);
1695pack_64!(pack_64_52, 52);
1696pack_64!(pack_64_53, 53);
1697pack_64!(pack_64_54, 54);
1698pack_64!(pack_64_55, 55);
1699pack_64!(pack_64_56, 56);
1700pack_64!(pack_64_57, 57);
1701pack_64!(pack_64_58, 58);
1702pack_64!(pack_64_59, 59);
1703pack_64!(pack_64_60, 60);
1704pack_64!(pack_64_61, 61);
1705pack_64!(pack_64_62, 62);
1706pack_64!(pack_64_63, 63);
1707pack_64!(pack_64_64, 64);
1708
#[cfg(test)]
mod test {
    use super::*;
    use core::array;

    /// Small xorshift generator used to produce deterministic test inputs.
    pub struct XorShift {
        state: u64,
    }

    impl XorShift {
        /// Creates a generator whose initial state is `seed`.
        ///
        /// A zero seed would make every output zero, because each step only xors the
        /// state with shifted copies of itself.
        pub fn new(seed: u64) -> Self {
            Self { state: seed }
        }

        /// Advances the state with the 13 / 7 / 17 shift-xor steps and returns it.
        pub fn next(&mut self) -> u64 {
            let mut x = self.state;
            x ^= x << 13;
            x ^= x >> 7;
            x ^= x << 17;
            self.state = x;
            x
        }
    }

    /// Returns 1024 pseudo-random `u64` values that each fit in `bit_width` bits.
    ///
    /// Shared by both `u64` round-trip helpers so that every width is exercised with
    /// non-trivial data.
    fn random_u64_values(bit_width: usize) -> [u64; 1024] {
        let mut rng = XorShift::new(123456789);
        let mut values = [0u64; 1024];
        for value in &mut values {
            let raw = rng.next();
            // `1 << 64` overflows a `u64`, so the full-width case keeps the raw value.
            *value = if bit_width == 64 {
                raw
            } else {
                raw % (1 << bit_width)
            };
        }
        values
    }

    /// Round-trips 1024 pseudo-random `u8` values of at most `bit_width` bits through
    /// the `pack!` / `unpack!` kernels and asserts that nothing is lost.
    fn pack_unpack_u8(bit_width: usize) {
        let mut values: [u8; 1024] = [0; 1024];
        let mut rng = XorShift::new(123456789);
        for value in &mut values {
            *value = (rng.next() % (1 << bit_width)) as u8;
        }

        let mut packed = vec![0; 1024 * bit_width / 8];
        for lane in 0..u8::LANES {
            pack!(u8, bit_width, packed, lane, |$pos| {
                values[$pos]
            });
        }

        let mut unpacked: [u8; 1024] = [0; 1024];
        for lane in 0..u8::LANES {
            unpack!(u8, bit_width, packed, lane, |$idx, $elem| {
                unpacked[$idx] = $elem;
            });
        }

        assert_eq!(values, unpacked, "u8 round trip failed for bit width {}", bit_width);
    }

    /// Round-trips 1024 pseudo-random `u16` values of at most `bit_width` bits through
    /// the `pack!` / `unpack!` kernels and asserts that nothing is lost.
    fn pack_unpack_u16(bit_width: usize) {
        let mut values: [u16; 1024] = [0; 1024];
        let mut rng = XorShift::new(123456789);
        for value in &mut values {
            *value = (rng.next() % (1 << bit_width)) as u16;
        }

        let mut packed = vec![0; 1024 * bit_width / 16];
        for lane in 0..u16::LANES {
            pack!(u16, bit_width, packed, lane, |$pos| {
                values[$pos]
            });
        }

        let mut unpacked: [u16; 1024] = [0; 1024];
        for lane in 0..u16::LANES {
            unpack!(u16, bit_width, packed, lane, |$idx, $elem| {
                unpacked[$idx] = $elem;
            });
        }

        assert_eq!(values, unpacked, "u16 round trip failed for bit width {}", bit_width);
    }

    /// Round-trips 1024 pseudo-random `u32` values of at most `bit_width` bits through
    /// the `pack!` / `unpack!` kernels and asserts that nothing is lost.
    fn pack_unpack_u32(bit_width: usize) {
        let mut values: [u32; 1024] = [0; 1024];
        let mut rng = XorShift::new(123456789);
        for value in &mut values {
            *value = (rng.next() % (1 << bit_width)) as u32;
        }

        let mut packed = vec![0; 1024 * bit_width / 32];
        for lane in 0..u32::LANES {
            pack!(u32, bit_width, packed, lane, |$pos| {
                values[$pos]
            });
        }

        let mut unpacked: [u32; 1024] = [0; 1024];
        for lane in 0..u32::LANES {
            unpack!(u32, bit_width, packed, lane, |$idx, $elem| {
                unpacked[$idx] = $elem;
            });
        }

        assert_eq!(values, unpacked, "u32 round trip failed for bit width {}", bit_width);
    }

    /// Round-trips 1024 pseudo-random `u64` values of at most `bit_width` bits through
    /// the `pack!` / `unpack!` kernels and asserts that nothing is lost.
    fn pack_unpack_u64(bit_width: usize) {
        let values = random_u64_values(bit_width);

        let mut packed = vec![0; 1024 * bit_width / 64];
        for lane in 0..u64::LANES {
            pack!(u64, bit_width, packed, lane, |$pos| {
                values[$pos]
            });
        }

        let mut unpacked: [u64; 1024] = [0; 1024];
        for lane in 0..u64::LANES {
            unpack!(u64, bit_width, packed, lane, |$idx, $elem| {
                unpacked[$idx] = $elem;
            });
        }

        assert_eq!(values, unpacked, "u64 round trip failed for bit width {}", bit_width);
    }

    /// Exercises the raw kernels for every width from 0 up to the full word width of
    /// each supported integer type.
    #[test]
    fn test_pack() {
        for bit_width in 0..=8 {
            pack_unpack_u8(bit_width);
        }
        for bit_width in 0..=16 {
            pack_unpack_u16(bit_width);
        }
        for bit_width in 0..=32 {
            pack_unpack_u32(bit_width);
        }
        for bit_width in 0..=64 {
            pack_unpack_u64(bit_width);
        }
    }

    /// Round-trips pseudo-random `u8` values through `BitPacking::unchecked_pack` and
    /// `BitPacking::unchecked_unpack`.
    fn unchecked_pack_unpack_u8(bit_width: usize) {
        let mut values = [0u8; 1024];
        let mut rng = XorShift::new(123456789);
        for value in &mut values {
            *value = (rng.next() % (1 << bit_width)) as u8;
        }
        let mut packed = vec![0; 1024 * bit_width / 8];
        // SAFETY: 1024 inputs and `1024 * bit_width / 8` packed words are supplied;
        // presumably this is the size contract of `unchecked_pack` — confirm against
        // its `# Safety` documentation.
        unsafe {
            BitPacking::unchecked_pack(bit_width, &values, &mut packed);
        }
        let mut output = [0; 1024];
        // SAFETY: `packed` was produced above for the same width and `output` holds
        // 1024 elements (same assumption about the contract as above).
        unsafe { BitPacking::unchecked_unpack(bit_width, &packed, &mut output) };
        assert_eq!(values, output, "unchecked u8 round trip failed for bit width {}", bit_width);
    }

    /// Round-trips pseudo-random `u16` values through `BitPacking::unchecked_pack` and
    /// `BitPacking::unchecked_unpack`.
    fn unchecked_pack_unpack_u16(bit_width: usize) {
        let mut values = [0u16; 1024];
        let mut rng = XorShift::new(123456789);
        for value in &mut values {
            *value = (rng.next() % (1 << bit_width)) as u16;
        }
        let mut packed = vec![0; 1024 * bit_width / u16::T];
        // SAFETY: 1024 inputs and `1024 * bit_width / u16::T` packed words are supplied;
        // presumably this is the size contract of `unchecked_pack` — confirm.
        unsafe {
            BitPacking::unchecked_pack(bit_width, &values, &mut packed);
        }
        let mut output = [0; 1024];
        // SAFETY: `packed` was produced above for the same width and `output` holds
        // 1024 elements (same assumption about the contract as above).
        unsafe { BitPacking::unchecked_unpack(bit_width, &packed, &mut output) };
        assert_eq!(values, output, "unchecked u16 round trip failed for bit width {}", bit_width);
    }

    /// Round-trips pseudo-random `u32` values through `BitPacking::unchecked_pack` and
    /// `BitPacking::unchecked_unpack`.
    fn unchecked_pack_unpack_u32(bit_width: usize) {
        let mut values = [0u32; 1024];
        let mut rng = XorShift::new(123456789);
        for value in &mut values {
            *value = (rng.next() % (1 << bit_width)) as u32;
        }
        let mut packed = vec![0; 1024 * bit_width / u32::T];
        // SAFETY: 1024 inputs and `1024 * bit_width / u32::T` packed words are supplied;
        // presumably this is the size contract of `unchecked_pack` — confirm.
        unsafe {
            BitPacking::unchecked_pack(bit_width, &values, &mut packed);
        }
        let mut output = [0; 1024];
        // SAFETY: `packed` was produced above for the same width and `output` holds
        // 1024 elements (same assumption about the contract as above).
        unsafe { BitPacking::unchecked_unpack(bit_width, &packed, &mut output) };
        assert_eq!(values, output, "unchecked u32 round trip failed for bit width {}", bit_width);
    }

    /// Round-trips pseudo-random `u64` values through `BitPacking::unchecked_pack` and
    /// `BitPacking::unchecked_unpack`.
    ///
    /// The input is randomised for every width. Leaving it zeroed for widths below 64
    /// would make the assertion hold even if packing were broken.
    fn unchecked_pack_unpack_u64(bit_width: usize) {
        let values = random_u64_values(bit_width);
        let mut packed = vec![0; 1024 * bit_width / u64::T];
        // SAFETY: 1024 inputs and `1024 * bit_width / u64::T` packed words are supplied;
        // presumably this is the size contract of `unchecked_pack` — confirm.
        unsafe {
            BitPacking::unchecked_pack(bit_width, &values, &mut packed);
        }
        let mut output = [0; 1024];
        // SAFETY: `packed` was produced above for the same width and `output` holds
        // 1024 elements (same assumption about the contract as above).
        unsafe { BitPacking::unchecked_unpack(bit_width, &packed, &mut output) };
        assert_eq!(values, output, "unchecked u64 round trip failed for bit width {}", bit_width);
    }

    /// Exercises the `BitPacking` entry points for widths 1 up to the full word width
    /// of each supported integer type, after one fixed 10-bit `u32` case.
    #[test]
    fn test_unchecked_pack() {
        // 0..1024 fits in 10 bits, and 1024 * 10 / 32 == 320 packed words.
        let input = array::from_fn(|i| i as u32);
        let mut packed = [0; 320];
        // SAFETY: `packed` has 320 words for 1024 ten-bit `u32` values; presumably this
        // is the size contract of `unchecked_pack` — confirm.
        unsafe { BitPacking::unchecked_pack(10, &input, &mut packed) };
        let mut output = [0; 1024];
        // SAFETY: `packed` was produced above for width 10 and `output` holds 1024
        // elements (same assumption about the contract as above).
        unsafe { BitPacking::unchecked_unpack(10, &packed, &mut output) };
        assert_eq!(input, output);

        for bit_width in 1..=8 {
            unchecked_pack_unpack_u8(bit_width);
        }
        for bit_width in 1..=16 {
            unchecked_pack_unpack_u16(bit_width);
        }
        for bit_width in 1..=32 {
            unchecked_pack_unpack_u32(bit_width);
        }
        for bit_width in 1..=64 {
            unchecked_pack_unpack_u64(bit_width);
        }
    }
}