bitpacking/
macros.rs

/// Generates a module named `$name` that holds the `pack` and `unpack`
/// routines for the fixed bit width `$n`.
///
/// Both generated functions carry the `$cpufeature` attribute. The module
/// pulls `BLOCK_LEN`, `DataType`, the `Sink` / `Transformer` traits and the
/// register primitives listed in the `use` statements from its parent
/// module. In this file the macro is instantiated for the widths 1 to 31;
/// width 32 has its own dedicated macro.
macro_rules! pack_unpack_with_bits {
    ($name:ident, $n:expr, $cpufeature:meta) => {
        mod $name {
            use crunchy::unroll;
            use super::BLOCK_LEN;
            use super::{Sink, Transformer};
            use super::{
                DataType,
                set1,
                right_shift_32,
                left_shift_32,
                op_or,
                op_and,
                load_unaligned,
                store_unaligned,
            };

            /// Bit width handled by this module.
            const NUM_BITS: usize = $n;
            /// Size in bytes of one packed block.
            const NUM_BYTES_PER_BLOCK: usize = NUM_BITS * BLOCK_LEN / 8;

            /// Packs the `BLOCK_LEN` integers of `input_arr` into `output_arr`.
            ///
            /// Each of the 32 input registers goes through `delta_computer`
            /// before being shifted and OR-ed into the output register that is
            /// currently being filled. Returns `NUM_BYTES_PER_BLOCK`.
            ///
            /// # Panics
            ///
            /// Panics if `input_arr.len() != BLOCK_LEN` or if `output_arr` is
            /// shorter than `NUM_BYTES_PER_BLOCK`.
            ///
            /// # Safety
            ///
            /// The function works through raw pointers and is compiled with
            /// the `$cpufeature` attribute.
            /// NOTE(review): presumably the caller must make sure the CPU
            /// supports that feature -- confirm against the call sites.
            #[$cpufeature]
            pub(crate) unsafe fn pack<TDeltaComputer: Transformer>(
                input_arr: &[u32],
                output_arr: &mut [u8],
                mut delta_computer: TDeltaComputer,
            ) -> usize {
                assert_eq!(input_arr.len(), BLOCK_LEN, "Input block too small {}, (expected {})", input_arr.len(), BLOCK_LEN);
                assert!(output_arr.len() >= NUM_BYTES_PER_BLOCK, "Output array too small (numbits {}). {} <= {}", NUM_BITS, output_arr.len(), NUM_BYTES_PER_BLOCK);

                let src = input_arr.as_ptr() as *const DataType;
                let mut dst = output_arr.as_mut_ptr() as *mut DataType;
                // Output register being assembled; it starts with input register 0.
                let mut pending: DataType = delta_computer.transform(load_unaligned(src));

                unroll! {
                    for step in 0..30 {
                        // Input registers 1 to 30; the last one is handled after the loop.
                        const idx: usize = step + 1;

                        // Bit position, inside the pending register, at which
                        // this input starts, and the room left from there.
                        const cursor: usize = (idx * NUM_BITS) % 32;
                        const free_bits: usize = 32 - cursor;

                        let incoming: DataType = delta_computer.transform(load_unaligned(src.add(idx)));

                        pending = if cursor == 0 {
                            // A fresh output register starts with this input.
                            incoming
                        } else {
                            op_or(pending, left_shift_32::<{cursor as i32}>(incoming))
                        };

                        if free_bits <= NUM_BITS {
                            // The pending register is full: flush it.
                            store_unaligned(dst, pending);
                            dst = dst.add(1);
                            if 0 < free_bits && free_bits < NUM_BITS {
                                // The input straddles two output registers:
                                // its high bits open the next one.
                                pending = right_shift_32::<{free_bits as i32}>(incoming);
                            }
                        }
                    }
                }

                // The last input register ends exactly at the top of the
                // last output register.
                let last: DataType = delta_computer.transform(load_unaligned(src.add(31)));
                pending = if 32 - NUM_BITS > 0 {
                    op_or(pending, left_shift_32::<{32 - NUM_BITS as i32}>(last))
                } else {
                    op_or(pending, last)
                };
                store_unaligned(dst, pending);

                NUM_BYTES_PER_BLOCK
            }

            /// Unpacks one block from `compressed`, handing each of the 32
            /// decoded registers to `output` in order.
            ///
            /// Returns `NUM_BYTES_PER_BLOCK`.
            ///
            /// # Panics
            ///
            /// Panics if `compressed` is shorter than `NUM_BYTES_PER_BLOCK`.
            ///
            /// # Safety
            ///
            /// The function reads through raw pointers and is compiled with
            /// the `$cpufeature` attribute.
            /// NOTE(review): presumably the caller must make sure the CPU
            /// supports that feature -- confirm against the call sites.
            #[$cpufeature]
            pub(crate) unsafe fn unpack<Output: Sink>(compressed: &[u8], mut output: Output) -> usize {
                assert!(compressed.len() >= NUM_BYTES_PER_BLOCK, "Compressed array seems too small. ({} < {}) ", compressed.len(), NUM_BYTES_PER_BLOCK);

                let mut src = compressed.as_ptr() as *const DataType;

                // Mask keeping the low `NUM_BITS` bits; computed on 64 bits
                // so that the shift cannot overflow.
                let mask_scalar: u32 = ((1u64 << NUM_BITS) - 1u64) as u32;
                let mask = set1(mask_scalar as i32);

                let mut current: DataType = load_unaligned(src);

                // Value 0 sits in the low bits of the first register.
                output.process(op_and(current, mask));

                unroll! {
                    for step in 0..31 {
                        const idx: usize = step + 1;

                        // Bit position of this value inside the current input
                        // register, and the number of bits available from there.
                        const cursor: usize = (idx * NUM_BITS) % 32;
                        const available: usize = 32 - cursor;

                        let aligned = if cursor == 0 {
                            current
                        } else {
                            right_shift_32::<{cursor as i32}>(current)
                        };
                        let mut value: DataType = op_and(aligned, mask);

                        // The current input register is exhausted, so the next
                        // one is loaded (except after the very last value).
                        if available <= NUM_BITS && idx != 31 {
                            src = src.add(1);
                            current = load_unaligned(src);

                            // The value straddles two input registers: its
                            // high bits come from the register just loaded.
                            if available < NUM_BITS {
                                let spill = if available != 0 {
                                    left_shift_32::<{available as i32}>(current)
                                } else {
                                    current
                                };
                                value = op_or(value, op_and(spill, mask));
                            }
                        }

                        output.process(value);
                    }
                }

                NUM_BYTES_PER_BLOCK
            }
        }
    }
}
128
/// Generates the `pack_unpack_with_bits_32` module: the `pack` / `unpack`
/// pair for the full 32-bit width, where no shifting or masking is needed
/// and registers are copied one for one.
///
/// Both generated functions carry the `$cpufeature` attribute.
macro_rules! pack_unpack_with_bits_32 {
    ($cpufeature:meta) => {
        mod pack_unpack_with_bits_32 {
            use super::BLOCK_LEN;
            use super::{load_unaligned, store_unaligned, DataType};
            use super::{Sink, Transformer};
            use crunchy::unroll;

            /// Bit width handled by this module.
            const NUM_BITS: usize = 32;
            /// Size in bytes of one packed block.
            const NUM_BYTES_PER_BLOCK: usize = NUM_BITS * BLOCK_LEN / 8;

            /// Runs each of the 32 input registers through `delta_computer`
            /// and stores the result at the same register index of
            /// `output_arr`. Returns `NUM_BYTES_PER_BLOCK`.
            ///
            /// # Panics
            ///
            /// Panics if `input_arr.len() != BLOCK_LEN` or if `output_arr` is
            /// shorter than `NUM_BYTES_PER_BLOCK`.
            ///
            /// # Safety
            ///
            /// The function works through raw pointers and is compiled with
            /// the `$cpufeature` attribute.
            /// NOTE(review): presumably the caller must make sure the CPU
            /// supports that feature -- confirm against the call sites.
            #[$cpufeature]
            pub(crate) unsafe fn pack<TDeltaComputer: Transformer>(
                input_arr: &[u32],
                output_arr: &mut [u8],
                mut delta_computer: TDeltaComputer,
            ) -> usize {
                assert_eq!(
                    input_arr.len(),
                    BLOCK_LEN,
                    "Input block too small {}, (expected {})",
                    input_arr.len(),
                    BLOCK_LEN
                );
                assert!(
                    output_arr.len() >= NUM_BYTES_PER_BLOCK,
                    "Output array too small (numbits {}). {} <= {}",
                    NUM_BITS,
                    output_arr.len(),
                    NUM_BYTES_PER_BLOCK
                );

                let src = input_arr.as_ptr() as *const DataType;
                let dst = output_arr.as_mut_ptr() as *mut DataType;
                unroll! {
                    for lane in 0..32 {
                        let transformed = delta_computer.transform(load_unaligned(src.add(lane)));
                        store_unaligned(dst.add(lane), transformed);
                    }
                }
                NUM_BYTES_PER_BLOCK
            }

            /// Hands each of the 32 registers of `compressed` to `output`,
            /// in order. Returns `NUM_BYTES_PER_BLOCK`.
            ///
            /// # Panics
            ///
            /// Panics if `compressed` is shorter than `NUM_BYTES_PER_BLOCK`.
            ///
            /// # Safety
            ///
            /// The function reads through raw pointers and is compiled with
            /// the `$cpufeature` attribute.
            /// NOTE(review): presumably the caller must make sure the CPU
            /// supports that feature -- confirm against the call sites.
            #[$cpufeature]
            pub(crate) unsafe fn unpack<Output: Sink>(
                compressed: &[u8],
                mut output: Output,
            ) -> usize {
                assert!(
                    compressed.len() >= NUM_BYTES_PER_BLOCK,
                    "Compressed array seems too small. ({} < {}) ",
                    compressed.len(),
                    NUM_BYTES_PER_BLOCK
                );
                let src = compressed.as_ptr() as *const DataType;
                for lane in 0..32usize {
                    output.process(load_unaligned(src.add(lane)));
                }
                NUM_BYTES_PER_BLOCK
            }
        }
    };
}
197
/// Declares a complete bit packer implementation in the invoking module.
///
/// The expansion contains:
/// - one packing module per bit width (1 to 32),
/// - the `Transformer` / `Sink` traits and their implementations, shared by
///   the plain, sorted and strictly sorted code paths,
/// - `UnsafeBitPackerImpl`, which implements `UnsafeBitPacker`,
/// - a test module.
///
/// Every generated entry point carries the `$cpufeature` attribute. The
/// invoking module has to supply the names the expansion uses without
/// importing them: `BLOCK_LEN`, `DataType`, `set1`, `add`, `sub`, `op_or`,
/// `op_and`, `left_shift_32`, `right_shift_32`, `load_unaligned`,
/// `store_unaligned`, `compute_delta`, `integrate_delta` and
/// `or_collapse_to_u32`.
macro_rules! declare_bitpacker {
    ($cpufeature:meta) => {
        use super::super::UnsafeBitPacker;
        use crate::most_significant_bit;
        use crunchy::unroll;

        // One module per bit width; width 32 has a dedicated implementation.
        pack_unpack_with_bits!(pack_unpack_with_bits_1, 1, $cpufeature);
        pack_unpack_with_bits!(pack_unpack_with_bits_2, 2, $cpufeature);
        pack_unpack_with_bits!(pack_unpack_with_bits_3, 3, $cpufeature);
        pack_unpack_with_bits!(pack_unpack_with_bits_4, 4, $cpufeature);
        pack_unpack_with_bits!(pack_unpack_with_bits_5, 5, $cpufeature);
        pack_unpack_with_bits!(pack_unpack_with_bits_6, 6, $cpufeature);
        pack_unpack_with_bits!(pack_unpack_with_bits_7, 7, $cpufeature);
        pack_unpack_with_bits!(pack_unpack_with_bits_8, 8, $cpufeature);
        pack_unpack_with_bits!(pack_unpack_with_bits_9, 9, $cpufeature);
        pack_unpack_with_bits!(pack_unpack_with_bits_10, 10, $cpufeature);
        pack_unpack_with_bits!(pack_unpack_with_bits_11, 11, $cpufeature);
        pack_unpack_with_bits!(pack_unpack_with_bits_12, 12, $cpufeature);
        pack_unpack_with_bits!(pack_unpack_with_bits_13, 13, $cpufeature);
        pack_unpack_with_bits!(pack_unpack_with_bits_14, 14, $cpufeature);
        pack_unpack_with_bits!(pack_unpack_with_bits_15, 15, $cpufeature);
        pack_unpack_with_bits!(pack_unpack_with_bits_16, 16, $cpufeature);
        pack_unpack_with_bits!(pack_unpack_with_bits_17, 17, $cpufeature);
        pack_unpack_with_bits!(pack_unpack_with_bits_18, 18, $cpufeature);
        pack_unpack_with_bits!(pack_unpack_with_bits_19, 19, $cpufeature);
        pack_unpack_with_bits!(pack_unpack_with_bits_20, 20, $cpufeature);
        pack_unpack_with_bits!(pack_unpack_with_bits_21, 21, $cpufeature);
        pack_unpack_with_bits!(pack_unpack_with_bits_22, 22, $cpufeature);
        pack_unpack_with_bits!(pack_unpack_with_bits_23, 23, $cpufeature);
        pack_unpack_with_bits!(pack_unpack_with_bits_24, 24, $cpufeature);
        pack_unpack_with_bits!(pack_unpack_with_bits_25, 25, $cpufeature);
        pack_unpack_with_bits!(pack_unpack_with_bits_26, 26, $cpufeature);
        pack_unpack_with_bits!(pack_unpack_with_bits_27, 27, $cpufeature);
        pack_unpack_with_bits!(pack_unpack_with_bits_28, 28, $cpufeature);
        pack_unpack_with_bits!(pack_unpack_with_bits_29, 29, $cpufeature);
        pack_unpack_with_bits!(pack_unpack_with_bits_30, 30, $cpufeature);
        pack_unpack_with_bits!(pack_unpack_with_bits_31, 31, $cpufeature);
        pack_unpack_with_bits_32!($cpufeature);

        /// Dispatches to the `pack` routine generated for `num_bits`.
        ///
        /// Returns what the selected `pack` returns, or `0` (nothing written)
        /// when `num_bits` is `0`.
        ///
        /// # Panics
        ///
        /// Panics if `num_bits` is greater than 32. The selected `pack`
        /// routine additionally asserts on the slice lengths.
        ///
        /// # Safety
        ///
        /// Forwards to `unsafe` routines compiled with `$cpufeature`.
        /// NOTE(review): presumably the CPU feature must be available at
        /// runtime -- confirm against the call sites.
        unsafe fn compress_generic<DeltaComputer: Transformer>(
            decompressed: &[u32],
            compressed: &mut [u8],
            num_bits: u8,
            delta_computer: DeltaComputer,
        ) -> usize {
            match num_bits {
                0 => 0,
                1 => pack_unpack_with_bits_1::pack(decompressed, compressed, delta_computer),
                2 => pack_unpack_with_bits_2::pack(decompressed, compressed, delta_computer),
                3 => pack_unpack_with_bits_3::pack(decompressed, compressed, delta_computer),
                4 => pack_unpack_with_bits_4::pack(decompressed, compressed, delta_computer),
                5 => pack_unpack_with_bits_5::pack(decompressed, compressed, delta_computer),
                6 => pack_unpack_with_bits_6::pack(decompressed, compressed, delta_computer),
                7 => pack_unpack_with_bits_7::pack(decompressed, compressed, delta_computer),
                8 => pack_unpack_with_bits_8::pack(decompressed, compressed, delta_computer),
                9 => pack_unpack_with_bits_9::pack(decompressed, compressed, delta_computer),
                10 => pack_unpack_with_bits_10::pack(decompressed, compressed, delta_computer),
                11 => pack_unpack_with_bits_11::pack(decompressed, compressed, delta_computer),
                12 => pack_unpack_with_bits_12::pack(decompressed, compressed, delta_computer),
                13 => pack_unpack_with_bits_13::pack(decompressed, compressed, delta_computer),
                14 => pack_unpack_with_bits_14::pack(decompressed, compressed, delta_computer),
                15 => pack_unpack_with_bits_15::pack(decompressed, compressed, delta_computer),
                16 => pack_unpack_with_bits_16::pack(decompressed, compressed, delta_computer),
                17 => pack_unpack_with_bits_17::pack(decompressed, compressed, delta_computer),
                18 => pack_unpack_with_bits_18::pack(decompressed, compressed, delta_computer),
                19 => pack_unpack_with_bits_19::pack(decompressed, compressed, delta_computer),
                20 => pack_unpack_with_bits_20::pack(decompressed, compressed, delta_computer),
                21 => pack_unpack_with_bits_21::pack(decompressed, compressed, delta_computer),
                22 => pack_unpack_with_bits_22::pack(decompressed, compressed, delta_computer),
                23 => pack_unpack_with_bits_23::pack(decompressed, compressed, delta_computer),
                24 => pack_unpack_with_bits_24::pack(decompressed, compressed, delta_computer),
                25 => pack_unpack_with_bits_25::pack(decompressed, compressed, delta_computer),
                26 => pack_unpack_with_bits_26::pack(decompressed, compressed, delta_computer),
                27 => pack_unpack_with_bits_27::pack(decompressed, compressed, delta_computer),
                28 => pack_unpack_with_bits_28::pack(decompressed, compressed, delta_computer),
                29 => pack_unpack_with_bits_29::pack(decompressed, compressed, delta_computer),
                30 => pack_unpack_with_bits_30::pack(decompressed, compressed, delta_computer),
                31 => pack_unpack_with_bits_31::pack(decompressed, compressed, delta_computer),
                32 => pack_unpack_with_bits_32::pack(decompressed, compressed, delta_computer),
                _ => {
                    panic!("Num bits must be <= 32. Was {}.", num_bits);
                }
            }
        }

        /// Per-register transformation applied to the input while packing.
        pub trait Transformer {
            /// Transforms one input register. The `pack` routines call this
            /// once per register, in input order.
            unsafe fn transform(&mut self, data: DataType) -> DataType;
        }

        /// Identity transformer: registers are packed as they are.
        struct NoDelta;

        impl Transformer for NoDelta {
            #[inline]
            unsafe fn transform(&mut self, current: DataType) -> DataType {
                current
            }
        }

        /// Transformer replacing each register by its delta, as computed by
        /// `compute_delta`, with respect to the previously seen register.
        struct DeltaComputer {
            /// Register seen by the previous call (or the initial value).
            pub previous: DataType,
        }

        impl Transformer for DeltaComputer {
            #[inline]
            unsafe fn transform(&mut self, current: DataType) -> DataType {
                let result = compute_delta(current, self.previous);
                self.previous = current;
                result
            }
        }

        /// Same as `DeltaComputer`, but `set1(1)` is subtracted from every
        /// delta before it is packed.
        struct StrictDeltaComputer {
            /// Register seen by the previous call (or the initial value).
            pub previous: DataType,
        }

        impl Transformer for StrictDeltaComputer {
            #[inline]
            unsafe fn transform(&mut self, current: DataType) -> DataType {
                let result = compute_delta(current, self.previous);
                self.previous = current;
                sub(result, set1(1))
            }
        }

        /// Consumer of the registers produced while unpacking.
        pub trait Sink {
            /// Receives one unpacked register. The `unpack` routines call
            /// this once per register, in output order.
            unsafe fn process(&mut self, data_type: DataType);
        }

        /// Sink writing each register verbatim to consecutive slots starting
        /// at `output_ptr`.
        struct Store {
            output_ptr: *mut DataType,
        }

        impl Store {
            fn new(output_ptr: *mut DataType) -> Store {
                Store { output_ptr }
            }
        }

        impl Sink for Store {
            #[inline]
            unsafe fn process(&mut self, out_register: DataType) {
                store_unaligned(self.output_ptr, out_register);
                self.output_ptr = self.output_ptr.add(1);
            }
        }

        /// Sink rebuilding the values from their deltas through
        /// `integrate_delta`, then writing them to consecutive slots starting
        /// at `output_ptr`.
        struct DeltaIntegrate {
            /// Running state fed back into `integrate_delta`.
            current: DataType,
            output_ptr: *mut DataType,
        }

        impl DeltaIntegrate {
            /// Starts the integration from `initial` broadcast with `set1`.
            unsafe fn new(initial: u32, output_ptr: *mut DataType) -> DeltaIntegrate {
                DeltaIntegrate {
                    current: set1(initial as i32),
                    output_ptr,
                }
            }
        }

        impl Sink for DeltaIntegrate {
            #[inline]
            unsafe fn process(&mut self, delta: DataType) {
                self.current = integrate_delta(self.current, delta);
                store_unaligned(self.output_ptr, self.current);
                self.output_ptr = self.output_ptr.add(1);
            }
        }

        /// Same as `DeltaIntegrate`, but `set1(1)` is added back to every
        /// delta before integration (the inverse of `StrictDeltaComputer`).
        struct StrictDeltaIntegrate {
            /// Running state fed back into `integrate_delta`.
            current: DataType,
            output_ptr: *mut DataType,
        }

        impl StrictDeltaIntegrate {
            /// Starts the integration from `initial` broadcast with `set1`.
            unsafe fn new(initial: u32, output_ptr: *mut DataType) -> StrictDeltaIntegrate {
                StrictDeltaIntegrate {
                    current: set1(initial as i32),
                    output_ptr,
                }
            }
        }

        impl Sink for StrictDeltaIntegrate {
            #[inline]
            unsafe fn process(&mut self, delta: DataType) {
                self.current = integrate_delta(self.current, add(delta, set1(1)));
                store_unaligned(self.output_ptr, self.current);
                self.output_ptr = self.output_ptr.add(1);
            }
        }

        /// Dispatches to the `unpack` routine generated for `num_bits`,
        /// feeding the decoded registers to `sink`.
        ///
        /// With `num_bits == 0` nothing is read from `compressed`: the sink
        /// receives 32 all-zero registers and `0` is returned. Otherwise the
        /// return value is the one of the selected `unpack`.
        ///
        /// # Panics
        ///
        /// Panics if `num_bits` is greater than 32. The selected `unpack`
        /// routine additionally asserts on the length of `compressed`.
        ///
        /// # Safety
        ///
        /// Forwards to `unsafe` routines compiled with `$cpufeature`; `sink`
        /// is called 32 times and must be able to absorb that many registers.
        #[inline]
        unsafe fn decompress_to<Output: Sink>(
            compressed: &[u8],
            mut sink: Output,
            num_bits: u8,
        ) -> usize {
            match num_bits {
                0 => {
                    let zero = set1(0i32);
                    for _ in 0..32 {
                        sink.process(zero);
                    }
                    0
                }
                1 => pack_unpack_with_bits_1::unpack(compressed, sink),
                2 => pack_unpack_with_bits_2::unpack(compressed, sink),
                3 => pack_unpack_with_bits_3::unpack(compressed, sink),
                4 => pack_unpack_with_bits_4::unpack(compressed, sink),
                5 => pack_unpack_with_bits_5::unpack(compressed, sink),
                6 => pack_unpack_with_bits_6::unpack(compressed, sink),
                7 => pack_unpack_with_bits_7::unpack(compressed, sink),
                8 => pack_unpack_with_bits_8::unpack(compressed, sink),
                9 => pack_unpack_with_bits_9::unpack(compressed, sink),
                10 => pack_unpack_with_bits_10::unpack(compressed, sink),
                11 => pack_unpack_with_bits_11::unpack(compressed, sink),
                12 => pack_unpack_with_bits_12::unpack(compressed, sink),
                13 => pack_unpack_with_bits_13::unpack(compressed, sink),
                14 => pack_unpack_with_bits_14::unpack(compressed, sink),
                15 => pack_unpack_with_bits_15::unpack(compressed, sink),
                16 => pack_unpack_with_bits_16::unpack(compressed, sink),
                17 => pack_unpack_with_bits_17::unpack(compressed, sink),
                18 => pack_unpack_with_bits_18::unpack(compressed, sink),
                19 => pack_unpack_with_bits_19::unpack(compressed, sink),
                20 => pack_unpack_with_bits_20::unpack(compressed, sink),
                21 => pack_unpack_with_bits_21::unpack(compressed, sink),
                22 => pack_unpack_with_bits_22::unpack(compressed, sink),
                23 => pack_unpack_with_bits_23::unpack(compressed, sink),
                24 => pack_unpack_with_bits_24::unpack(compressed, sink),
                25 => pack_unpack_with_bits_25::unpack(compressed, sink),
                26 => pack_unpack_with_bits_26::unpack(compressed, sink),
                27 => pack_unpack_with_bits_27::unpack(compressed, sink),
                28 => pack_unpack_with_bits_28::unpack(compressed, sink),
                29 => pack_unpack_with_bits_29::unpack(compressed, sink),
                30 => pack_unpack_with_bits_30::unpack(compressed, sink),
                31 => pack_unpack_with_bits_31::unpack(compressed, sink),
                32 => pack_unpack_with_bits_32::unpack(compressed, sink),
                _ => {
                    panic!("Num bits must be <= 32. Was {}.", num_bits);
                }
            }
        }

        /// Bit packer generated by `declare_bitpacker!`.
        pub struct UnsafeBitPackerImpl;

        impl UnsafeBitPacker for UnsafeBitPackerImpl {
            const BLOCK_LEN: usize = BLOCK_LEN;

            // Packs the block as is (no delta encoding).
            #[$cpufeature]
            unsafe fn compress(decompressed: &[u32], compressed: &mut [u8], num_bits: u8) -> usize {
                compress_generic(decompressed, compressed, num_bits, NoDelta)
            }

            // Packs the deltas of the block, starting from `initial`.
            #[$cpufeature]
            unsafe fn compress_sorted(
                initial: u32,
                decompressed: &[u32],
                compressed: &mut [u8],
                num_bits: u8,
            ) -> usize {
                let delta_computer = DeltaComputer {
                    previous: set1(initial as i32),
                };
                compress_generic(decompressed, compressed, num_bits, delta_computer)
            }

            // Packs the deltas minus one of the block, starting from `initial`.
            #[$cpufeature]
            unsafe fn compress_strictly_sorted(
                initial: Option<u32>,
                decompressed: &[u32],
                compressed: &mut [u8],
                num_bits: u8,
            ) -> usize {
                // To allow encoding [0, 1, 2, ..], we need to permit an initial value "lower"
                // than zero. To get a clean API that value is `None`, but in practice, as we
                // work on wrapping integers, u32::MAX (i.e. -1) does the job just fine.
                let initial = initial.unwrap_or(u32::MAX);
                let delta_computer = StrictDeltaComputer {
                    previous: set1(initial as i32),
                };
                compress_generic(decompressed, compressed, num_bits, delta_computer)
            }

            // Inverse of `compress`; panics if `decompressed` holds fewer
            // than `BLOCK_LEN` elements.
            #[$cpufeature]
            unsafe fn decompress(
                compressed: &[u8],
                decompressed: &mut [u32],
                num_bits: u8,
            ) -> usize {
                assert!(
                    decompressed.len() >= BLOCK_LEN,
                    "The output array is not large enough : ({} >= {})",
                    decompressed.len(),
                    BLOCK_LEN
                );
                let output_ptr = decompressed.as_mut_ptr() as *mut DataType;
                let output = Store::new(output_ptr);
                decompress_to(compressed, output, num_bits)
            }

            // Inverse of `compress_sorted`; panics if `decompressed` holds
            // fewer than `BLOCK_LEN` elements.
            #[$cpufeature]
            unsafe fn decompress_sorted(
                initial: u32,
                compressed: &[u8],
                decompressed: &mut [u32],
                num_bits: u8,
            ) -> usize {
                assert!(
                    decompressed.len() >= BLOCK_LEN,
                    "The output array is not large enough : ({} >= {})",
                    decompressed.len(),
                    BLOCK_LEN
                );
                let output_ptr = decompressed.as_mut_ptr() as *mut DataType;
                let output = DeltaIntegrate::new(initial, output_ptr);
                decompress_to(compressed, output, num_bits)
            }

            // Inverse of `compress_strictly_sorted`; panics if `decompressed`
            // holds fewer than `BLOCK_LEN` elements.
            #[$cpufeature]
            unsafe fn decompress_strictly_sorted(
                initial: Option<u32>,
                compressed: &[u8],
                decompressed: &mut [u32],
                num_bits: u8,
            ) -> usize {
                assert!(
                    decompressed.len() >= BLOCK_LEN,
                    "The output array is not large enough : ({} >= {})",
                    decompressed.len(),
                    BLOCK_LEN
                );
                // Same convention as in `compress_strictly_sorted`.
                let initial = initial.unwrap_or(u32::MAX);
                let output_ptr = decompressed.as_mut_ptr() as *mut DataType;
                let output = StrictDeltaIntegrate::new(initial, output_ptr);
                decompress_to(compressed, output, num_bits)
            }

            // ORs the 32 registers of the block together and returns
            // `most_significant_bit` of the collapsed result.
            #[$cpufeature]
            unsafe fn num_bits(decompressed: &[u32]) -> u8 {
                assert_eq!(
                    decompressed.len(),
                    BLOCK_LEN,
                    "`decompressed`'s len is not `BLOCK_LEN={}`",
                    BLOCK_LEN
                );
                let data: *const DataType = decompressed.as_ptr() as *const DataType;
                let mut accumulator = load_unaligned(data);
                unroll! {
                    for iter in 0..31 {
                        let i = iter + 1;
                        let newvec = load_unaligned(data.add(i));
                        accumulator = op_or(accumulator, newvec);
                    }
                }
                most_significant_bit(or_collapse_to_u32(accumulator))
            }

            // Same as `num_bits`, computed over the deltas of the block.
            #[$cpufeature]
            unsafe fn num_bits_sorted(initial: u32, decompressed: &[u32]) -> u8 {
                assert_eq!(
                    decompressed.len(),
                    BLOCK_LEN,
                    "`decompressed`'s len is not `BLOCK_LEN={}`",
                    BLOCK_LEN
                );
                let initial_vec = set1(initial as i32);
                let data: *const DataType = decompressed.as_ptr() as *const DataType;

                // The first register is loaded once and used both for the
                // first delta and as the first `previous`.
                let first = load_unaligned(data);
                let mut accumulator = compute_delta(first, initial_vec);
                let mut previous = first;

                unroll! {
                    for iter in 0..30 {
                        let i = iter + 1;
                        let current = load_unaligned(data.add(i));
                        let delta = compute_delta(current, previous);
                        accumulator = op_or(accumulator, delta);
                        previous = current;
                    }
                }
                // The last register is handled outside of the loop, where
                // `previous` no longer needs updating.
                let current = load_unaligned(data.add(31));
                let delta = compute_delta(current, previous);
                accumulator = op_or(accumulator, delta);
                most_significant_bit(or_collapse_to_u32(accumulator))
            }

            // Same as `num_bits`, computed over the deltas minus one.
            #[$cpufeature]
            unsafe fn num_bits_strictly_sorted(initial: Option<u32>, decompressed: &[u32]) -> u8 {
                assert_eq!(
                    decompressed.len(),
                    BLOCK_LEN,
                    "`decompressed`'s len is not `BLOCK_LEN={}`",
                    BLOCK_LEN
                );
                // Same convention as in `compress_strictly_sorted`.
                let initial = initial.unwrap_or(u32::MAX);
                let initial_vec = set1(initial as i32);
                let one = set1(1);
                let data: *const DataType = decompressed.as_ptr() as *const DataType;

                // The first register is loaded once and used both for the
                // first delta and as the first `previous`.
                let first = load_unaligned(data);
                let mut accumulator = sub(compute_delta(first, initial_vec), one);
                let mut previous = first;

                unroll! {
                    for iter in 0..30 {
                        let i = iter + 1;
                        let current = load_unaligned(data.add(i));
                        let delta = sub(compute_delta(current, previous), one);
                        accumulator = op_or(accumulator, delta);
                        previous = current;
                    }
                }
                // The last register is handled outside of the loop, where
                // `previous` no longer needs updating.
                let current = load_unaligned(data.add(31));
                let delta = sub(compute_delta(current, previous), one);
                accumulator = op_or(accumulator, delta);
                most_significant_bit(or_collapse_to_u32(accumulator))
            }
        }

        #[cfg(test)]
        mod tests {
            use super::UnsafeBitPackerImpl;
            use crate::tests::{test_suite_compress_decompress, DeltaKind};
            use crate::Available;
            use crate::UnsafeBitPacker;

            #[test]
            fn test_num_bits() {
                if UnsafeBitPackerImpl::available() {
                    // `0..=32`: the full 32-bit width has to be covered too.
                    for num_bits in 0..=32 {
                        // Every position of the block is probed, not only
                        // the first 32 ones.
                        for pos in 0..UnsafeBitPackerImpl::BLOCK_LEN {
                            let mut vals = [0u32; UnsafeBitPackerImpl::BLOCK_LEN];
                            if num_bits > 0 {
                                vals[pos] = 1 << (num_bits - 1);
                            }
                            assert_eq!(
                                unsafe { UnsafeBitPackerImpl::num_bits(&vals[..]) },
                                num_bits
                            );
                        }
                    }
                }
            }

            #[test]
            fn test_bitpacker() {
                if UnsafeBitPackerImpl::available() {
                    test_suite_compress_decompress::<UnsafeBitPackerImpl>(DeltaKind::NoDelta);
                }
            }

            #[test]
            fn test_bitpacker_delta() {
                if UnsafeBitPackerImpl::available() {
                    test_suite_compress_decompress::<UnsafeBitPackerImpl>(DeltaKind::Delta);
                }
            }

            #[test]
            fn test_bitpacker_strict_delta() {
                if UnsafeBitPackerImpl::available() {
                    test_suite_compress_decompress::<UnsafeBitPackerImpl>(DeltaKind::StrictDelta);
                }
            }
        }
    };
}