polars_parquet/parquet/encoding/bitpacked/
mod.rs

/// Compile-time loop unrolling: repeats a block once per integer of a fixed sequence.
///
/// For every value in the sequence the macro emits a fresh scope in which the
/// identifier given as `$i` is declared as a `const usize` holding that value,
/// followed by the caller's block. Because `$i` is a constant, it may be used
/// wherever a constant expression is required (array lengths, const generics, ...).
///
/// Accepted forms:
///
/// * `seq_macro!(i in [a, b, c] { ... })` - an explicit list of literals
///   (a trailing comma is allowed);
/// * `seq_macro!(i in R { ... })` where `R` is written *exactly* as one of
///   `1..15`, `0..16`, `0..=16`, `1..31`, `0..32`, `0..=32`, `1..63`, `0..64`,
///   `0..=64`. These are matched token-for-token and forwarded to the list form;
///   any other range does not match.
///
/// The macro is defined ahead of the `mod` declarations in this file, so the
/// submodules declared after it can use it through textual macro scoping.
macro_rules! seq_macro {
    // Base rule: one scoped copy of the body per listed literal.
    ($i:ident in [$($n:literal),+ $(,)?] $body:block) => {
        $({
            // The caller picks the identifier, which is usually lower-case.
            #[allow(non_upper_case_globals)]
            const $i: usize = $n;
            { $body }
        })+
    };

    // ---- ranges used by the 16-bit code paths ----
    ($i:ident in 1..15 $body:block) => {
        seq_macro!($i in [
                 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
        ] $body)
    };
    ($i:ident in 0..16 $body:block) => {
        seq_macro!($i in [
             0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
        ] $body)
    };
    ($i:ident in 0..=16 $body:block) => {
        seq_macro!($i in [
             0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
            16,
        ] $body)
    };

    // ---- ranges used by the 32-bit code paths ----
    ($i:ident in 1..31 $body:block) => {
        seq_macro!($i in [
                 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
        ] $body)
    };
    ($i:ident in 0..32 $body:block) => {
        seq_macro!($i in [
             0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        ] $body)
    };
    ($i:ident in 0..=32 $body:block) => {
        seq_macro!($i in [
             0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
            32,
        ] $body)
    };

    // ---- ranges used by the 64-bit code paths ----
    ($i:ident in 1..63 $body:block) => {
        seq_macro!($i in [
                 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
            32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
            48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
        ] $body)
    };
    ($i:ident in 0..64 $body:block) => {
        seq_macro!($i in [
             0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
            32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
            48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
        ] $body)
    };
    ($i:ident in 0..=64 $body:block) => {
        seq_macro!($i in [
             0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
            32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
            48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
            64,
        ] $body)
    };
}
70
71mod decode;
72mod encode;
73mod pack;
74mod unpack;
75
76pub use decode::{ChunkedDecoder, Decoder};
77pub use encode::{encode, encode_pack};
78
/// A fixed-size byte array (e.g. `[u8; 8]`) that holds one complete pack.
///
/// The supertraits give generic code what it needs to treat the pack as plain
/// bytes: it can be copied, viewed as a `[u8]` (shared or mutable), indexed
/// byte by byte, and built from a byte slice via [`TryFrom`].
pub trait Packed:
    Sized
    + Copy
    + for<'a> TryFrom<&'a [u8]>
    + AsRef<[u8]>
    + AsMut<[u8]>
    + std::ops::IndexMut<usize, Output = u8>
{
    /// Size of the pack in bytes.
    const LENGTH: usize;

    /// Returns a pack with every byte set to zero.
    fn zero() -> Self;
}
91
92impl Packed for [u8; 8] {
93    const LENGTH: usize = 8;
94    #[inline]
95    fn zero() -> Self {
96        [0; 8]
97    }
98}
99
100impl Packed for [u8; 16 * 2] {
101    const LENGTH: usize = 16 * 2;
102    #[inline]
103    fn zero() -> Self {
104        [0; 16 * 2]
105    }
106}
107
108impl Packed for [u8; 32 * 4] {
109    const LENGTH: usize = 32 * 4;
110    #[inline]
111    fn zero() -> Self {
112        [0; 32 * 4]
113    }
114}
115
116impl Packed for [u8; 64 * 8] {
117    const LENGTH: usize = 64 * 8;
118    #[inline]
119    fn zero() -> Self {
120        [0; 64 * 8]
121    }
122}
123
/// A fixed-size array of `T` (e.g. `[u32; 32]`) holding one complete batch of
/// unpacked values.
///
/// The supertraits let generic code copy the array, view it as a `[T]`
/// (shared or mutable), index individual values, and build it from a slice
/// with [`TryFrom`], failing with [`std::array::TryFromSliceError`].
pub trait Unpacked<T>:
    Sized
    + Copy
    + for<'a> TryFrom<&'a [T], Error = std::array::TryFromSliceError>
    + AsRef<[T]>
    + AsMut<[T]>
    + std::ops::Index<usize, Output = T>
    + std::ops::IndexMut<usize, Output = T>
{
    /// Number of `T` values held by the array.
    const LENGTH: usize;

    /// Returns an array with every value set to zero.
    fn zero() -> Self;
}
137
138impl Unpacked<u8> for [u8; 8] {
139    const LENGTH: usize = 8;
140    #[inline]
141    fn zero() -> Self {
142        [0; 8]
143    }
144}
145
146impl Unpacked<u16> for [u16; 16] {
147    const LENGTH: usize = 16;
148    #[inline]
149    fn zero() -> Self {
150        [0; 16]
151    }
152}
153
154impl Unpacked<u32> for [u32; 32] {
155    const LENGTH: usize = 32;
156    #[inline]
157    fn zero() -> Self {
158        [0; 32]
159    }
160}
161
162impl Unpacked<u64> for [u64; 64] {
163    const LENGTH: usize = 64;
164    #[inline]
165    fn zero() -> Self {
166        [0; 64]
167    }
168}
169
170/// A type representing a type that can be bitpacked and unpacked by this crate.
171pub trait Unpackable: Copy + Sized + Default {
172    type Packed: Packed;
173    type Unpacked: Unpacked<Self>;
174
175    fn unpack(packed: &[u8], num_bits: usize, unpacked: &mut Self::Unpacked);
176    fn pack(unpacked: &Self::Unpacked, num_bits: usize, packed: &mut [u8]);
177}
178
179impl Unpackable for u16 {
180    type Packed = [u8; 16 * 2];
181    type Unpacked = [u16; 16];
182
183    #[inline]
184    fn unpack(packed: &[u8], num_bits: usize, unpacked: &mut Self::Unpacked) {
185        unpack::unpack16(packed, unpacked, num_bits)
186    }
187
188    #[inline]
189    fn pack(packed: &Self::Unpacked, num_bits: usize, unpacked: &mut [u8]) {
190        pack::pack16(packed, unpacked, num_bits)
191    }
192}
193
194impl Unpackable for u32 {
195    type Packed = [u8; 32 * 4];
196    type Unpacked = [u32; 32];
197
198    #[inline]
199    fn unpack(packed: &[u8], num_bits: usize, unpacked: &mut Self::Unpacked) {
200        unpack::unpack32(packed, unpacked, num_bits)
201    }
202
203    #[inline]
204    fn pack(packed: &Self::Unpacked, num_bits: usize, unpacked: &mut [u8]) {
205        pack::pack32(packed, unpacked, num_bits)
206    }
207}
208
209impl Unpackable for u64 {
210    type Packed = [u8; 64 * 8];
211    type Unpacked = [u64; 64];
212
213    #[inline]
214    fn unpack(packed: &[u8], num_bits: usize, unpacked: &mut Self::Unpacked) {
215        unpack::unpack64(packed, unpacked, num_bits)
216    }
217
218    #[inline]
219    fn pack(packed: &Self::Unpacked, num_bits: usize, unpacked: &mut [u8]) {
220        pack::pack64(packed, unpacked, num_bits)
221    }
222}
223
#[cfg(test)]
mod tests {
    use super::*;

    /// Shared fixture: returns `(num_bits, unpacked values, packed bytes)`.
    ///
    /// The unpacked data is the sequence `0, 1, ..., 7` repeated five times
    /// (40 values). At 3 bits per value each run of eight values takes three
    /// bytes, so the packed data is the same three bytes repeated five times.
    pub fn case1() -> (usize, Vec<u32>, Vec<u8>) {
        // Packed form of the eight values 0..=7 at 3 bits each.
        const GROUP: [u8; 3] = [0b10001000, 0b11000110, 0b11111010];
        const REPEATS: usize = 5;

        let compressed = GROUP.repeat(REPEATS);
        let decompressed = (0..8u32).cycle().take(8 * REPEATS).collect();
        (3, decompressed, compressed)
    }

    /// Encoding the 40-value fixture yields the fixture's 15 packed bytes.
    #[test]
    fn encode_large() {
        let (num_bits, unpacked, expected) = case1();

        let mut packed = vec![0u8; 4 * 32];
        encode(&unpacked, num_bits, &mut packed);

        assert_eq!(&packed[..15], expected);
    }

    /// Encoding a single run of `0..=7` at 3 bits yields three bytes.
    #[test]
    fn test_encode() {
        let unpacked: Vec<u32> = (0..8).collect();
        let mut packed = vec![0u8; 4 * 32];

        encode::<u32>(&unpacked, 3, &mut packed);

        let expected = vec![0b10001000u8, 0b11000110, 0b11111010];
        assert_eq!(&packed[..3], expected);
    }
}