group_varint_encoding/
lib.rs

1mod decoder;
2use decoder::decode_block;
3mod util;
4use util::*;
5
#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes `values`, decodes the resulting bytes again and checks that the
    /// decoded sequence equals the input.
    fn assert_roundtrip(values: &[u32]) {
        let encoded = compress(values.iter().copied());
        let decoded = DataBlockIter { data: &encoded }.collect();
        assert_eq!(decoded, values);
    }

    // --- exact byte layout -------------------------------------------------

    #[test]
    fn tst1() {
        // All zeros: tag byte 0, then one payload byte per value.
        assert_eq!(compress([0u32, 0, 0, 0]), [0, 0, 0, 0, 0]);
    }

    #[test]
    fn tst2() {
        // The largest one-byte values still need no extra bytes.
        assert_eq!(
            compress([0xff, 0xff, 0xff, 0xff]),
            [0, 0xff, 0xff, 0xff, 0xff]
        );
    }

    #[test]
    fn tst3() {
        // A two-byte value in each of the four positions sets the matching
        // two-bit field of the tag byte.
        assert_eq!(compress([0xffff, 2, 3, 4]), [1, 0xff, 0xff, 2, 3, 4]);
        assert_eq!(
            compress([0xff, 0xff01, 2, 3]),
            [1 << 2, 0xff, 1, 0xff, 2, 3]
        );
        assert_eq!(
            compress([0xff, 0xff, 0xff01, 0]),
            [1 << 4, 0xff, 0xff, 1, 0xff, 0]
        );
        assert_eq!(
            compress([0xff, 0xff, 0xff, 0xff03]),
            [1 << 6, 0xff, 0xff, 0xff, 3, 0xff]
        );
    }

    #[test]
    fn tst4() {
        // Eight values produce two blocks, each with its own tag byte.
        assert_eq!(
            compress([1, 2, 3, 4, 5, 6, 7, 8]),
            [0, 1, 2, 3, 4, 0, 5, 6, 7, 8]
        );
    }

    #[test]
    fn tst5() {
        assert_eq!(
            compress([1, 2, 3, 4, 0xff05, 6, 7, 8]),
            [0, 1, 2, 3, 4, 1, 5, 0xff, 6, 7, 8]
        );
    }

    #[test]
    fn tst6() {
        let expected = [
            // block 1
            0b00, 1, 2, 3, 4, //
            // block 2: the four-byte value is stored low byte first
            0b11, 0xdd, 0xcc, 0xbb, 0xaa, 6, 7, 8,
        ];
        assert_eq!(compress([1, 2, 3, 4, 0xaabbccdd, 6, 7, 8]), expected);
    }

    // --- round trips -------------------------------------------------------

    #[test]
    fn tst7() {
        assert_roundtrip(&[3243, 12, 32432, 5435]);
    }

    #[test]
    fn tst8() {
        assert_roundtrip(&[732743432, 213213213, 32, 2314324]);
    }

    #[test]
    fn tst9() {
        assert_roundtrip(&[732743432, 213213213, 32, 2314324, 3243, 12, 32432, 5435]);
    }

    #[test]
    fn tst9_1() {
        assert_roundtrip(&[0, 213213213, 32, 2314324, 3243, 12, 32432, 5435]);
    }

    #[test]
    fn tst9_2() {
        assert_roundtrip(&[0, 0, 0, 0, 3243, 12, 32432, 5435]);
    }

    #[test]
    fn tst9_3_compression() {
        let values = [0u32, 0, 0, 0, 1, 0, 0, 0];

        let encoded = compress(values.iter().copied());
        let expected_bytes = [
            // block 1
            0b00, 0, 0, 0, 0, //
            // block 2
            0b00, 1, 0, 0, 0,
        ];
        assert_eq!(encoded, expected_bytes);

        // A bug occurred here: the 1 came back at the 2nd index of the block.
        let decoded = DataBlockIter { data: &encoded }.collect();
        assert_eq!(decoded, values);
    }

    #[test]
    fn tst9_4() {
        assert_roundtrip(&[0, 0, 0, 0, 0, 1, 0, 0]);
    }

    #[test]
    fn tst10() {
        assert_roundtrip(&[
            327343432, 213213213, 32, 2314324, 3243, 12, 32432, 5435, 4356, 57, 657, 6546, 32, 4,
            3245, 67, 65, 432, 465, 7, 643, 542, 5424, 2432, 4, 324, 324, 326, 765, 7534,
            646546546, 45654, 6456, 546, 546, 546, 546, 546, 5462, 22222222, 5637426, 5356790,
            98765432, 34567, 6544567, 6543245, 6543, 45678, 76543, 45678, 765, 3467890, 9876, 5432,
            345, 0,
        ]);
    }
}
183
184pub struct DataBlockIter<'a> {
185    data: &'a [u8],
186}
187
188impl<'a> DataBlockIter<'a> {
189    pub fn collect(self) -> Vec<u32> {
190        let mut v = Vec::new();
191
192        for [a, b, c, d] in self {
193            v.push(a);
194            v.push(b);
195            v.push(c);
196            v.push(d);
197        }
198
199        v
200    }
201}
202
203impl<'a> Iterator for DataBlockIter<'a> {
204    type Item = [u32; 4];
205
206    fn next(&mut self) -> Option<Self::Item> {
207        if self.data.is_empty() {
208            return None;
209        }
210
211        let v = self.data[0];
212        let data = &self.data[1..];
213
214        let (a, b, c, d, offset) = decode_block(v, data);
215
216        self.data = &data[offset..];
217
218        Some([a, b, c, d])
219    }
220}
221
222pub fn decompress_4(data: &[u8]) -> (&[u8], [u32; 4]) {
223    let v = data[0];
224    let data = &data[1..];
225
226    let (a, b, c, d, offset) = decode_block(v, data);
227    (&data[offset..], [a, b, c, d])
228}
229
230pub fn decompress(data: &[u8]) -> DataBlockIter {
231    DataBlockIter { data }
232}
233
234pub fn compress(iter: impl IntoIterator<Item = u32>) -> Vec<u8> {
235    let mut buffer = Vec::new();
236    let iter = iter.into_iter();
237    for mut chunk in (Chunk { iter }) {
238        while chunk.len() < 4 {
239            chunk.push(0);
240        }
241
242        compress_block(&mut buffer, to_block(chunk));
243    }
244
245    buffer.shrink_to_fit();
246
247    buffer
248}
249
/// Converts a vector holding exactly four values into a fixed-size block.
///
/// # Panics
///
/// Panics if `v` does not contain exactly four elements.
fn to_block(v: Vec<u32>) -> [u32; 4] {
    match v.as_slice() {
        &[first, second, third, fourth] => [first, second, third, fourth],
        _ => unreachable!("length of vector must be 4"),
    }
}
257
258pub fn compress_block(buffer: &mut Vec<u8>, chunk: [u32; 4]) {
259    let mut mask = 0; //bits0 | bits1 << 2 | bits2 << 4 | bits3 << 6;
260    let maskidx = buffer.len();
261    buffer.push(0);
262
263    // loop over every integer in the chunk
264    for i in 0..4u8 {
265        let elem = chunk[i as usize];
266
267        let bits = var_bits(elem);
268        mask |= bits << (i << 1);
269
270        // the first byte uses less instructions to encode.
271        buffer.push((elem & 0xff) as u8);
272        for byte_index in 1..=bits {
273            let byte_index = byte_index * 8;
274            let byte = (elem >> byte_index) & 0xff;
275            buffer.push(byte as u8);
276        }
277    }
278
279    // apply mask
280    buffer[maskidx] = mask;
281}
282
283use smallvec::SmallVec;
284
285pub struct ListUInt32 {
286    data: Vec<u8>,
287    head: SmallVec<[u32; 3]>,
288}
289
290impl ListUInt32 {
291    pub fn new() -> Self {
292        ListUInt32 {
293            data: Vec::new(),
294            head: SmallVec::new(),
295        }
296    }
297
298    pub fn push(&mut self, value: u32) {
299        if self.head.len() == 3 {
300            let chunk = [self.head[0], self.head[1], self.head[2], value];
301            compress_block(&mut self.data, chunk);
302        } else {
303            self.head.push(value);
304        }
305    }
306
307    pub fn collect(&self) -> Vec<u32> {
308        let i = DataBlockIter { data: &self.data };
309        let mut v = i.collect();
310        for i in &self.head {
311            v.push(*i);
312        }
313        v
314    }
315}
316
317impl Default for ListUInt32 {
318    fn default() -> Self {
319        Self::new()
320    }
321}