int_compression_4_wise/
lib.rs

1mod decoder;
2use std::mem::size_of;
3
4use decoder::decode_block;
5mod util;
6use util::*;
7
#[cfg(test)]
mod tests {
    use super::*;

    /// Compresses `values`, decodes the bytes again and checks that the
    /// decoded sequence is equal to the input.
    fn assert_roundtrip(values: &[u32]) {
        let packed = compress(values.iter().cloned());
        let decoded = DataBlockIter { data: &packed }.collect();
        assert_eq!(decoded, values);
    }

    // ---- exact byte layout -------------------------------------------------

    #[test]
    fn tst1() {
        // all-zero block: zero mask followed by one byte per value
        assert_eq!(compress([0u32, 0, 0, 0]), [0, 0, 0, 0, 0]);
    }

    #[test]
    fn tst2() {
        // the largest values that still fit in a single byte each
        assert_eq!(
            compress([0xff, 0xff, 0xff, 0xff]),
            [0, 0xff, 0xff, 0xff, 0xff]
        );
    }

    #[test]
    fn tst3() {
        // one two-byte value, moved through each of the four positions
        let cases: [([u32; 4], [u8; 6]); 4] = [
            ([0xffff, 2, 3, 4], [1, 0xff, 0xff, 2, 3, 4]),
            ([0xff, 0xff01, 2, 3], [1 << 2, 0xff, 1, 0xff, 2, 3]),
            ([0xff, 0xff, 0xff01, 0], [1 << 4, 0xff, 0xff, 1, 0xff, 0]),
            ([0xff, 0xff, 0xff, 0xff03], [1 << 6, 0xff, 0xff, 0xff, 3, 0xff]),
        ];

        for (input, expected) in cases {
            assert_eq!(compress(input), expected);
        }
    }

    #[test]
    fn tst4() {
        // two blocks, every value fits in one byte
        assert_eq!(
            compress([1, 2, 3, 4, 5, 6, 7, 8]),
            [0, 1, 2, 3, 4, 0, 5, 6, 7, 8]
        );
    }

    #[test]
    fn tst5() {
        // second block starts with a two-byte value
        assert_eq!(
            compress([1, 2, 3, 4, 0xff05, 6, 7, 8]),
            [0, 1, 2, 3, 4, 1, 5, 0xff, 6, 7, 8]
        );
    }

    #[test]
    fn tst6() {
        // second block starts with a four-byte value
        let block_1 = [0b00, 1, 2, 3, 4];
        let block_2 = [0b11, 0xdd, 0xcc, 0xbb, 0xaa, 6, 7, 8];

        let mut expected: Vec<u8> = Vec::new();
        expected.extend_from_slice(&block_1);
        expected.extend_from_slice(&block_2);

        assert_eq!(compress([1, 2, 3, 4, 0xaabbccdd, 6, 7, 8]), expected);
    }

    // ---- round trips -------------------------------------------------------

    #[test]
    fn tst7() {
        assert_roundtrip(&[3243, 12, 32432, 5435]);
    }

    #[test]
    fn tst8() {
        assert_roundtrip(&[732743432, 213213213, 32, 2314324]);
    }

    #[test]
    fn tst9() {
        assert_roundtrip(&[732743432, 213213213, 32, 2314324, 3243, 12, 32432, 5435]);
    }

    #[test]
    fn tst9_1() {
        assert_roundtrip(&[0, 213213213, 32, 2314324, 3243, 12, 32432, 5435]);
    }

    #[test]
    fn tst9_2() {
        assert_roundtrip(&[0, 0, 0, 0, 3243, 12, 32432, 5435]);
    }

    #[test]
    fn tst9_3_compression() {
        let data = [0, 0, 0, 0, 1, 0, 0, 0];

        let compressed = compress(data.iter().cloned());
        assert_eq!(
            compressed,
            [
                0b00, 0, 0, 0, 0, // block 1
                0b00, 1, 0, 0, 0, // block 2
            ]
        );

        // a bug occurred here, the 1 was at the 2nd index of the block
        let newdata = DataBlockIter { data: &compressed }.collect();
        assert_eq!(newdata, data);
    }

    #[test]
    fn tst9_4() {
        assert_roundtrip(&[0, 0, 0, 0, 0, 1, 0, 0]);
    }

    #[test]
    fn tst10() {
        assert_roundtrip(&[
            327343432, 213213213, 32, 2314324, 3243, 12, 32432, 5435, 4356, 57, 657, 6546, 32, 4,
            3245, 67, 65, 432, 465, 7, 643, 542, 5424, 2432, 4, 324, 324, 326, 765, 7534,
            646546546, 45654, 6456, 546, 546, 546, 546, 546, 5462, 22222222, 5637426, 5356790,
            98765432, 34567, 6544567, 6543245, 6543, 45678, 76543, 45678, 765, 3467890, 9876, 5432,
            345, 0,
        ]);
    }
}
185
/// Iterator over a compressed byte stream that yields one decoded block of
/// four `u32` values per step.
///
/// The iterator only borrows the compressed bytes; cloning it is cheap and
/// gives an independent cursor over the same data.
#[derive(Debug, Clone)]
pub struct DataBlockIter<'a> {
    /// Compressed bytes that have not been decoded yet.
    data: &'a [u8],
}
189
190impl<'a> DataBlockIter<'a> {
191    pub fn collect(self) -> Vec<u32> {
192        let mut v = Vec::new();
193
194        for [a, b, c, d] in self {
195            v.push(a);
196            v.push(b);
197            v.push(c);
198            v.push(d);
199        }
200
201        v
202    }
203}
204
205impl<'a> Iterator for DataBlockIter<'a> {
206    type Item = [u32; 4];
207
208    fn next(&mut self) -> Option<Self::Item> {
209        if self.data.is_empty() {
210            return None;
211        }
212
213        let v = self.data[0];
214        let data = &self.data[1..];
215
216        let (a, b, c, d, offset) = decode_block(v, data);
217
218        self.data = &data[offset..];
219
220        Some([a, b, c, d])
221    }
222}
223
224pub fn decompress(data: &[u8]) -> DataBlockIter {
225    DataBlockIter { data }
226}
227
228pub fn compress(iter: impl IntoIterator<Item = u32>) -> Vec<u8> {
229    let mut buffer = Vec::new();
230    let iter = iter.into_iter();
231    for mut chunk in (Chunk { iter }) {
232        while chunk.len() < 4 {
233            chunk.push(0);
234        }
235
236        compress_block(&mut buffer, to_block(chunk));
237    }
238
239    buffer.shrink_to_fit();
240
241    buffer
242}
243
/// Converts a vector of exactly four values into a fixed-size block.
///
/// # Panics
///
/// Panics if `v` does not contain exactly four elements.
fn to_block(v: Vec<u32>) -> [u32; 4] {
    match v.as_slice() {
        &[a, b, c, d] => [a, b, c, d],
        _ => unreachable!("length of vector must be 4"),
    }
}
251
252fn compress_block(buffer: &mut Vec<u8>, chunk: [u32; 4]) {
253    let mut mask = 0; //bits0 | bits1 << 2 | bits2 << 4 | bits3 << 6;
254    let maskidx = buffer.len();
255    buffer.push(0);
256
257    // loop over every integer in the chunk
258    for i in 0..4u8 {
259        let elem = chunk[i as usize];
260
261        let bits = var_bits(elem);
262        mask |= bits << (i << 1);
263
264        // the first byte uses less instructions to encode.
265        buffer.push((elem & 0xff) as u8);
266        for byte_index in 1..=bits {
267            let byte_index = byte_index * 8;
268            let byte = (elem >> byte_index) & 0xff;
269            buffer.push(byte as u8);
270        }
271    }
272
273    // apply mask
274    buffer[maskidx] = mask;
275}
276
277use smallvec::SmallVec;
278
279pub struct ListUInt32 {
280    data: Vec<u8>,
281    head: SmallVec<[u32; 3]>,
282}
283
284impl get_size::GetSize for ListUInt32 {
285    fn get_heap_size(&self) -> usize {
286        let smallvecsize = self.head.len() * size_of::<u32>();
287        self.data.len() + smallvecsize
288    }
289}
290
291impl ListUInt32 {
292    pub fn new() -> Self {
293        ListUInt32 {
294            data: Vec::new(),
295            head: SmallVec::new(),
296        }
297    }
298
299    pub fn push(&mut self, value: u32) {
300        if self.head.len() == 3 {
301            let chunk = [self.head[0], self.head[1], self.head[2], value];
302            compress_block(&mut self.data, chunk);
303        } else {
304            self.head.push(value);
305        }
306    }
307
308    pub fn collect(&self) -> Vec<u32> {
309        let i = DataBlockIter { data: &self.data };
310        let mut v = i.collect();
311        for i in &self.head {
312            v.push(*i);
313        }
314        v
315    }
316}
317
318impl Default for ListUInt32 {
319    fn default() -> Self {
320        Self::new()
321    }
322}