group_varint_offset_encoding/
lib.rs1mod decoder;
2use decoder::decode_block;
3mod util;
4use util::*;
5
6#[cfg(test)]
7mod tests {
8 use super::*;
9
10 #[test]
11 fn tst1() {
12 let data = compress([0u32, 0, 0]);
13 assert_eq!(data, [0, 0, 0, 0]);
14 }
15
16 #[test]
17 fn tst2() {
18 let data = compress([128, 0, 0]);
19 assert_eq!(data, [0, 128, 0, 0]);
20 }
21
22 #[test]
23 fn tst3() {
24 let data = compress([255, 255, 255]);
25 assert_eq!(data, [1 << 6, 0, 0, 0]);
26 }
27
28 #[test]
29 fn tst4() {
30 let data = compress([255 * 2, 255, 255]);
31 assert_eq!(data, [1 << 6, 255, 0, 0]);
32 }
33
34 #[test]
35 fn test_lots_of_numbers() {
36 let mut data: Vec<u32> = (0..256).map(|i| i * i * i * i).collect();
38 data.extend(0..256);
39 data.extend((0..256).map(|i| i << 12));
40
41 let cmpr = compress(data.iter().cloned());
42 let result = decompress(&cmpr);
43
44 assert_eq!(
45 result, data,
46 "expect data to be the same before and after compression/decompression"
47 );
48 }
49}
50
51struct DataBlockIter<'a> {
52 data: &'a [u8],
53}
54
55impl<'a> DataBlockIter<'a> {
56 fn to_vec(self) -> Vec<u32> {
57 let mut v = Vec::new();
58
59 for [a, b, c] in self {
60 v.push(a);
61 v.push(b);
62 v.push(c);
63 }
64
65 v
66 }
67}
68
69fn get_offset(index: u8) -> u32 {
70 match index {
71 0b00 => 0,
72 0b01 => 0xff,
73 0b10 => 0xffff,
74 0b11 => 0xffffff,
75 _ => panic!("expect number ranging from 0..=3"),
76 }
77}
78
79impl<'a> Iterator for DataBlockIter<'a> {
80 type Item = [u32; 3];
81
82 fn next(&mut self) -> Option<Self::Item> {
83 if self.data.is_empty() {
84 return None;
85 }
86
87 let v = self.data[0];
88 let offset = get_offset(v >> 6);
89 let data = &self.data[1..];
90
91 let (mut a, mut b, mut c, bytes_consumed) = decode_block(v, data);
92
93 a += offset;
94 b += offset;
95 c += offset;
96
97 self.data = &data[bytes_consumed..];
98
99 Some([a, b, c])
100 }
101}
102
103pub fn decompress(data: &[u8]) -> Vec<u32> {
104 DataBlockIter { data }.to_vec()
105}
106
107pub fn compress(iter: impl IntoIterator<Item = u32>) -> Vec<u8> {
108 let mut buffer = Vec::new();
109 let iter = iter.into_iter();
110 for mut chunk in (Chunk { iter }) {
111 while chunk.len() < 3 {
113 chunk.push(0);
114 }
115
116 compress_block(&mut buffer, to_block(chunk));
117 }
118
119 buffer.shrink_to_fit();
120
121 buffer
122}
123
124fn to_block(v: Vec<u32>) -> [u32; 3] {
125 if v.len() != 3 {
126 unreachable!("length of vector must be 3");
127 }
128
129 [v[0], v[1], v[2]]
130}
131
132fn max_viable_offset(chunk: [u32; 3]) -> u8 {
135 for i in [3, 2, 1] {
136 let offset = get_offset(i);
137
138 let [a, b, c] = chunk.map(|elem| elem >= offset);
139
140 if a & b & c {
141 return i;
142 }
143 }
144
145 0
146}
147
148fn compress_block(buffer: &mut Vec<u8>, chunk: [u32; 3]) {
149 let offset_index = max_viable_offset(chunk);
151 let offset = get_offset(offset_index);
152
153 let chunk = chunk.map(|elem| elem - offset);
155
156 let mut mask = offset_index << 6; let maskidx = buffer.len();
158 buffer.push(0);
159
160 for i in 0..3u8 {
162 let elem = chunk[i as usize];
163
164 let bits = var_bits(elem);
165 mask |= bits << (i << 1);
166
167 buffer.push((elem & 0xff) as u8);
169 for byte_index in 1..=bits {
170 let byte_index = byte_index * 8;
171 let byte = (elem >> byte_index) & 0xff;
172 buffer.push(byte as u8);
173 }
174 }
175
176 buffer[maskidx] = mask;
178}
179
180use smallvec::SmallVec;
181
182pub struct ListUInt32 {
184 data: Vec<u8>,
185 head: SmallVec<[u32; 3]>,
186}
187
188impl ListUInt32 {
189 pub fn new() -> Self {
190 ListUInt32 {
191 data: Vec::new(),
192 head: SmallVec::new(),
193 }
194 }
195
196 pub fn push(&mut self, value: u32) {
197 if self.head.len() == 2 {
198 let chunk = [self.head[0], self.head[1], self.head[2]];
199 compress_block(&mut self.data, chunk);
200 } else {
201 self.head.push(value);
202 }
203 }
204
205 pub fn to_vec(&self) -> Vec<u32> {
206 let i = DataBlockIter { data: &self.data };
207 let mut v = i.to_vec();
208 for i in &self.head {
209 v.push(*i);
210 }
211 v
212 }
213}
214
215impl Default for ListUInt32 {
216 fn default() -> Self {
217 Self::new()
218 }
219}