packed_uints/
packed_uints.rs

1const U4_IN_U8: usize = 8 / 4;
2const PARITY_MASK: usize = U4_IN_U8 - 1;
3
4#[derive(Debug, Clone)]
5pub enum PackedEnum {
6    U4(Vec<u8>),
7    U8(Vec<u8>),
8    U16(Vec<u16>),
9    U32(Vec<u32>),
10}
11
12impl PackedEnum {
13    pub fn mask(&self) -> usize {
14        match self {
15            Self::U4(_) => 0b1111 as usize,
16            Self::U8(_) => u8::MAX as usize,
17            Self::U16(_) => u16::MAX as usize,
18            Self::U32(_) => u32::MAX as usize,
19        }
20    }
21
22    #[inline(always)]
23    fn get(&self, i: usize) -> usize {
24        match self {
25            Self::U4(data) => {
26                let shift = 4 * (i & PARITY_MASK);
27                ((data[i/U4_IN_U8] >> shift ) & 0b1111) as usize
28            }
29            Self::U8(data) => data[i] as usize,
30            Self::U16(data) => data[i] as usize,
31            Self::U32(data) => data[i] as usize,
32        }
33    }
34
35    #[inline(always)]
36    fn set(&mut self, i: usize, value: usize) {
37        match self {
38            Self::U4(data) => {
39                let shift: usize = 4 * (i & PARITY_MASK);
40                let mask = 0b1111 << shift;
41                let i = i / U4_IN_U8;
42                data[i] &= !mask;
43                data[i] |= (value as u8) << shift;
44            }
45            Self::U8(data) => {
46                data[i] = value as u8;
47            }
48            Self::U16(data) => {
49                data[i] = value as u16;
50            }
51            Self::U32(data) => {
52                data[i] = value as u32;
53            }
54        }
55    }
56
57    fn set_range(&mut self, start: usize, end: usize, value: usize) {
58        match self {
59            Self::U4(data) => {
60                // NOTE: this part assumes we're storing u4 in u8 (unlike the rest of the code)
61                for i in start..end {
62                    let shift: usize = 4 * (i & PARITY_MASK);
63                    let mask = 0b1111 << shift;
64                    let i = i / U4_IN_U8;
65                    data[i] &= !mask;
66                    data[i] |= (value as u8) << shift;
67                }
68            }
69            Self::U8(data) => {
70                for i in start..end {
71                    data[i] = value as u8;
72                }
73            }
74            Self::U16(data) => {
75                for i in start..end {
76                    data[i] = value as u16;
77                }
78            }
79            Self::U32(data) => {
80                for i in start..end {
81                    data[i] = value as u32;
82                }
83            }
84        }
85    }
86
87    fn set_range_step(&mut self, start: usize, end: usize, step: usize, value: usize) {
88        match self {
89            Self::U4(data) => {
90                // NOTE: this part assumes we're storing u4 in u8 (unlike the rest of the code)
91                for i in (start..end).step_by(step) {
92                    let shift: usize = 4 * (i & PARITY_MASK);
93                    let mask = 0b1111 << shift;
94                    let i = i / U4_IN_U8;
95                    data[i] &= !mask;
96                    data[i] |= (value as u8) << shift;
97                }
98            }
99            Self::U8(data) => {
100                for i in (start..end).step_by(step) {
101                    data[i] = value as u8;
102                }
103            }
104            Self::U16(data) => {
105                for i in (start..end).step_by(step) {
106                    data[i] = value as u16;
107                }
108            }
109            Self::U32(data) => {
110                for i in (start..end).step_by(step) {
111                    data[i] = value as u32;
112                }
113            }
114        }
115    }
116
117    fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = usize> + 'a> {
118        match self {
119            Self::U4(data) => Box::new(data.iter().flat_map(|a| {
120                [(a & 0b1111) as usize, (a >> 4) as usize]
121            })),
122            Self::U8(data) => Box::new(data.iter().map(|a| *a as usize)),
123            Self::U16(data) => Box::new(data.iter().map(|a| *a as usize)),
124            Self::U32(data) => Box::new(data.iter().map(|a| *a as usize)),
125        }
126    }
127    
128    pub fn unpack_u8(&self) -> Vec<u8> {
129        match self {
130            Self::U4(data) => {
131                data.iter().flat_map(|a| {
132                    [(a & 0b1111), (a >> 4)]
133                }).collect()
134            },
135            Self::U8(data) => data.iter().map(|a| *a as u8).collect(),
136            Self::U16(data) => data.iter().map(|a| *a as u8).collect(),
137            Self::U32(data) => data.iter().map(|a| *a as u8).collect(),
138        }
139    }
140
141    pub fn unpack_u16(&self) -> Vec<u16> {
142        match self {
143            Self::U4(data) => {
144                data.iter().flat_map(|a| {
145                    [(a & 0b1111) as u16, (a >> 4) as u16]
146                }).collect()
147            },
148            Self::U8(data) => data.iter().map(|a| *a as u16).collect(),
149            Self::U16(data) => data.clone(),
150            Self::U32(data) => data.iter().map(|a| *a as u16).collect(),
151        }
152    }
153
154    pub fn unpack_u32(&self) -> Vec<u32> {
155        match self {
156            Self::U4(data) => {
157                data.iter().flat_map(|a| {
158                    [(a & 0b1111) as u32, (a >> 4) as u32]
159                }).collect()
160            },
161            Self::U8(data) => data.iter().map(|a| *a as u32).collect(),
162            Self::U16(data) => data.iter().map(|a| *a as u32).collect(),
163            Self::U32(data) => data.clone(),
164        }
165    }
166}
167
168#[derive(Debug, Clone)]
169pub struct PackedUints {
170    pub data: PackedEnum,
171    pub mask: usize,
172    pub length: usize,
173}
174
175impl PackedUints {
176    pub fn new(length: usize) -> Self {
177        PackedUints::filled(length, 0)
178    }
179
180    pub fn filled(length: usize, value: usize) -> Self {
181        let bits = value.max(2).ilog2();
182        let data = if bits < 4 {
183            let value = value | (value << 4);
184            PackedEnum::U4(vec![value as u8; (length+U4_IN_U8-1) / U4_IN_U8])
185        } else if bits < 8 {
186            PackedEnum::U8(vec![value as u8; length])
187        } else if bits < 16 {
188            PackedEnum::U16(vec![value as u16; length])
189        } else {
190            PackedEnum::U32(vec![value as u32; length])
191        };
192        PackedUints { 
193            data: data, 
194            mask: 0b1111, 
195            length: length 
196        }
197    }
198
199    pub fn from(values: &[usize]) -> Self {
200        let bits = values.iter().max().unwrap_or(&2).ilog2();
201        let data = if bits < 4 {
202            let mut res = vec![0; (values.len()+U4_IN_U8-1) / U4_IN_U8];
203            for i in (0..values.len()).step_by(2) {
204                res[i/2] = (values[i+1] << 4 | values[i]) as u8;
205            }
206            PackedEnum::U4(res)
207        } else if bits < 8 {
208            PackedEnum::U8(values.iter().map(|a| *a as u8).collect())
209        } else if bits < 16 {
210            PackedEnum::U16(values.iter().map(|a| *a as u16).collect())
211        } else {
212            PackedEnum::U32(values.iter().map(|a| *a as u32).collect())
213        };
214        PackedUints {
215            mask: data.mask(),
216            data,
217            length: values.len() 
218        }
219    }
220
221    pub fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = usize> + 'a> {
222        self.data.iter()
223    }
224
225    #[inline]
226    pub fn get(&self, i: usize) -> usize {
227        self.data.get(i)
228    }
229
230    #[inline]
231    fn upscale_if_needed(&mut self, value: usize) {
232        if (value & self.mask) != value {
233            let bits = value.ilog2();
234            self.data = if bits < 8 {
235                PackedEnum::U8(self.data.iter().take(self.length).map(|a| a as u8).collect())
236            } else if bits < 16 {
237                PackedEnum::U16(self.data.iter().take(self.length).map(|a| a as u16).collect())
238            } else {
239                PackedEnum::U32(self.data.iter().take(self.length).map(|a| a as u32).collect())
240            };
241            self.mask = self.data.mask();
242        }
243    }
244
245    #[inline]
246    pub fn set(&mut self, i: usize, value: usize) {
247        self.upscale_if_needed(value);
248        self.data.set(i, value)
249    }
250
251    #[inline]
252    pub fn set_range(&mut self, start: usize, end: usize, value: usize) {
253        // check that both start and length are even
254        self.upscale_if_needed(value);
255        self.data.set_range(start, end, value);
256    }
257
258    #[inline]
259    pub fn set_range_step(&mut self, start: usize, end: usize, step: usize, value: usize) {
260        self.upscale_if_needed(value);
261        self.data.set_range_step(start, end, step, value);
262    }
263
264    /// Same thing as iter().map(|value| value as u8).collect() but 5x faster
265    #[inline]
266    pub fn unpack_u8(&self) -> Vec<u8> {
267        self.data.unpack_u8()
268    }
269
270    /// Same thing as iter().map(|value| value as u16).collect() but 5x faster
271    #[inline]
272    pub fn unpack_u16(&self) -> Vec<u16> {
273        self.data.unpack_u16()
274    }
275
276    /// Same thing as iter().map(|value| value as u32).collect() but 5x faster
277    #[inline]
278    pub fn unpack_u32(&self) -> Vec<u32> {
279        self.data.unpack_u32()
280    }
281}
282
283#[cfg(test)]
284mod tests {
285    use std::iter::zip;
286    use rand::Rng;
287    use super::PackedUints;
288
289    fn test_equal(usizes: &PackedUints, values: &[usize]) {
290        for (i, value) in values.iter().enumerate() {
291            assert_eq!(*value, usizes.get(i));
292        }
293    }
294
295    fn roundtrip(usizes: &mut PackedUints, values: &[usize]) {
296        for (i, value) in values.iter().enumerate() {
297            usizes.set(i, *value);
298        }
299        test_equal(usizes, values);
300    }
301
302    #[test]
303    pub fn test_from_iter() {
304        let mut rng = rand::thread_rng();
305        let values: [usize; 100] = [(); 100].map(|_| rng.gen_range(0..16));
306        let usizes = PackedUints::from(&values);
307        // retrieve them and test for equality
308        for (a, b) in zip(values, usizes.iter()) {
309            assert_eq!(a, b);
310        }
311    }
312
313    #[test]
314    pub fn test_u4() {
315        // EASY: Every values are in range
316        // holds integers of 4 bits (max = 2^4-1 = 15)
317        let mut rng = rand::thread_rng();
318        let mut usizes = PackedUints::new(100);
319        let values: [usize; 100] = [(); 100].map(|_| rng.gen_range(0..16));
320        roundtrip(&mut usizes, &values);
321    }
322
323    fn test_set_range(data_len: usize, start: usize, end: usize, value: usize) {
324        let mut usizes = PackedUints::new(data_len);
325        let mut values = vec![0; data_len];
326        for i in start..end {
327            values[i] = value;
328        }
329        usizes.set_range(start, end, value);
330        test_equal(&usizes, &values);
331    }
332
333    #[test]
334    pub fn test_set_range_1() {
335        test_set_range(100, 0, 32, 7);
336    }
337
338    #[test]
339    pub fn test_set_range_2() {
340        test_set_range(100, 1, 32, 7);
341    }
342
343    #[test]
344    pub fn test_set_range_3() {
345        test_set_range(100, 1, 31, 7);
346    }
347
348    #[test]
349    pub fn test_set_range_4() {
350        test_set_range(100, 0, 31, 7);
351    }
352
353    #[test]
354    pub fn test_reallocation() {
355        // HARD: some values exceed the capacity of 2^bitsize-1, need to reallocate
356        let mut rng = rand::thread_rng();
357        let mut usizes = PackedUints::new(100);
358        let values: [usize; 100] = [(); 100].map(|_| rng.gen_range(0..u32::MAX) as usize);
359        roundtrip(&mut usizes, &values);
360    }
361}