hayro_syntax/
bit_reader.rs

//! A bit reader that supports reading numbers with a width of up to 32 bits from a
//! bit stream.
3
4use log::warn;
5use smallvec::{SmallVec, smallvec};
6use std::fmt::Debug;
7
/// A bit size, i.e. the width in bits of a single value in a bit stream.
///
/// Values created through [`BitSize::from_u8`] always lie in the range `0..=32`.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub struct BitSize(u8);

impl BitSize {
    /// Create a new `BitSize`. Returns `None` if the number is bigger than 32.
    pub fn from_u8(value: u8) -> Option<Self> {
        (value <= 32).then_some(Self(value))
    }

    /// Return the number of bits of the bit size.
    pub fn bits(&self) -> usize {
        usize::from(self.0)
    }

    /// Return the bit mask of the bit size, i.e. a value whose lowest `bits()` bits are set.
    pub fn mask(&self) -> u32 {
        // Computed in 64 bits so that a width of 32 does not overflow the shift.
        ((1u64 << self.0 as u64) - 1) as u32
    }
}

/// A bit reader.
///
/// Bits are consumed starting at the most significant bit of each byte, and multi-bit
/// values are assembled with the first bit read as their most significant bit.
pub struct BitReader<'a> {
    /// The bytes that are being read.
    data: &'a [u8],
    /// The current position, counted in bits from the start of `data`.
    cur_pos: usize,
}

impl<'a> BitReader<'a> {
    /// Create a new bit reader that starts at the first bit of `data`.
    pub fn new(data: &'a [u8]) -> Self {
        Self::new_with(data, 0)
    }

    /// Create a new bit reader, and start at a specific bit offset.
    pub fn new_with(data: &'a [u8], cur_pos: usize) -> Self {
        Self { data, cur_pos }
    }

    /// Align the reader to the next byte boundary.
    ///
    /// Does nothing if the reader already sits on a byte boundary.
    pub fn align(&mut self) {
        let bit_pos = self.bit_pos();

        if bit_pos != 0 {
            self.cur_pos += 8 - bit_pos;
        }
    }

    /// Read the given number of bits from the byte stream.
    ///
    /// Returns `None` if the reader is positioned at or past the end of the data, if the
    /// data ends before all requested bits are available, or if the bit size is larger
    /// than 32. A failed read does not advance the reader.
    ///
    /// Reading zero bits from a position inside the data yields `Some(0)` and leaves the
    /// position untouched.
    pub fn read(&mut self, bit_size: BitSize) -> Option<u32> {
        let byte_pos = self.byte_pos();
        let bits = bit_size.bits();

        if bits > 32 || byte_pos >= self.data.len() {
            return None;
        }

        // A zero-width read carries no information. Handling it up front also avoids the
        // `bit_pos + bits - 1` underflow that the general path would hit when aligned.
        if bits == 0 {
            return Some(0);
        }

        let bit_pos = self.bit_pos();

        // Fast path for whole bytes. It is only valid on a byte boundary; an unaligned
        // 8-bit value straddles two bytes and has to go through the general path.
        if bits == 8 && bit_pos == 0 {
            let item = u32::from(self.data[byte_pos]);
            self.cur_pos += 8;

            return Some(item);
        }

        // Index (relative to `byte_pos`) of the last byte touched by this read. With at
        // most 7 leading bits and 32 value bits this is never larger than 4.
        let last_byte = (bit_pos + bits - 1) / 8;
        let window = self.data.get(byte_pos..=byte_pos + last_byte)?;

        // Left-align the touched bytes in a 64-bit big-endian word, then shift the
        // requested bits down to the least significant end.
        let mut buf = [0u8; 8];
        buf[..window.len()].copy_from_slice(window);

        let item = ((u64::from_be_bytes(buf) >> (64 - bit_pos - bits)) as u32) & bit_size.mask();
        self.cur_pos += bits;

        Some(item)
    }

    /// The index of the byte that contains the current position.
    fn byte_pos(&self) -> usize {
        self.cur_pos / 8
    }

    /// The offset (0 to 7) of the current position inside its byte.
    fn bit_pos(&self) -> usize {
        self.cur_pos % 8
    }
}
100
101#[derive(Debug)]
102pub(crate) struct BitWriter<'a> {
103    data: &'a mut [u8],
104    cur_pos: usize,
105    bit_size: BitSize,
106}
107
108impl<'a> BitWriter<'a> {
109    pub(crate) fn new(data: &'a mut [u8], bit_size: BitSize) -> Option<Self> {
110        if !matches!(bit_size.0, 1 | 2 | 4 | 8 | 16) {
111            return None;
112        }
113
114        Some(Self {
115            data,
116            bit_size,
117            cur_pos: 0,
118        })
119    }
120
121    pub(crate) fn split_off(self) -> (&'a [u8], BitWriter<'a>) {
122        // Assumes that we are currently aligned to a byte boundary!
123        let (left, right) = self.data.split_at_mut(self.cur_pos / 8);
124        (
125            left,
126            BitWriter {
127                data: right,
128                cur_pos: 0,
129                bit_size: self.bit_size,
130            },
131        )
132    }
133
134    pub(crate) fn cur_pos(&self) -> usize {
135        self.cur_pos
136    }
137
138    pub(crate) fn get_data(&self) -> &[u8] {
139        self.data
140    }
141
142    fn byte_pos(&self) -> usize {
143        self.cur_pos / 8
144    }
145
146    fn bit_pos(&self) -> usize {
147        self.cur_pos % 8
148    }
149
150    pub(crate) fn write(&mut self, val: u16) -> Option<()> {
151        let byte_pos = self.byte_pos();
152        let bit_size = self.bit_size;
153
154        match bit_size.0 {
155            1 | 2 | 4 => {
156                let bit_pos = self.bit_pos();
157
158                let base = self.data.get(byte_pos)?;
159                let shift = 8 - self.bit_size.bits() - bit_pos;
160                let item = ((val & self.bit_size.mask() as u16) as u8) << shift;
161
162                *(self.data.get_mut(byte_pos)?) = *base | item;
163                self.cur_pos += bit_size.bits();
164            }
165            8 => {
166                *(self.data.get_mut(byte_pos)?) = val as u8;
167                self.cur_pos += 8;
168            }
169            16 => {
170                self.data
171                    .get_mut(byte_pos..(byte_pos + 2))?
172                    .copy_from_slice(&val.to_be_bytes());
173                self.cur_pos += 16;
174            }
175            _ => unreachable!(),
176        }
177
178        Some(())
179    }
180}
181
182pub(crate) struct BitChunks<'a> {
183    reader: BitReader<'a>,
184    bit_size: BitSize,
185    chunk_len: usize,
186}
187
188impl<'a> BitChunks<'a> {
189    pub(crate) fn new(data: &'a [u8], bit_size: BitSize, chunk_len: usize) -> Option<Self> {
190        if bit_size.0 > 16 {
191            warn!("BitChunks doesn't support working with bit sizes > 16.");
192
193            return None;
194        }
195
196        let reader = BitReader::new(data);
197
198        Some(Self {
199            reader,
200            bit_size,
201            chunk_len,
202        })
203    }
204}
205
206impl<'a> Iterator for BitChunks<'_> {
207    type Item = BitChunk;
208
209    fn next(&mut self) -> Option<Self::Item> {
210        let mut bits = SmallVec::new();
211
212        for _ in 0..self.chunk_len {
213            bits.push(self.reader.read(self.bit_size)? as u16);
214        }
215
216        Some(BitChunk { bits })
217    }
218}
219
220#[derive(Debug, Clone)]
221pub(crate) struct BitChunk {
222    bits: SmallVec<[u16; 4]>,
223}
224
225impl BitChunk {
226    pub fn iter(&self) -> impl Iterator<Item = u16> + '_ {
227        self.bits.iter().copied()
228    }
229
230    pub(crate) fn new(val: u8, count: usize) -> Self {
231        Self {
232            bits: smallvec![val as u16; count],
233        }
234    }
235
236    pub(crate) fn from_reader(
237        bit_reader: &mut BitReader,
238        bit_size: BitSize,
239        chunk_len: usize,
240    ) -> Option<Self> {
241        if bit_size.0 > 16 {
242            warn!("BitChunk doesn't support working with bit sizes > 16.");
243
244            return None;
245        }
246
247        let mut bits = SmallVec::new();
248
249        for _ in 0..chunk_len {
250            bits.push(bit_reader.read(bit_size)? as u16);
251        }
252
253        Some(BitChunk { bits })
254    }
255}
256
#[cfg(test)]
mod tests {
    use super::*;

    const BS1: BitSize = BitSize(1);
    const BS2: BitSize = BitSize(2);
    const BS4: BitSize = BitSize(4);
    const BS8: BitSize = BitSize(8);
    const BS16: BitSize = BitSize(16);

    /// Reads one value of width `bit_size` per entry of `expected` and compares them in order.
    fn assert_reads(reader: &mut BitReader<'_>, bit_size: BitSize, expected: &[u32]) {
        for &want in expected {
            assert_eq!(reader.read(bit_size).unwrap(), want);
        }
    }

    /// Writes `values` with the given width into a zeroed buffer that is as long as
    /// `expected` and compares the resulting bytes.
    fn assert_writes(bit_size: u8, values: &[u16], expected: &[u8]) {
        let mut buf = vec![0u8; expected.len()];
        let mut writer = BitWriter::new(&mut buf, BitSize::from_u8(bit_size).unwrap()).unwrap();

        for &value in values {
            writer.write(value).unwrap();
        }

        assert_eq!(buf, expected);
    }

    #[test]
    fn bit_reader_16() {
        let data = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06];
        let mut reader = BitReader::new(&data);

        for pair in [[0x01, 0x02], [0x03, 0x04], [0x05, 0x06]] {
            assert_eq!(reader.read(BS16).unwrap() as u16, u16::from_be_bytes(pair));
        }
    }

    #[test]
    fn bit_writer_16() {
        assert_writes(
            16,
            &[
                u16::from_be_bytes([0x01, 0x02]),
                u16::from_be_bytes([0x03, 0x04]),
                u16::from_be_bytes([0x05, 0x06]),
            ],
            &[0x01, 0x02, 0x03, 0x04, 0x05, 0x06],
        );
    }

    #[test]
    fn bit_reader_12() {
        let data = [0b10011000, 0b00011111, 0b10101001, 0b11101001, 0b00011010];
        let mut reader = BitReader::new(&data);

        assert_reads(
            &mut reader,
            BitSize::from_u8(12).unwrap(),
            &[0b100110000001, 0b111110101001, 0b111010010001],
        );
    }

    #[test]
    fn bit_reader_9() {
        let data = [0b10011000, 0b00011111, 0b10101001, 0b11101001, 0b00011010];
        let mut reader = BitReader::new(&data);

        assert_reads(
            &mut reader,
            BitSize::from_u8(9).unwrap(),
            &[0b100110000, 0b001111110, 0b101001111, 0b010010001],
        );
    }

    #[test]
    fn bit_writer_8() {
        assert_writes(8, &[0x01, 0x02, 0x03], &[0x01, 0x02, 0x03]);
    }

    #[test]
    fn bit_reader_8() {
        let data = [0x01, 0x02, 0x03];
        let mut reader = BitReader::new(&data);

        assert_reads(&mut reader, BS8, &[0x01, 0x02, 0x03]);
    }

    #[test]
    fn bit_writer_4() {
        assert_writes(
            4,
            &[0b1001, 0b1000, 0b0001, 0b1111, 0b1010, 0b1001],
            &[0b10011000, 0b00011111, 0b10101001],
        );
    }

    #[test]
    fn bit_reader_4() {
        let data = [0b10011000, 0b00011111, 0b10101001];
        let mut reader = BitReader::new(&data);

        assert_reads(
            &mut reader,
            BS4,
            &[0b1001, 0b1000, 0b0001, 0b1111, 0b1010, 0b1001],
        );
    }

    #[test]
    fn bit_writer_2() {
        assert_writes(
            2,
            &[0b10, 0b01, 0b10, 0b00, 0b00, 0b01, 0b00, 0b00],
            &[0b10011000, 0b00010000],
        );
    }

    #[test]
    fn bit_reader_2() {
        let data = [0b10011000, 0b00010000];
        let mut reader = BitReader::new(&data);

        assert_reads(
            &mut reader,
            BS2,
            &[0b10, 0b01, 0b10, 0b00, 0b00, 0b01, 0b00, 0b00],
        );
    }

    #[test]
    fn bit_writer_1() {
        assert_writes(
            1,
            &[
                // First byte.
                1, 0, 0, 1, 1, 0, 0, 0, //
                // Second byte.
                0, 0, 0, 1, 0, 0, 0, 0,
            ],
            &[0b10011000, 0b00010000],
        );
    }

    #[test]
    fn bit_reader_1() {
        let data = [0b10011000, 0b00010000];
        let mut reader = BitReader::new(&data);

        // First byte.
        assert_reads(&mut reader, BS1, &[1, 0, 0, 1, 1, 0, 0, 0]);
        // Second byte.
        assert_reads(&mut reader, BS1, &[0, 0, 0, 1, 0, 0, 0, 0]);
    }

    #[test]
    fn bit_reader_align() {
        let data = [0b10011000, 0b00010000];
        let mut reader = BitReader::new(&data);

        // Consume half of the first byte, then skip the rest of it.
        assert_reads(&mut reader, BS1, &[1, 0, 0, 1]);
        reader.align();

        // Reading continues at the start of the second byte.
        assert_reads(&mut reader, BS1, &[0, 0, 0, 1, 0, 0, 0, 0]);
    }

    #[test]
    fn bit_reader_chunks() {
        let data = [0b10011000, 0b00010000];
        let mut chunks = BitChunks::new(&data, BitSize::from_u8(1).unwrap(), 3).unwrap();
        let expected: [[u16; 3]; 5] = [[1, 0, 0], [1, 1, 0], [0, 0, 0], [0, 0, 1], [0, 0, 0]];

        for want in expected {
            let chunk = chunks.next().unwrap();
            assert_eq!(chunk.iter().collect::<Vec<_>>(), want);
        }
    }

    #[test]
    fn bit_reader_varying_bit_sizes() {
        let data = [0b10011000, 0b00011111, 0b10101001];
        let mut reader = BitReader::new(&data);
        let steps = [
            (BS4, 0b1001),
            (BS1, 0b1),
            (BS4, 0b0000),
            (BitSize::from_u8(5).unwrap(), 0b00111),
            (BS1, 0b1),
            (BS2, 0b11),
            (BitSize::from_u8(7).unwrap(), 0b0101001),
        ];

        for (bit_size, want) in steps {
            assert_eq!(reader.read(bit_size).unwrap(), want);
        }
    }
}