Skip to main content

rar_stream/decompress/
bit_reader.rs

//! Bit reader for compressed data streams.
//!
//! Reads bits from a byte stream, MSB first (RAR convention).
4
5use super::{DecompressError, Result};
6
7/// Bit reader that reads from a byte slice.
8pub struct BitReader<'a> {
9    data: &'a [u8],
10    pos: usize,
11    bit_pos: u32,
12    /// Current bit buffer (up to 32 bits)
13    buffer: u32,
14    /// Bits available in buffer
15    bits_in_buffer: u32,
16}
17
18impl<'a> BitReader<'a> {
19    /// Create a new bit reader from a byte slice.
20    pub fn new(data: &'a [u8]) -> Self {
21        let mut reader = Self {
22            data,
23            pos: 0,
24            bit_pos: 0,
25            buffer: 0,
26            bits_in_buffer: 0,
27        };
28        reader.fill_buffer();
29        reader
30    }
31
32    /// Fill the buffer with more bytes.
33    /// Optimized to read multiple bytes when possible.
34    #[inline(always)]
35    fn fill_buffer(&mut self) {
36        // Fast path: if we need 3+ bytes and have them, read all at once
37        if self.bits_in_buffer <= 8 && self.pos + 3 <= self.data.len() {
38            // Read 3 bytes (24 bits) at once
39            // SAFETY: bounds checked above
40            unsafe {
41                let b0 = *self.data.get_unchecked(self.pos) as u32;
42                let b1 = *self.data.get_unchecked(self.pos + 1) as u32;
43                let b2 = *self.data.get_unchecked(self.pos + 2) as u32;
44                let bytes = (b0 << 16) | (b1 << 8) | b2;
45                self.buffer |= bytes << (8 - self.bits_in_buffer);
46            }
47            self.bits_in_buffer += 24;
48            self.pos += 3;
49            return;
50        }
51
52        // Slow path: read one byte at a time
53        while self.bits_in_buffer <= 24 && self.pos < self.data.len() {
54            // SAFETY: bounds checked above
55            self.buffer |= unsafe {
56                (*self.data.get_unchecked(self.pos) as u32) << (24 - self.bits_in_buffer)
57            };
58            self.bits_in_buffer += 8;
59            self.pos += 1;
60        }
61    }
62
63    /// Peek at the next n bits without consuming them.
64    #[inline(always)]
65    pub fn peek_bits(&self, n: u32) -> u32 {
66        debug_assert!(n <= 16);
67        self.buffer >> (32 - n)
68    }
69
70    /// Read n bits and advance the position.
71    #[inline(always)]
72    pub fn read_bits(&mut self, n: u32) -> Result<u32> {
73        debug_assert!(n <= 16);
74
75        if n > self.bits_in_buffer && self.pos >= self.data.len() {
76            return Err(DecompressError::UnexpectedEof);
77        }
78
79        let value = self.peek_bits(n);
80        self.advance_bits(n);
81        Ok(value)
82    }
83
84    /// Advance by n bits.
85    #[inline(always)]
86    pub fn advance_bits(&mut self, n: u32) {
87        self.buffer <<= n;
88        self.bits_in_buffer = self.bits_in_buffer.saturating_sub(n);
89        self.bit_pos += n;
90        // Only refill when buffer can't satisfy a 16-bit peek
91        if self.bits_in_buffer < 16 {
92            self.fill_buffer();
93        }
94    }
95
96    /// Read a single bit.
97    #[inline(always)]
98    pub fn read_bit(&mut self) -> Result<bool> {
99        Ok(self.read_bits(1)? != 0)
100    }
101
102    /// Read a single byte (8 bits).
103    #[inline]
104    pub fn read_byte(&mut self) -> Option<u8> {
105        self.read_bits(8).ok().map(|v| v as u8)
106    }
107
108    /// Align to byte boundary by skipping remaining bits in current byte.
109    #[inline]
110    pub fn align_to_byte(&mut self) {
111        let bits_used_in_byte = self.bit_pos % 8;
112        if bits_used_in_byte > 0 {
113            let skip = 8 - bits_used_in_byte;
114            self.advance_bits(skip);
115        }
116    }
117
118    /// Get the current bit position.
119    pub fn bit_position(&self) -> u64 {
120        self.bit_pos as u64
121    }
122
123    /// Get the current byte position (bytes consumed from stream).
124    pub fn byte_position(&self) -> usize {
125        self.pos
126    }
127
128    /// Check if at end of data.
129    pub fn is_eof(&self) -> bool {
130        self.bits_in_buffer == 0 && self.pos >= self.data.len()
131    }
132
133    /// Remaining bits available.
134    pub fn remaining_bits(&self) -> u64 {
135        self.bits_in_buffer as u64 + ((self.data.len() - self.pos) as u64 * 8)
136    }
137
138    /// Debug helper to show internal state
139    #[cfg(test)]
140    pub fn debug_state(&self) -> String {
141        format!(
142            "BitReader {{ pos: {}, bit_pos: {}, buffer: {:08x}, bits_in_buffer: {} }}",
143            self.pos, self.bit_pos, self.buffer, self.bits_in_buffer
144        )
145    }
146
147    /// Peek at raw bytes from current logical position (for debugging)
148    #[cfg(test)]
149    pub fn peek_bytes(&self, n: usize) -> Vec<u8> {
150        let byte_pos = (self.bit_pos / 8) as usize;
151        self.data
152            .get(byte_pos..byte_pos + n)
153            .map(|s| s.to_vec())
154            .unwrap_or_default()
155    }
156}
157
#[cfg(test)]
mod tests {
    use super::*;

    /// Consecutive reads return the stream's bits in order, top bit of each byte first.
    #[test]
    fn test_read_bits() {
        let bytes = [0b10110100, 0b11001010];
        let mut bits = BitReader::new(&bytes);

        // (width to read, value expected)
        let steps: [(u32, u32); 3] = [(4, 0b1011), (4, 0b0100), (8, 0b11001010)];
        for &(width, want) in steps.iter() {
            assert_eq!(bits.read_bits(width).unwrap(), want);
        }
    }

    /// Peeking leaves the reader untouched, so a wider peek still starts at bit 0.
    #[test]
    fn test_peek_bits() {
        let bytes = [0b10110100];
        let bits = BitReader::new(&bytes);

        let nibble = bits.peek_bits(4);
        let whole_byte = bits.peek_bits(8);
        assert_eq!(nibble, 0b1011);
        assert_eq!(whole_byte, 0b10110100);
    }

    /// The reader reports EOF only once the final byte has been consumed.
    #[test]
    fn test_eof() {
        let bytes = [0xFF];
        let mut bits = BitReader::new(&bytes);

        let eof_before_read = bits.is_eof();
        assert!(!eof_before_read);

        bits.read_bits(8).unwrap();
        let eof_after_read = bits.is_eof();
        assert!(eof_after_read);
    }
}
190}