bzip2_os/bitstream/
bitreader.rs

//! BitReader reads a packed bitstream for the block-oriented deconstruction of BZIP2 compressed files.
//!
//! NOTE: This module can read from any I/O source that implements `std::io::Read`.
//!
5
/// Size of the internal read buffer in bytes (1 MiB).
const BUFFER_SIZE: usize = 1024 * 1024;
/// All-ones byte; shifted right by `bit_index` it masks off the bits already consumed.
const BIT_MASK: u8 = 0xff;

/// Reads a binary Bzip2 file.
///
/// Bits are handed out starting with the most significant bit of each byte. The reader pulls
/// data from `source` in chunks of up to `BUFFER_SIZE` bytes and refills on demand.
#[derive(Debug)]
pub struct BitReader<R> {
    /// Fixed-size scratch space that `source` is read into; its length never changes.
    buffer: Vec<u8>,
    /// Number of valid bytes at the front of `buffer` (the size of the most recent read).
    filled: usize,
    /// Index into `buffer` of the byte currently being consumed.
    cursor: usize,
    /// Number of bits (0..=7) already consumed from `buffer[cursor]`.
    bit_index: usize,
    /// Underlying byte source.
    source: R,
}

impl<R: std::io::Read> BitReader<R> {
    /// Creates a new bitReader (with a 1Mbyte buffer).
    ///
    /// Nothing is read from `source` until the first bit or byte is requested.
    pub fn new(source: R) -> Self {
        Self {
            buffer: vec![0; BUFFER_SIZE],
            // Start in the "buffer exhausted" state so the first request triggers a refill.
            filled: BUFFER_SIZE,
            cursor: BUFFER_SIZE,
            bit_index: 0,
            source,
        }
    }

    /// Check (and refill) buffer. Returns true if we have data, false if there is no more.
    ///
    /// A refill is only attempted once every buffered byte has been consumed. The full buffer
    /// is offered to the source on every refill, so one short read does not limit the size of
    /// later reads.
    ///
    /// # Panics
    ///
    /// Panics if the source reports an I/O error other than `ErrorKind::Interrupted`.
    fn have_data(&mut self) -> bool {
        if self.cursor == self.filled {
            let size = loop {
                match self.source.read(&mut self.buffer) {
                    // An interrupted read is transient by contract; simply try again.
                    Err(err) if err.kind() == std::io::ErrorKind::Interrupted => continue,
                    outcome => break outcome.expect("Unable to read source data"),
                }
            };
            // If nothing came back from our read attempt, then we have no more data.
            if size == 0 {
                return false;
            }
            // Only the first `size` bytes are valid; restart consumption from the front.
            self.filled = size;
            self.cursor = 0;
            self.bit_index = 0;
        }
        true
    }

    /// Return bit as `Option<usize>` (1 or 0), or None if there is no more data to read
    pub fn bit(&mut self) -> Option<usize> {
        // A refill can only be needed on a byte boundary.
        if self.bit_index == 0 && !self.have_data() {
            return None;
        }
        // Bits are numbered from the most significant end of the byte.
        let bit = (self.buffer[self.cursor] >> (7 - self.bit_index)) & 1;
        self.bit_index = (self.bit_index + 1) % 8;
        if self.bit_index == 0 {
            self.cursor += 1;
        }
        Some(usize::from(bit))
    }

    /// Return `Option<Bool>` *true* if the next bit is 1, *false* if 0, consuming the bit,
    /// or None if there is no more data to read
    pub fn bool_bit(&mut self) -> Option<bool> {
        self.bit().map(|bit| bit == 1)
    }

    /// Return `Option<usize>` of the next n bits, or None if there is no more data to read.
    ///
    /// This is used primarily to return signatures and crc values. For example, if a crc
    /// value is stored on the stream as a u32, then `bint(32)` will return the crc value in
    /// `Some(usize)`. Requesting zero bits yields `Some(0)`.
    ///
    /// The read is done in up to three phases so that as many bits as possible are taken per
    /// step: first the remainder of a partially consumed byte, then whole bytes, and finally
    /// the leading bits of one more byte.
    ///
    /// If the data runs out part-way through, `None` is returned and the bits consumed so far
    /// are not restored. If `n` exceeds the width of `usize`, the earliest-read bits are
    /// shifted out of the result.
    pub fn bint(&mut self, mut n: usize) -> Option<usize> {
        let mut result = 0_usize;

        // Phase 1: finish off a partially consumed byte, if there is one.
        if self.bit_index > 0 {
            // Take the minimum of what is left in this byte and what we need.
            let needed = n.min(8 - self.bit_index);

            // Mask away the consumed high bits, then drop the low bits we do not want yet.
            result = usize::from(
                (self.buffer[self.cursor] & (BIT_MASK >> self.bit_index))
                    >> (8 - self.bit_index - needed),
            );
            self.bit_index += needed;
            if self.bit_index == 8 {
                self.cursor += 1;
                self.bit_index = 0;
            }

            if n == needed {
                return Some(result);
            }
            n -= needed;
        }

        // Phase 2: whole bytes. We are on a byte boundary here (bit_index == 0).
        while n >= 8 {
            if !self.have_data() {
                return None;
            }
            result = result << 8 | usize::from(self.buffer[self.cursor]);
            self.cursor += 1;
            n -= 8;
        }

        // Phase 3: the leading `n` (1..=7) bits of the next byte.
        if n > 0 {
            if !self.have_data() {
                return None;
            }
            result = result << n | usize::from(self.buffer[self.cursor] >> (8 - n));
            // Still on a byte boundary, so the new bit position is exactly `n`; the byte is
            // never finished here, hence the cursor stays put.
            self.bit_index = n;
        }
        Some(result)
    }

    /// Returns a byte as an `Option<u8>`, or None if there is no more data to read. This is
    /// a convenience function, and calls bint(8).
    pub fn byte(&mut self) -> Option<u8> {
        // bint(8) yields at most 8 significant bits, so the narrowing cast is lossless.
        self.bint(8).map(|byte| byte as u8)
    }

    /// Returns an `Option<Vec<u8>>` of n bytes, or None if there is no more data to read. This
    /// is a convenience function, and calls byte n times.
    ///
    /// If the data ends before `n` bytes have been collected, `None` is returned and the bytes
    /// read up to that point are discarded.
    pub fn bytes(&mut self, n: usize) -> Option<Vec<u8>> {
        let mut result: Vec<u8> = Vec::with_capacity(n);

        for _ in 0..n {
            // Propagate end-of-data instead of retrying forever.
            result.push(self.byte()?);
        }
        Some(result)
    }

    /// Debugging function. Report current position in the buffer.
    ///
    /// The format is `[cursor.bit_index]`, both relative to the current buffer contents.
    pub fn loc(&self) -> String {
        format!("[{}.{}]", self.cursor, self.bit_index)
    }
}
165
166/*
167Note: I tried several refactorings to use an iterator to read bits for the above functions,
168but this code above proved faster than any iterator I could devise.
169 */
170
#[cfg(test)]
mod test {
    use super::BitReader;

    /// Eight single-bit reads walk through one byte, after which the reader reports `None`.
    #[test]
    fn basic_test() {
        let data = [0b1000_0001_u8];
        let mut reader = BitReader::new(data.as_slice());
        for expected in [1, 0, 0, 0, 0, 0, 0, 1] {
            assert_eq!(reader.bit(), Some(expected));
        }
        assert_eq!(reader.bit(), None);
    }

    /// Consecutive multi-bit reads of differing widths split one byte into 5 + 1 + 2 bits.
    #[test]
    fn bint_test() {
        let data = [0b0001_1011_u8];
        let mut reader = BitReader::new(data.as_slice());
        for (width, expected) in [(5, 3), (1, 0), (2, 3)] {
            assert_eq!(reader.bint(width), Some(expected));
        }
    }

    /// Whole bytes come back in stream order.
    #[test]
    fn byte_test() {
        let mut reader = BitReader::new("Hello, world!".as_bytes());
        for &expected in b"Hell" {
            assert_eq!(reader.byte(), Some(expected));
        }
    }

    /// A multi-byte read returns the requested prefix as a vector.
    #[test]
    fn bytes_test() {
        let mut reader = BitReader::new("Hello, world!".as_bytes());
        assert_eq!(reader.bytes(5), Some(b"Hello".to_vec()));
    }

    /// After five bytes and one bit, the reported position is byte 5, bit 1.
    #[test]
    fn loc_test() {
        let mut reader = BitReader::new("Hello, world!".as_bytes());
        let _ = reader.bytes(5);
        let _ = reader.bit();
        assert_eq!(reader.loc(), "[5.1]");
    }

    /// Boolean reads map 1 to `true` and 0 to `false`, most significant bit first.
    #[test]
    fn bool_bit_test() {
        let data = [0b0101_0000_u8];
        let mut reader = BitReader::new(data.as_slice());
        for expected in [false, true, false, true, false, false, false, false] {
            assert_eq!(reader.bool_bit(), Some(expected));
        }
    }
}