bzip2_os/bitstream/
bitpacker.rs

1//! BitPacker builds a packed bitstream for the block-oriented construction of BZIP2 compressed files.
2//! 
3//! The original version of BZIP2, being single-threaded, was able to write the bitstream from start to finish.
4//! This multi-threaded version is written so that each block is compressed in a thread from the 
5//! Burrow-Wheeler-Transform through the Huffman encoding phase. The thread then passes the huffman encoded data 
6//! to the final aggregator, which writes the continuous output stream.
7//! 
8//! The packed blocks in each thread are padded at the end with zeros to reach a full byte boundary. In order for the 
9//! final aggregator to know how much padding was added, the BitPacker makes both the huffman compressed data and 
10//! the padding value available to the aggregator.
11//! 
12//! The padding is always a number between 0 and 7 bits.
13//!
14/// Creates a huffman-encoded, packed bitstream of one block of data. The final byte of the block
15/// is padded with zeros to reach a full byte. The padding is always a number between 0 and 7
16/// inclusive.
17pub struct BitPacker {
18    /// The output buffer which can be read externally.
19    pub output: Vec<u8>,
20    /// The number of zero bits padded to the last byte of the output buffer.
21    pub padding: u8,
22    /// The queue holds bits temporarily until we can put full bytes on the output buffer
23    queue: u64,
24    /// q_bits is the number of valid bits in the queue
25    q_bits: u8,
26}
27
28/// Creates a huffman-encoded, packed bitstream of one block of data. The final byte of the block
29/// is padded with zeros to reach a full byte. The padding is always a number between 0 and 7
30/// inclusive.
31impl BitPacker {
32    /// Create a new BitPacker with an output buffer with the capacity specified in size.
33    pub fn new(size: usize) -> Self {
34        Self {
35            output: Vec::with_capacity(size),
36            padding: 0,
37            queue: 0,
38            q_bits: 0,
39        }
40    }
41
42    /// Internal bitstream write function common to all out.XX functions. Keeps the queue short.
43    fn write_stream(&mut self) {
44        while self.q_bits > 7 {
45            let byte = (self.queue >> (self.q_bits - 8)) as u8;
46            self.output.push(byte); //push the packed byte out
47            self.q_bits -= 8; //adjust the count of bits left in the queue
48        }
49    }
50
51    /*
52    NOTE: out24 takes a u32.  The 8 most significant bits of the word indicate how
53    many of the least significant bits will be written. Those bits must be aligned to
54    the least signficant bit. (The middle bits are masked out.)
55    binary encoded data and puts it on the stream.
56
57    It is primarily used to write odd size data.
58    Eg 0000100_00000000_00000000_00000010 writes out 0010.
59    */
60    /// Writes 0-24 bits encoded with the number of bits to write in the most
61    /// significant byte of a 32 bit word.
62    pub fn out24(&mut self, data: u32) {
63        let depth = (data >> 24) as u8; //get bit length by shifting out the 24 data bits
64        self.queue <<= depth; //shift queue by bit length
65        self.queue |= (data & (0xffffffff >> (32 - depth))) as u64; //add data portion to queue
66        self.q_bits += depth; //update depth of queue bits
67        self.write_stream();
68    }
69
70    /// Puts a 32 bit word of pre-packed binary encoded data on the stream.
71    pub fn out32(&mut self, data: u32) {
72        self.queue <<= 32; //shift queue by bit length
73        self.queue |= data as u64; //add data portion to queue
74        self.q_bits += 32; //update depth of queue bits
75        self.write_stream();
76    }
77
78    /// Puts a 16 bit word  of pre-packed binary encoded data on the stream.
79    pub fn out16(&mut self, data: u16) {
80        self.queue <<= 16; //shift queue by bit length
81        self.queue |= data as u64; //add data portion to queue
82        self.q_bits += 16; //update depth of queue bits
83        self.write_stream();
84    }
85
86    // out_8 is not currently used, but is here for completeness.
87    // /// Puts an 8 bit word  of pre-packed binary encoded data on the stream.
88    // pub fn out8(&mut self, data: u8) {
89    //     self.queue <<= 8; //shift queue by bit length
90    //     self.queue |= data as u64; //add data portion to queue
91    //     self.q_bits += 8; //update depth of queue bits
92    //     self.write_stream();
93    // }
94
95    /// Flushes the remaining bits (1-7) from the buffer, padding with 0s in the least
96    /// signficant bits. Flush MUST be called before reading the output or data may be
97    /// left in the private queue.
98    pub fn flush(&mut self) {
99        if self.q_bits > 0 {
100            self.padding = 8 - self.q_bits % 8;
101
102            self.queue <<= self.padding; //pad the queue with zeros
103            self.q_bits += self.padding;
104            self.write_stream(); // write out all that is left
105        }
106    }
107
108    /// Debugging function to return the number of bytes.bits output so far
109    pub fn loc(&self) -> String {
110        format! {"[{}.{}]",((self.output.len() * 8) + self.q_bits as usize)/8, ((self.output.len() * 8) + self.q_bits as usize)%8}
111    }
112}
113
114#[cfg(test)]
115mod test {
116    use super::BitPacker;
117
118    #[test]
119    fn out16_test() {
120        let mut bp = BitPacker::new(100);
121        let data = 0b00100001_00100000;
122        bp.out16(data);
123        bp.flush();
124        let out = bp.output;
125        assert_eq!(out, "! ".as_bytes());
126    }
127
128    #[test]
129    fn out24_and_loc_test() {
130        let mut bp = BitPacker::new(100);
131        let data = 0b00001000_00000000_00000000_00100001;
132        bp.out24(data);
133        bp.flush();
134        let out = &bp.output;
135        assert_eq!(out, "!".as_bytes());
136        assert_eq!("[1.0]", &bp.loc());
137        let data = 0b00011000_00000000_00000000_00000011;
138        bp.out24(data);
139        bp.flush();
140        let out2 = &bp.output;
141        assert_eq!(out2, &[33, 0, 0, 3]); // Note: '33' is data from previous call
142        assert_eq!("[4.0]", &bp.loc());
143    }
144
145    #[test]
146    fn out24_short_test() {
147        let mut bp = BitPacker::new(100);
148        // add 2 bits, both set
149        let data = 0b00000010_00000000_00000000_00000011;
150        bp.out24(data);
151        bp.flush();
152        let out2 = &bp.output;
153        // bits should show at the front.
154        assert_eq!(out2, &[0b1100_0000]);
155        assert_eq!("[1.0]", &bp.loc());
156    }
157
158    #[test]
159    fn out32_test() {
160        let mut bp = BitPacker::new(100);
161        let data = 0b00100001_00100000_00100001_00100000;
162        bp.out32(data);
163        bp.flush();
164        let out = bp.output;
165        assert_eq!(out, [33, 32, 33, 32]);
166    }
167}