bzip2_os/bitstream/bitpacker.rs
1//! BitPacker builds a packed bitstream for the block-oriented construction of BZIP2 compressed files.
2//!
3//! The original version of BZIP2, being single-threaded, was able to write the bitstream from start to finish.
4//! This multi-threaded version is written so that each block is compressed in a thread from the
5//! Burrow-Wheeler-Transform through the Huffman encoding phase. The thread then passes the huffman encoded data
6//! to the final aggregator, which writes the continuous output stream.
7//!
8//! The packed blocks in each thread are padded at the end with zeros to reach a full byte boundary. In order for the
9//! final aggregator to know how much padding was added, the BitPacker makes both the huffman compressed data and
10//! the padding value available to the aggregator.
11//!
12//! The padding is always a number between 0 and 7 bits.
13//!
14/// Creates a huffman-encoded, packed bitstream of one block of data. The final byte of the block
15/// is padded with zeros to reach a full byte. The padding is always a number between 0 and 7
16/// inclusive.
17pub struct BitPacker {
18 /// The output buffer which can be read externally.
19 pub output: Vec<u8>,
20 /// The number of zero bits padded to the last byte of the output buffer.
21 pub padding: u8,
22 /// The queue holds bits temporarily until we can put full bytes on the output buffer
23 queue: u64,
24 /// q_bits is the number of valid bits in the queue
25 q_bits: u8,
26}
27
28/// Creates a huffman-encoded, packed bitstream of one block of data. The final byte of the block
29/// is padded with zeros to reach a full byte. The padding is always a number between 0 and 7
30/// inclusive.
31impl BitPacker {
32 /// Create a new BitPacker with an output buffer with the capacity specified in size.
33 pub fn new(size: usize) -> Self {
34 Self {
35 output: Vec::with_capacity(size),
36 padding: 0,
37 queue: 0,
38 q_bits: 0,
39 }
40 }
41
42 /// Internal bitstream write function common to all out.XX functions. Keeps the queue short.
43 fn write_stream(&mut self) {
44 while self.q_bits > 7 {
45 let byte = (self.queue >> (self.q_bits - 8)) as u8;
46 self.output.push(byte); //push the packed byte out
47 self.q_bits -= 8; //adjust the count of bits left in the queue
48 }
49 }
50
51 /*
52 NOTE: out24 takes a u32. The 8 most significant bits of the word indicate how
53 many of the least significant bits will be written. Those bits must be aligned to
54 the least signficant bit. (The middle bits are masked out.)
55 binary encoded data and puts it on the stream.
56
57 It is primarily used to write odd size data.
58 Eg 0000100_00000000_00000000_00000010 writes out 0010.
59 */
60 /// Writes 0-24 bits encoded with the number of bits to write in the most
61 /// significant byte of a 32 bit word.
62 pub fn out24(&mut self, data: u32) {
63 let depth = (data >> 24) as u8; //get bit length by shifting out the 24 data bits
64 self.queue <<= depth; //shift queue by bit length
65 self.queue |= (data & (0xffffffff >> (32 - depth))) as u64; //add data portion to queue
66 self.q_bits += depth; //update depth of queue bits
67 self.write_stream();
68 }
69
70 /// Puts a 32 bit word of pre-packed binary encoded data on the stream.
71 pub fn out32(&mut self, data: u32) {
72 self.queue <<= 32; //shift queue by bit length
73 self.queue |= data as u64; //add data portion to queue
74 self.q_bits += 32; //update depth of queue bits
75 self.write_stream();
76 }
77
78 /// Puts a 16 bit word of pre-packed binary encoded data on the stream.
79 pub fn out16(&mut self, data: u16) {
80 self.queue <<= 16; //shift queue by bit length
81 self.queue |= data as u64; //add data portion to queue
82 self.q_bits += 16; //update depth of queue bits
83 self.write_stream();
84 }
85
86 // out_8 is not currently used, but is here for completeness.
87 // /// Puts an 8 bit word of pre-packed binary encoded data on the stream.
88 // pub fn out8(&mut self, data: u8) {
89 // self.queue <<= 8; //shift queue by bit length
90 // self.queue |= data as u64; //add data portion to queue
91 // self.q_bits += 8; //update depth of queue bits
92 // self.write_stream();
93 // }
94
95 /// Flushes the remaining bits (1-7) from the buffer, padding with 0s in the least
96 /// signficant bits. Flush MUST be called before reading the output or data may be
97 /// left in the private queue.
98 pub fn flush(&mut self) {
99 if self.q_bits > 0 {
100 self.padding = 8 - self.q_bits % 8;
101
102 self.queue <<= self.padding; //pad the queue with zeros
103 self.q_bits += self.padding;
104 self.write_stream(); // write out all that is left
105 }
106 }
107
108 /// Debugging function to return the number of bytes.bits output so far
109 pub fn loc(&self) -> String {
110 format! {"[{}.{}]",((self.output.len() * 8) + self.q_bits as usize)/8, ((self.output.len() * 8) + self.q_bits as usize)%8}
111 }
112}
113
114#[cfg(test)]
115mod test {
116 use super::BitPacker;
117
118 #[test]
119 fn out16_test() {
120 let mut bp = BitPacker::new(100);
121 let data = 0b00100001_00100000;
122 bp.out16(data);
123 bp.flush();
124 let out = bp.output;
125 assert_eq!(out, "! ".as_bytes());
126 }
127
128 #[test]
129 fn out24_and_loc_test() {
130 let mut bp = BitPacker::new(100);
131 let data = 0b00001000_00000000_00000000_00100001;
132 bp.out24(data);
133 bp.flush();
134 let out = &bp.output;
135 assert_eq!(out, "!".as_bytes());
136 assert_eq!("[1.0]", &bp.loc());
137 let data = 0b00011000_00000000_00000000_00000011;
138 bp.out24(data);
139 bp.flush();
140 let out2 = &bp.output;
141 assert_eq!(out2, &[33, 0, 0, 3]); // Note: '33' is data from previous call
142 assert_eq!("[4.0]", &bp.loc());
143 }
144
145 #[test]
146 fn out24_short_test() {
147 let mut bp = BitPacker::new(100);
148 // add 2 bits, both set
149 let data = 0b00000010_00000000_00000000_00000011;
150 bp.out24(data);
151 bp.flush();
152 let out2 = &bp.output;
153 // bits should show at the front.
154 assert_eq!(out2, &[0b1100_0000]);
155 assert_eq!("[1.0]", &bp.loc());
156 }
157
158 #[test]
159 fn out32_test() {
160 let mut bp = BitPacker::new(100);
161 let data = 0b00100001_00100000_00100001_00100000;
162 bp.out32(data);
163 bp.flush();
164 let out = bp.output;
165 assert_eq!(out, [33, 32, 33, 32]);
166 }
167}