1pub struct Huffman {
7 codewords: Vec<i32>,
8 bits: Vec<u8>,
9 symbol_tree: Vec<i32>,
10}
11
12impl Huffman {
13 pub fn new(bit_lengths: &[u8]) -> Self {
15 let symbols = bit_lengths.len();
16 let mut codewords = vec![0i32; symbols];
17 let bits = bit_lengths.to_vec();
18 let mut next_codewords = [0i32; 33];
19 let mut symbol_tree = vec![0i32; 8];
20 let mut next_node: i32 = 0;
21
22 for symbol in 0..symbols {
23 let codeword_bits = bits[symbol];
24 if codeword_bits == 0 {
25 continue;
26 }
27 let bit = 1i32 << (32 - codeword_bits as i32);
28 let codeword = next_codewords[codeword_bits as usize];
29 codewords[symbol] = codeword;
30
31 let updated;
32 if (bit & codeword) == 0 {
33 updated = codeword | bit;
34 for i in (1..codeword_bits as usize).rev() {
35 let next_cw = next_codewords[i];
36 if codeword != next_cw {
37 break;
38 }
39 let bit2 = 1i32 << (32 - i as i32);
40 if (next_cw & bit2) != 0 {
41 next_codewords[i] = next_codewords[i - 1];
42 break;
43 }
44 next_codewords[i] = next_cw | bit2;
45 }
46 } else {
47 updated = next_codewords[codeword_bits as usize - 1];
48 }
49
50 next_codewords[codeword_bits as usize] = updated;
51 for entry in &mut next_codewords[(codeword_bits as usize + 1)..=32] {
52 if codeword == *entry {
53 *entry = updated;
54 }
55 }
56
57 let mut node: i32 = 0;
58 for i in 0..codeword_bits as i32 {
59 let mask = (i32::MIN as u32 >> i as u32) as i32;
60 if (codeword & mask) == 0 {
61 node += 1;
62 } else {
63 if symbol_tree[node as usize] == 0 {
64 symbol_tree[node as usize] = next_node;
65 }
66 node = symbol_tree[node as usize];
67 }
68 if node as usize >= symbol_tree.len() {
69 symbol_tree.resize(symbol_tree.len() * 2, 0);
70 }
71 }
72 symbol_tree[node as usize] = !(symbol as i32);
73 if node >= next_node {
74 next_node = node + 1;
75 }
76 }
77
78 Self {
79 codewords,
80 bits,
81 symbol_tree,
82 }
83 }
84
85 pub fn encode(&self, src: &[u8], dest: &mut [u8]) -> usize {
88 let mut prev_codeword: i32 = 0;
89 let mut bit_pos: i32 = 0;
90
91 for &sym in src {
92 let symbol = sym as usize;
93 let codeword = self.codewords[symbol] as u32;
94 let codeword_bits = self.bits[symbol];
95 if codeword_bits == 0 {
96 continue;
97 }
98
99 let byte_pos = (bit_pos >> 3) as usize;
100 let bit_off = bit_pos & 7;
101 let masked = prev_codeword & ((-bit_off) >> 31);
102 let end_byte_pos = ((bit_off + codeword_bits as i32 - 1) >> 3) as usize + byte_pos;
103 let shift = (bit_off + 24) as u32;
104
105 prev_codeword = masked | (codeword >> shift) as i32;
106 dest[byte_pos] = prev_codeword as u8;
107
108 if end_byte_pos > byte_pos {
109 let p1 = byte_pos + 1;
110 let s1 = shift - 8;
111 prev_codeword = (codeword >> s1) as i32;
112 dest[p1] = prev_codeword as u8;
113
114 if p1 < end_byte_pos {
115 let p2 = p1 + 1;
116 let s2 = s1 - 8;
117 prev_codeword = (codeword >> s2) as i32;
118 dest[p2] = prev_codeword as u8;
119
120 if end_byte_pos > p2 {
121 let p3 = p2 + 1;
122 let s3 = s2 - 8;
123 prev_codeword = (codeword >> s3) as i32;
124 dest[p3] = prev_codeword as u8;
125
126 if p3 < end_byte_pos {
127 let s4 = 32 - s3;
128 let p4 = p3 + 1;
129 prev_codeword = (codeword << s4) as i32;
130 dest[p4] = prev_codeword as u8;
131 }
132 }
133 }
134 }
135
136 bit_pos += codeword_bits as i32;
137 }
138
139 ((bit_pos + 7) >> 3) as usize
140 }
141
142 pub fn decode(&self, src: &[u8], src_off: usize, dest: &mut [u8], len: usize) -> usize {
145 if len == 0 {
146 return 0;
147 }
148
149 let mut dest_off: usize = 0;
150 let mut src_pos = src_off;
151 let mut node: i32 = 0;
152
153 'outer: loop {
154 let b = src[src_pos] as i8;
155
156 macro_rules! step {
157 ($mask:expr) => {
158 let next = if (b as i32 & $mask) == 0 {
159 node + 1
160 } else {
161 self.symbol_tree[node as usize]
162 };
163 let val = self.symbol_tree[next as usize];
164 if val < 0 {
165 dest[dest_off] = (!val) as u8;
166 dest_off += 1;
167 if dest_off >= len {
168 break 'outer;
169 }
170 node = 0;
171 } else {
172 node = next;
173 }
174 };
175 }
176
177 let next = if b < 0 {
178 self.symbol_tree[node as usize]
179 } else {
180 node + 1
181 };
182 let val = self.symbol_tree[next as usize];
183 if val < 0 {
184 dest[dest_off] = (!val) as u8;
185 dest_off += 1;
186 if dest_off >= len {
187 break;
188 }
189 node = 0;
190 } else {
191 node = next;
192 }
193
194 step!(0x40);
195 step!(0x20);
196 step!(0x10);
197 step!(0x08);
198 step!(0x04);
199 step!(0x02);
200 step!(0x01);
201
202 src_pos += 1;
203 }
204
205 src_pos + 1 - src_off
206 }
207}
208
209#[cfg(test)]
210mod tests {
211 use super::*;
212
213 fn test_table() -> Vec<u8> {
214 let mut bits = vec![0u8; 256];
215 bits[b'a' as usize] = 1;
216 bits[b'b' as usize] = 2;
217 bits[b'c' as usize] = 3;
218 bits[b'd' as usize] = 3;
219 bits
220 }
221
222 #[test]
223 fn roundtrip() {
224 let h = Huffman::new(&test_table());
225 let src = b"abcd";
226 let mut compressed = vec![0u8; 64];
227 let written = h.encode(src, &mut compressed);
228 compressed.truncate(written);
229
230 let mut decoded = vec![0u8; src.len()];
231 h.decode(&compressed, 0, &mut decoded, src.len());
232 assert_eq!(&decoded, src);
233 }
234
235 #[test]
236 fn empty() {
237 let h = Huffman::new(&test_table());
238 let src: &[u8] = b"";
239 let mut compressed = vec![0u8; 64];
240 let written = h.encode(src, &mut compressed);
241 assert_eq!(written, 0);
242 assert_eq!(h.decode(&compressed, 0, &mut [], 0), 0);
243 }
244
245 #[test]
246 fn single_char() {
247 let h = Huffman::new(&test_table());
248 let src = b"a";
249 let mut compressed = vec![0u8; 64];
250 let written = h.encode(src, &mut compressed);
251 compressed.truncate(written);
252
253 let mut decoded = vec![0u8; 1];
254 h.decode(&compressed, 0, &mut decoded, 1);
255 assert_eq!(&decoded, src);
256 }
257
258 #[test]
259 fn repeated() {
260 let h = Huffman::new(&test_table());
261 let src = b"aaaa";
262 let mut compressed = vec![0u8; 64];
263 let written = h.encode(src, &mut compressed);
264 compressed.truncate(written);
265
266 let mut decoded = vec![0u8; src.len()];
267 h.decode(&compressed, 0, &mut decoded, src.len());
268 assert_eq!(&decoded, src);
269 }
270}