1pub mod cli;
12pub mod fs;
13
14use std::collections::{HashMap, VecDeque};
15
16use bitvec::{order::Msb0, vec::BitVec};
17
18#[derive(Debug)]
19pub struct FrequencyBuffer(pub [u64; 256]);
20
21pub fn tally_frequency(bytes: &[u8]) -> FrequencyBuffer {
22 let mut fb = FrequencyBuffer([0; 256]);
23 bytes.iter().for_each(|byte| unsafe {
24 *fb.0.as_mut_ptr().add(*byte as usize) += 1;
25 });
26 fb
27}
28
29type Idx = u8;
32type Freq = u64;
33pub fn find_and_pop_min(freq_buf: &mut [u64]) -> Option<(Idx, Freq)> {
34 let mut min_value_idx = None;
35 let mut min_value = None;
36 freq_buf.iter_mut().enumerate().for_each(|(idx, byte)| {
37 if *byte == 0 {
38 return;
39 }
40
41 match min_value {
42 None => {
43 min_value = Some(*byte);
44 min_value_idx = Some(idx as u8);
45 }
46 Some(v) if *byte < v => {
47 min_value = Some(*byte);
48 min_value_idx = Some(idx as u8);
49 }
50 _ => (),
51 }
52 });
53
54 if let Some(idx) = min_value_idx {
55 let byte = unsafe { freq_buf.get_unchecked_mut(idx as usize) };
56 *byte = 0;
57 }
58
59 match (min_value_idx, min_value) {
60 (Some(idx), Some(value)) => Some((idx, value)),
61 _ => None,
62 }
63}
64
65pub fn huff_encode_bitvec(bytes: &[u8], encoded_map: &HashMap<u8, Encoded>) -> (Vec<u8>, u64) {
66 let mut final_bits: BitVec<u8, Msb0> = BitVec::with_capacity(bytes.len() / 2);
67 for byte in bytes {
68 let encoded = encoded_map.get(byte).unwrap();
69 final_bits.extend(encoded.bits.iter());
70 }
71
72 let total_bits = final_bits.len();
73 (final_bits.into(), total_bits as u64)
74}
75
76#[derive(Debug, PartialEq, Eq)]
77pub struct Encoded {
78 bits: BitVec<u8, Msb0>,
80 num_bits_sequence: u8,
82 original_value: u8,
83}
84
85fn u64_to_u8(value: u64) -> [u8; 8] {
86 [
87 (value >> 56) as u8,
88 (value >> 48) as u8,
89 (value >> 40) as u8,
90 (value >> 32) as u8,
91 (value >> 24) as u8,
92 (value >> 16) as u8,
93 (value >> 8) as u8,
94 value as u8,
95 ]
96}
97
98pub fn serialize_huffman(
99 encoded_map: &HashMap<u8, Encoded>,
100 bit_buffer: Vec<u8>,
101 total_bits: u64,
102) -> Vec<u8> {
103 let mut serialized_buffer = u64_to_u8(total_bits).to_vec();
104
105 let mut tmp_buffer = Vec::new();
106
107 for encoded in encoded_map.values() {
108 tmp_buffer.push(encoded.original_value);
109 tmp_buffer.push(encoded.num_bits_sequence);
110 tmp_buffer.push(*encoded.bits.last().unwrap() as u8);
111 }
112
113 let size_of_header_bytes = tmp_buffer.len() as u64;
114 let size_of_header_arr = u64_to_u8(size_of_header_bytes);
115 serialized_buffer.extend_from_slice(&size_of_header_arr);
116 serialized_buffer.extend(tmp_buffer);
117 serialized_buffer.extend(bit_buffer);
118
119 serialized_buffer
120}
121
122#[derive(Debug, Clone, Copy)]
123struct ValueBitMap {
124 value: u8,
125 ends_in_1: bool,
126}
127
128pub fn deserialze_huffman(huff_bytes: &[u8]) -> Vec<u8> {
129 let total_bits = u8_to_u64(&huff_bytes[0..8]);
130 let header_end_byte = 16;
131 let header_num_bytes = u8_to_u64(&huff_bytes[8..header_end_byte]);
132
133 let mut map_values = HashMap::new();
134 let mut idx = header_end_byte;
135 let mut max_bits = 0;
136 while idx - header_end_byte < header_num_bytes as usize {
137 let value = huff_bytes[idx];
138 let encoding_number_of_bits = huff_bytes[idx + 1];
139 let ends_in_1 = huff_bytes[idx + 2] != 0;
140
141 max_bits = max_bits.max(encoding_number_of_bits);
142 let value_bit_map = ValueBitMap { value, ends_in_1 };
143
144 map_values
145 .entry(encoding_number_of_bits)
146 .and_modify(|v: &mut Vec<ValueBitMap>| v.push(value_bit_map))
147 .or_insert(vec![value_bit_map]);
148
149 idx += 3
150 }
151
152 let mut decoded_buffer: Vec<u8> = Vec::new();
153 let bit_vec: BitVec<u8, Msb0> = BitVec::from_slice(&huff_bytes[idx..]);
154 let mut bit_vec_iter = bit_vec.iter();
155
156 let mut bits_to_target = 0;
157
158 let mut read_bits = 0;
159 while read_bits < total_bits {
160 let bit = bit_vec_iter.next().unwrap();
161 if *bit {
162 bits_to_target += 1;
163 let value_bit_map = map_values.get(&bits_to_target).unwrap();
165 decoded_buffer.push(value_bit_map.iter().find(|v| v.ends_in_1).unwrap().value);
166 read_bits += bits_to_target as u64;
167 bits_to_target = 0;
168 } else {
169 bits_to_target += 1;
170 if bits_to_target >= max_bits {
171 let value_bit_map = map_values.get(&bits_to_target).unwrap();
173 decoded_buffer.push(value_bit_map.iter().find(|v| !v.ends_in_1).unwrap().value);
174 read_bits += bits_to_target as u64;
175 bits_to_target = 0
176 }
177 }
178 }
179
180 decoded_buffer
181}
182
183fn u8_to_u64(bytes: &[u8]) -> u64 {
184 assert!(bytes.len() >= 8);
185 (bytes[0] as u64) << 56
186 | (bytes[1] as u64) << 48
187 | (bytes[2] as u64) << 40
188 | (bytes[3] as u64) << 32
189 | (bytes[4] as u64) << 24
190 | (bytes[5] as u64) << 16
191 | (bytes[6] as u64) << 8
192 | bytes[7] as u64
193}
194
195pub fn build_huffman_array(mut freq_buffer: FrequencyBuffer) -> Vec<u8> {
201 let mut buffer = VecDeque::new();
202 while let Some((idx, _)) = find_and_pop_min(&mut freq_buffer.0) {
203 buffer.push_front(idx);
204 }
205 buffer.into()
206}
207
208pub fn encode_huffman_array(huffman_array: &[u8]) -> HashMap<u8, Encoded> {
209 huffman_array
210 .iter()
211 .enumerate()
212 .map(|(idx, value)| {
213 if idx == huffman_array.len() - 1 {
214 let bv: BitVec<u8, Msb0> = (0..idx as u8).map(|_| false).collect();
215 let num_bits_sequence = bv.len() as u8;
216 return (
217 *value,
218 Encoded {
219 bits: bv,
220 num_bits_sequence,
221 original_value: *value,
222 },
223 );
224 }
225
226 let bv: BitVec<u8, Msb0> = (0..idx as u8 + 1)
227 .map(|i| {
228 if i == idx as u8 {
229 return true;
230 }
231 false
232 })
233 .collect();
234 let num_bits_sequence = bv.len() as u8;
235 (
236 *value,
237 Encoded {
238 bits: bv,
239 num_bits_sequence,
240 original_value: *value,
241 },
242 )
243 })
244 .collect()
245}
246
247#[cfg(test)]
248mod tests {
249 use super::*;
250 use bitvec::prelude::*;
251
252 fn encoded_buffer_to_string(buffer: &[u8]) -> String {
253 buffer
254 .iter()
255 .map(|b| format!("{:08b}", b))
256 .collect::<String>()
257 }
258
259 #[test]
260 fn find_and_pop_min_not_empty() {
261 let mut bytes = [4, 8, 1];
262 let result = find_and_pop_min(&mut bytes);
263
264 assert!(result == Some((2, 1)));
265 assert!(bytes == [4, 8, 0]);
266 }
267
268 #[test]
269 fn find_and_pop_min_empty() {
270 let mut bytes = [0, 0, 0];
271 let result = find_and_pop_min(&mut bytes);
272 assert!(result.is_none());
273 }
274
275 #[test]
276 fn build_small_huffman_array() {
277 let bytes = [1, 2, 1, 1, 1, 1, 1, 1, 1, 3, 1];
278 let freq_buff = tally_frequency(&bytes);
279 let actual = build_huffman_array(freq_buff);
280 let expected = vec![1, 3, 2];
281 assert_eq!(actual, expected)
282 }
283
284 #[test]
285 fn build_encoded_map_from_huffman_array() {
286 let huff_arr = vec![1, 3, 2];
287 let actual = encode_huffman_array(&huff_arr);
288
289 let mut expected = HashMap::new();
290 let mut bv = bitvec![u8, Msb0;];
291 bv.push(true);
292 expected.insert(
293 1,
294 Encoded {
295 bits: bv,
296 num_bits_sequence: 1,
297 original_value: 1,
298 },
299 );
300 let mut bv = bitvec![u8, Msb0;];
301 bv.push(false);
302 bv.push(true);
303 expected.insert(
304 3,
305 Encoded {
306 bits: bv,
307 num_bits_sequence: 2,
308 original_value: 3,
309 },
310 );
311 let mut bv = bitvec![u8, Msb0;];
312 bv.push(false);
313 bv.push(false);
314 expected.insert(
315 2,
316 Encoded {
317 bits: bv,
318 num_bits_sequence: 2,
319 original_value: 2,
320 },
321 );
322
323 assert_eq!(actual, expected)
324 }
325
326 #[test]
327 fn build_huffman_tree_test_simple() {
328 let bytes = [1, 2, 1, 1, 1, 1, 1, 1, 1, 3, 1];
329 let freq_buff = tally_frequency(&bytes);
330 let huffnode = build_huffman_array(freq_buff);
331 let encode_map = encode_huffman_array(&huffnode);
332 let (encoded_buffer, total_bits) = huff_encode_bitvec(&bytes, &encode_map);
333 let expected_buffer = "1001111111011000";
334 assert_eq!(encoded_buffer_to_string(&encoded_buffer), expected_buffer);
335 let expected_total_bits = 13;
336 assert_eq!(total_bits, expected_total_bits);
337 }
338
339 #[test]
340 fn serialize_huffman_test() {
341 let bytes = [1, 3, 1, 2];
342 let freq_buff = tally_frequency(&bytes);
343 let huffnode = build_huffman_array(freq_buff);
344 let encode_map = encode_huffman_array(&huffnode);
345 let (encoded_buffer, total_bits) = huff_encode_bitvec(&bytes, &encode_map);
346 let mut serialized_buffer = serialize_huffman(&encode_map, encoded_buffer, total_bits);
347 serialized_buffer.sort();
348 let mut expected = [
349 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 9, 1, 1, 1, 3, 2, 1, 2, 2, 0, 176,
350 ];
351 expected.sort();
352
353 assert_eq!(serialized_buffer, expected);
354 }
355
356 #[test]
357 fn test_deserialize_huffman() {
358 let target = [1, 3, 1, 2];
359
360 let serialized_bytes = [
361 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 9, 1, 1, 1, 2, 2, 0, 3, 2, 1, 176,
362 ];
363
364 let actual = deserialze_huffman(&serialized_bytes);
365
366 assert_eq!(actual, target);
367 }
368}