1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
use std::{
fmt::Debug,
hint::unreachable_unchecked,
io::{BufWriter, Write},
};
use crate::{
Huffman,
utility::{
data_header::DataHeader,
encoding::{EncodingData, EncodingTable},
huffman_tree::HuffmanTree,
},
};
impl Huffman {
pub fn encode<T>(input: &[u8], output: &mut BufWriter<T>)
where
T: Write + Debug,
{
let occurences = HuffmanTree::find_occurences(input);
let tree = HuffmanTree::generate(occurences);
// for (i, node) in tree.nodes.iter().enumerate() {
// match node {
// Node::Leaf { value, occurence } => println!("{i}: {value}({occurence})"),
// Node::Node {
// left,
// right,
// occurence,
// } => println!("{i}: {left},{right}({occurence})"),
// }
// }
let encoding_table = EncodingTable::new(tree);
// Debug data
// for (value, encoding_data) in encoding_table.iter() {
// println!("{value} {:0b},{}", encoding_data.code, encoding_data.length);
// }
// Write data header
let data_header = DataHeader::new(occurences, input.len() as u64);
data_header.write_to(output).unwrap();
// println!("{output:?}");
let mut state = EncodingState::new();
for byte in input {
encode(*byte, output, &encoding_table, &mut state);
}
if state.offset != 0 {
output.write_all(&[state.current_byte]).unwrap();
}
output.flush().unwrap();
// println!("{output:?}");
}
}
struct EncodingState {
current_byte: u8,
offset: u32,
}
impl EncodingState {
pub fn new() -> Self {
Self {
current_byte: 0,
offset: 0,
}
}
}
fn encode<T>(
input: u8,
output: &mut BufWriter<T>,
encoding_table: &EncodingTable,
state: &mut EncodingState,
) where
T: Write,
{
let encoding_data = encoding_table.get(&input).expect("Missing an entry");
write(encoding_data, output, state);
}
fn write<T>(input: &EncodingData, output: &mut BufWriter<T>, state: &mut EncodingState)
where
T: Write,
{
match state.offset + input.length {
0 => unsafe { unreachable_unchecked() },
1..=7 => {
// println!("normal insert");
// println!(
// "{:b}({}) {:b}({})",
// state.current_byte, state.offset, input.code, input.length
// );
state.current_byte |= input.code << state.offset;
state.offset += input.length;
// println!(
// "{:b}({}) {:b}({})",
// state.current_byte, state.offset, input.code, input.length
// );
}
8 => {
// println!("Finished byte");
// println!(
// "{:b}({}) {:b}({})",
// state.current_byte, state.offset, input.code, input.length
// );
state.current_byte |= input.code << state.offset;
state.offset = 0;
// println!(
// "{:b}({}) {:b}({})",
// state.current_byte, state.offset, input.code, input.length
// );
output.write_all(&[state.current_byte]).unwrap();
state.current_byte = 0;
}
9.. => {
// println!("Overflow insert");
// println!(
// "{:b}({}) {:b}({})",
// state.current_byte, state.offset, input.code, input.length
// );
state.current_byte |= input.code.wrapping_shl(state.offset);
// println!(
// "{:b}({}) {:b}({})",
// state.current_byte, state.offset, input.code, input.length
// );
output.write_all(&[state.current_byte]).unwrap();
let missing_bits = (state.offset + input.length) - 8;
state.current_byte =
(input.code.wrapping_shr(input.length - missing_bits)) & ((1 << missing_bits) - 1);
state.offset = missing_bits;
// println!(
// "{:b}({}) {:b}({})",
// state.current_byte, state.offset, input.code, input.length
// );
// println!("===")
}
}
}
#[test]
fn test_encoding() {
let data = "Hello, World!".as_bytes();
let mut result = Vec::new();
Huffman::encode(data, &mut BufWriter::new(&mut result));
assert!(!result.is_empty());
/* Data
* H e l l o , spc W o r l d !
* 0000 0101 10 10 011 0001 0011 111 011 0100 10 110 0010
*
* bytes (lsb first):
* 0101_0000 1_011_10_10 1_0011_000 100_011_11 10_110_10_0 ??????_00
*/
// const OFFSET: usize = DataHeader::SIZE;
let offset: usize = result.len() - 6;
assert_eq!(result[offset + 0], 0b1010_0000);
assert_eq!(result[offset + 1], 0b0_110_01_01);
assert_eq!(result[offset + 2], 0b1_1100_100);
assert_eq!(result[offset + 3], 0b010_110_11);
assert_eq!(result[offset + 4], 0b00_011_01_0);
assert_eq!(result[offset + 5], 0b01);
}