Skip to main content

rs_crypto/
huffman.rs

1/// Huffman codec for RuneScape chat messages.
2///
3/// Builds a canonical Huffman code from a bit-length table (one byte per symbol,
4/// typically 256 entries loaded from the cache). Provides encode/decode for raw
5/// byte buffers — string handling is left to the caller.
6pub struct Huffman {
7    codewords: Vec<i32>,
8    bits: Vec<u8>,
9    symbol_tree: Vec<i32>,
10}
11
12impl Huffman {
13    /// Build the codec from a bit-length table.
14    pub fn new(bit_lengths: &[u8]) -> Self {
15        let symbols = bit_lengths.len();
16        let mut codewords = vec![0i32; symbols];
17        let bits = bit_lengths.to_vec();
18        let mut next_codewords = [0i32; 33];
19        let mut symbol_tree = vec![0i32; 8];
20        let mut next_node: i32 = 0;
21
22        for symbol in 0..symbols {
23            let codeword_bits = bits[symbol];
24            if codeword_bits == 0 {
25                continue;
26            }
27            let bit = 1i32 << (32 - codeword_bits as i32);
28            let codeword = next_codewords[codeword_bits as usize];
29            codewords[symbol] = codeword;
30
31            let updated;
32            if (bit & codeword) == 0 {
33                updated = codeword | bit;
34                for i in (1..codeword_bits as usize).rev() {
35                    let next_cw = next_codewords[i];
36                    if codeword != next_cw {
37                        break;
38                    }
39                    let bit2 = 1i32 << (32 - i as i32);
40                    if (next_cw & bit2) != 0 {
41                        next_codewords[i] = next_codewords[i - 1];
42                        break;
43                    }
44                    next_codewords[i] = next_cw | bit2;
45                }
46            } else {
47                updated = next_codewords[codeword_bits as usize - 1];
48            }
49
50            next_codewords[codeword_bits as usize] = updated;
51            for entry in &mut next_codewords[(codeword_bits as usize + 1)..=32] {
52                if codeword == *entry {
53                    *entry = updated;
54                }
55            }
56
57            let mut node: i32 = 0;
58            for i in 0..codeword_bits as i32 {
59                let mask = (i32::MIN as u32 >> i as u32) as i32;
60                if (codeword & mask) == 0 {
61                    node += 1;
62                } else {
63                    if symbol_tree[node as usize] == 0 {
64                        symbol_tree[node as usize] = next_node;
65                    }
66                    node = symbol_tree[node as usize];
67                }
68                if node as usize >= symbol_tree.len() {
69                    symbol_tree.resize(symbol_tree.len() * 2, 0);
70                }
71            }
72            symbol_tree[node as usize] = !(symbol as i32);
73            if node >= next_node {
74                next_node = node + 1;
75            }
76        }
77
78        Self {
79            codewords,
80            bits,
81            symbol_tree,
82        }
83    }
84
85    /// Encode `src` into Huffman-compressed bytes in `dest`.
86    /// Returns the number of bytes written.
87    pub fn encode(&self, src: &[u8], dest: &mut [u8]) -> usize {
88        let mut prev_codeword: i32 = 0;
89        let mut bit_pos: i32 = 0;
90
91        for &sym in src {
92            let symbol = sym as usize;
93            let codeword = self.codewords[symbol] as u32;
94            let codeword_bits = self.bits[symbol];
95            if codeword_bits == 0 {
96                continue;
97            }
98
99            let byte_pos = (bit_pos >> 3) as usize;
100            let bit_off = bit_pos & 7;
101            let masked = prev_codeword & ((-bit_off) >> 31);
102            let end_byte_pos = ((bit_off + codeword_bits as i32 - 1) >> 3) as usize + byte_pos;
103            let shift = (bit_off + 24) as u32;
104
105            prev_codeword = masked | (codeword >> shift) as i32;
106            dest[byte_pos] = prev_codeword as u8;
107
108            if end_byte_pos > byte_pos {
109                let p1 = byte_pos + 1;
110                let s1 = shift - 8;
111                prev_codeword = (codeword >> s1) as i32;
112                dest[p1] = prev_codeword as u8;
113
114                if p1 < end_byte_pos {
115                    let p2 = p1 + 1;
116                    let s2 = s1 - 8;
117                    prev_codeword = (codeword >> s2) as i32;
118                    dest[p2] = prev_codeword as u8;
119
120                    if end_byte_pos > p2 {
121                        let p3 = p2 + 1;
122                        let s3 = s2 - 8;
123                        prev_codeword = (codeword >> s3) as i32;
124                        dest[p3] = prev_codeword as u8;
125
126                        if p3 < end_byte_pos {
127                            let s4 = 32 - s3;
128                            let p4 = p3 + 1;
129                            prev_codeword = (codeword << s4) as i32;
130                            dest[p4] = prev_codeword as u8;
131                        }
132                    }
133                }
134            }
135
136            bit_pos += codeword_bits as i32;
137        }
138
139        ((bit_pos + 7) >> 3) as usize
140    }
141
142    /// Decode `len` symbols from Huffman-compressed `src[src_off..]` into `dest`.
143    /// Returns the number of source bytes consumed.
144    pub fn decode(&self, src: &[u8], src_off: usize, dest: &mut [u8], len: usize) -> usize {
145        if len == 0 {
146            return 0;
147        }
148
149        let mut dest_off: usize = 0;
150        let mut src_pos = src_off;
151        let mut node: i32 = 0;
152
153        'outer: loop {
154            let b = src[src_pos] as i8;
155
156            macro_rules! step {
157                ($mask:expr) => {
158                    let next = if (b as i32 & $mask) == 0 {
159                        node + 1
160                    } else {
161                        self.symbol_tree[node as usize]
162                    };
163                    let val = self.symbol_tree[next as usize];
164                    if val < 0 {
165                        dest[dest_off] = (!val) as u8;
166                        dest_off += 1;
167                        if dest_off >= len {
168                            break 'outer;
169                        }
170                        node = 0;
171                    } else {
172                        node = next;
173                    }
174                };
175            }
176
177            let next = if b < 0 {
178                self.symbol_tree[node as usize]
179            } else {
180                node + 1
181            };
182            let val = self.symbol_tree[next as usize];
183            if val < 0 {
184                dest[dest_off] = (!val) as u8;
185                dest_off += 1;
186                if dest_off >= len {
187                    break;
188                }
189                node = 0;
190            } else {
191                node = next;
192            }
193
194            step!(0x40);
195            step!(0x20);
196            step!(0x10);
197            step!(0x08);
198            step!(0x04);
199            step!(0x02);
200            step!(0x01);
201
202            src_pos += 1;
203        }
204
205        src_pos + 1 - src_off
206    }
207}
208
209#[cfg(test)]
210mod tests {
211    use super::*;
212
213    fn test_table() -> Vec<u8> {
214        let mut bits = vec![0u8; 256];
215        bits[b'a' as usize] = 1;
216        bits[b'b' as usize] = 2;
217        bits[b'c' as usize] = 3;
218        bits[b'd' as usize] = 3;
219        bits
220    }
221
222    #[test]
223    fn roundtrip() {
224        let h = Huffman::new(&test_table());
225        let src = b"abcd";
226        let mut compressed = vec![0u8; 64];
227        let written = h.encode(src, &mut compressed);
228        compressed.truncate(written);
229
230        let mut decoded = vec![0u8; src.len()];
231        h.decode(&compressed, 0, &mut decoded, src.len());
232        assert_eq!(&decoded, src);
233    }
234
235    #[test]
236    fn empty() {
237        let h = Huffman::new(&test_table());
238        let src: &[u8] = b"";
239        let mut compressed = vec![0u8; 64];
240        let written = h.encode(src, &mut compressed);
241        assert_eq!(written, 0);
242        assert_eq!(h.decode(&compressed, 0, &mut [], 0), 0);
243    }
244
245    #[test]
246    fn single_char() {
247        let h = Huffman::new(&test_table());
248        let src = b"a";
249        let mut compressed = vec![0u8; 64];
250        let written = h.encode(src, &mut compressed);
251        compressed.truncate(written);
252
253        let mut decoded = vec![0u8; 1];
254        h.decode(&compressed, 0, &mut decoded, 1);
255        assert_eq!(&decoded, src);
256    }
257
258    #[test]
259    fn repeated() {
260        let h = Huffman::new(&test_table());
261        let src = b"aaaa";
262        let mut compressed = vec![0u8; 64];
263        let written = h.encode(src, &mut compressed);
264        compressed.truncate(written);
265
266        let mut decoded = vec![0u8; src.len()];
267        h.decode(&compressed, 0, &mut decoded, src.len());
268        assert_eq!(&decoded, src);
269    }
270}