Skip to main content

ds_rom/compress/
huffman.rs

1use bitreader::BitReader;
2use rust_bitwriter::BitWriter;
3
4/// De/compresses data with [Huffman coding](https://en.wikipedia.org/wiki/Huffman_coding), one nibble at a time. This struct
5/// is not represented as a tree (like it is formally) but instead the Huffman codes are found in an array of length 16, one
6/// for each possible nibble value (2^4).
7pub struct NibbleHuffman {
8    /// Huffman codes for each nibble value, i.e. `codes[n]` encodes the value `n`. The codes must not be prefixed by any other
9    /// code in the array.
10    pub codes: [NibbleHuffmanCode; 16],
11}
12
13/// A huffman code for [NibbleHuffman].
14pub struct NibbleHuffmanCode {
15    /// The number of bits in [`Self::bits`].
16    pub length: u8,
17    /// The code for the corresponding nibble.
18    pub bits: u8,
19}
20
21impl NibbleHuffman {
22    fn decompress_nibble(&self, reader: &mut BitReader) -> u8 {
23        let (bits_read, value) = self
24            .codes
25            .iter()
26            .enumerate()
27            .find_map(|(index, code)| {
28                let (bits_read, value) = if code.length as u64 > reader.remaining() {
29                    let rem = reader.remaining() as u8;
30                    (rem, reader.peek_u8(rem).unwrap() << (code.length - rem))
31                } else {
32                    (code.length, reader.peek_u8(code.length).unwrap())
33                };
34                (value == code.bits).then_some((bits_read, index as u8))
35            })
36            .unwrap();
37        reader.skip(bits_read as u64).unwrap();
38        value
39    }
40
41    /// Decompresses `data` into the `out` slice. It will decompress until `out` is filled, padding zeros past the end of
42    /// `data`.
43    pub fn decompress_to_slice(&self, data: &[u8], out: &mut [u8]) {
44        let mut reader = BitReader::new(data);
45
46        for i in 0..out.len() {
47            let low = self.decompress_nibble(&mut reader);
48            let high = self.decompress_nibble(&mut reader);
49            out[i] = (high << 4) | low;
50        }
51    }
52
53    fn compress_nibble(&self, writer: &mut BitWriter, data: u8) {
54        assert!(data < 16);
55        let (_, code) = &self.codes.iter().enumerate().find(|(value, _)| *value as u8 == data).unwrap();
56        writer.write_u8(code.bits, code.length).unwrap();
57    }
58
59    /// Compresses `bytes` into the `out` slice. It will truncate the compressed result to fit into `out`.
60    pub fn compress_to_slice(&self, bytes: &[u8], out: &mut [u8]) {
61        let mut writer = BitWriter::new();
62
63        for byte in bytes.iter() {
64            let low = byte & 0xf;
65            let high = byte >> 4;
66            self.compress_nibble(&mut writer, low);
67            self.compress_nibble(&mut writer, high);
68        }
69
70        let _ = writer.close();
71        let data = writer.data();
72        let len = out.len().min(data.len());
73        out[..len].copy_from_slice(&data[..len]);
74    }
75
76    /// Does the opposite of [Self::data_to_diff16]. If `data` consists of 16-bit integers that look like A, B-A, C-B and so
77    /// on, this function will recover the original data A, B, C.
78    ///
79    /// # Panics
80    ///
81    /// Panics if `data.len()` is not a multiple of 2.
82    pub fn diff16_to_data(&self, data: &mut [u8]) {
83        assert!(data.len().is_multiple_of(2));
84        let mut prev = 0;
85        for i in (0..data.len()).step_by(2) {
86            let curr = u16::from_le_bytes([data[i], data[i + 1]]);
87            let value = curr.wrapping_add(prev);
88            data[i..i + 2].copy_from_slice(&value.to_le_bytes());
89            prev = value;
90        }
91    }
92
93    /// Differentiates every 16-bit integer in `data`. For example, if the 16-bit integers are called A, B, C and so on, then
94    /// they will be differentiated to A, B-A, C-B and so on.
95    ///
96    /// If `data` has a lot of repeating values, this will result in plenty of zeros. This benefits Huffman compression, as it
97    /// compresses better if some values occur more often than others.
98    ///
99    /// # Panics
100    ///
101    /// Panics if `data.len()` is not a multiple of 2.
102    pub fn data_to_diff16(&self, data: &mut [u8]) {
103        assert!(data.len().is_multiple_of(2));
104        let mut prev = 0;
105        for i in (0..data.len()).step_by(2) {
106            let curr = u16::from_le_bytes([data[i], data[i + 1]]);
107            let value = curr.wrapping_sub(prev);
108            data[i..i + 2].copy_from_slice(&value.to_le_bytes());
109            prev = curr;
110        }
111    }
112}