ds_rom/compress/
huffman.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
use bitreader::BitReader;
use rust_bitwriter::BitWriter;

/// De/compresses data with [Huffman coding](https://en.wikipedia.org/wiki/Huffman_coding), one nibble at a time. This struct
/// is not represented as a tree (like it is formally) but instead the Huffman codes are found in an array of length 16, one
/// for each possible nibble value (2^4).
pub struct NibbleHuffman {
    /// Huffman codes for each nibble value, i.e. `codes[n]` encodes the value `n`. The codes must not be prefixed by any other
    /// code in the array.
    pub codes: [NibbleHuffmanCode; 16],
}

/// A huffman code for [NibbleHuffman].
pub struct NibbleHuffmanCode {
    /// The number of bits in [`Self::bits`].
    pub length: u8,
    /// The code for the corresponding nibble.
    pub bits: u8,
}

impl NibbleHuffman {
    fn decompress_nibble(&self, reader: &mut BitReader) -> u8 {
        let (bits_read, value) = self
            .codes
            .iter()
            .enumerate()
            .find_map(|(index, code)| {
                let (bits_read, value) = if code.length as u64 > reader.remaining() {
                    let rem = reader.remaining() as u8;
                    (rem, reader.peek_u8(rem).unwrap() << (code.length - rem))
                } else {
                    (code.length, reader.peek_u8(code.length).unwrap())
                };
                (value == code.bits).then_some((bits_read, index as u8))
            })
            .unwrap();
        reader.skip(bits_read as u64).unwrap();
        value
    }

    /// Decompresses `data` into the `out` slice. It will decompress until `out` is filled, padding zeros past the end of
    /// `data`.
    pub fn decompress_to_slice(&self, data: &[u8], out: &mut [u8]) {
        let mut reader = BitReader::new(data);

        for i in 0..out.len() {
            let low = self.decompress_nibble(&mut reader);
            let high = self.decompress_nibble(&mut reader);
            out[i] = high << 4 | low;
        }
    }

    fn compress_nibble(&self, writer: &mut BitWriter, data: u8) {
        assert!(data < 16);
        let (_, code) = &self.codes.iter().enumerate().find(|(value, _)| *value as u8 == data).unwrap();
        writer.write_u8(code.bits, code.length).unwrap();
    }

    /// Compresses `bytes` into the `out` slice. It will truncate the compressed result to fit into `out`.
    pub fn compress_to_slice(&self, bytes: &[u8], out: &mut [u8]) {
        let mut writer = BitWriter::new();

        for byte in bytes.iter() {
            let low = byte & 0xf;
            let high = byte >> 4;
            self.compress_nibble(&mut writer, low);
            self.compress_nibble(&mut writer, high);
        }

        let _ = writer.close();
        let data = writer.data();
        let len = out.len().min(data.len());
        out[..len].copy_from_slice(&data[..len]);
    }

    /// Does the opposite of [Self::data_to_diff16]. If `data` consists of 16-bit integers that look like A, B-A, C-B and so
    /// on, this function will recover the original data A, B, C.
    ///
    /// # Panics
    ///
    /// Panics if `data.len()` is not a multiple of 2.
    pub fn diff16_to_data(&self, data: &mut [u8]) {
        assert!(data.len() % 2 == 0);
        let mut prev = 0;
        for i in (0..data.len()).step_by(2) {
            let curr = u16::from_le_bytes([data[i], data[i + 1]]);
            let value = curr.wrapping_add(prev);
            data[i..i + 2].copy_from_slice(&value.to_le_bytes());
            prev = value;
        }
    }

    /// Differentiates every 16-bit integer in `data`. For example, if the 16-bit integers are called A, B, C and so on, then
    /// they will be differentiated to A, B-A, C-B and so on.
    ///
    /// If `data` has a lot of repeating values, this will result in plenty of zeros. This benefits Huffman compression, as it
    /// compresses better if some values occur more often than others.
    ///
    /// # Panics
    ///
    /// Panics if `data.len()` is not a multiple of 2.
    pub fn data_to_diff16(&self, data: &mut [u8]) {
        assert!(data.len() % 2 == 0);
        let mut prev = 0;
        for i in (0..data.len()).step_by(2) {
            let curr = u16::from_le_bytes([data[i], data[i + 1]]);
            let value = curr.wrapping_sub(prev);
            data[i..i + 2].copy_from_slice(&value.to_le_bytes());
            prev = curr;
        }
    }
}