1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
use std::fmt;
use std::num::NonZeroU8;
use std::ops::Range;
use crate::{Bitstream, DecodeFailed};
/// The canonical tree cannot be used to decode elements. Instead, it behaves like a builder for
/// instances of the actual tree that can decode elements efficiently.
#[derive(Debug)]
pub struct CanonicalTree {
// > Each tree element can have a path length of [0, 16], where a zero path length indicates
// > that the element has a zero frequency and is not present in the tree.
//
// We represent them as `u8` due to their very short range.
path_lengths: Vec<u8>,
}
pub struct Tree {
path_lengths: Vec<u8>,
largest_length: NonZeroU8,
huffman_tree: Vec<u16>,
}
impl CanonicalTree {
pub fn new(count: usize) -> Self {
Self {
// > In the case of the very first such tree, the delta is calculated against a tree
// > in which all elements have a zero path length.
path_lengths: vec![0; count],
}
}
/// Create a new `Tree` instance from this cast that can be used to decode elements. If the
/// resulting tree is empty (all path lengths are 0), then `Ok(None)` is returned.
///
/// This method transforms the canonical Huffman tree into a different structure that can
/// be used to better decode elements.
// > an LZXD decoder uses only the path lengths of the Huffman tree to reconstruct the
// > identical tree,
pub fn create_instance_allow_empty(&self) -> Result<Option<Tree>, DecodeFailed> {
// The ideas implemented by this method are heavily inspired from LeonBlade's xnbcli
// on GitHub.
//
// The path lengths contains the bit indices or zero if its not present, so find the
// highest path length to determine how big our tree needs to be.
let largest_length =
match NonZeroU8::new(*self.path_lengths.iter().max().expect("empty path lengths")) {
Some(x) => x,
// N.B: If all the path lengths are zero, then the tree is empty (which is allowed).
None => return Ok(None),
};
let mut huffman_tree = vec![0; 1 << largest_length.get()];
// > a zero path length indicates that the element has a zero frequency and is not
// > present in the tree. Tree elements are output in sequential order starting with the
// > first element
//
// We start at the MSB, 1, and write the tree elements in sequential order from index 0.
let mut pos = 0;
for bit in 1..=largest_length.get() {
let amount = 1 << (largest_length.get() - bit);
// The codes correspond with the indices of the path length (because
// `path_lengths[code]` is its path length).
for code in 0..self.path_lengths.len() {
// As soon as a code's path length matches with our bit index write the code as
// many times as the bit index itself represents.
if self.path_lengths[code] == bit {
huffman_tree
.get_mut(pos..pos + amount)
.ok_or(DecodeFailed::InvalidPathLengths)?
.iter_mut()
.for_each(|x| *x = code as u16);
pos += amount;
}
}
}
// If we didn't fill the entire table, the path lengths were wrong.
if pos != huffman_tree.len() {
Err(DecodeFailed::InvalidPathLengths)?;
}
Ok(Some(Tree {
path_lengths: self.path_lengths.clone(),
largest_length,
huffman_tree,
}))
}
/// Create a new `Tree` instance from this cast that can be used to decode elements.
///
/// This method transforms the canonical Huffman tree into a different structure that can
/// be used to better decode elements.
// > an LZXD decoder uses only the path lengths of the Huffman tree to reconstruct the
// > identical tree,
pub fn create_instance(&self) -> Result<Tree, DecodeFailed> {
self.create_instance_allow_empty()?
.ok_or(DecodeFailed::EmptyTree)
}
// Note: the tree already exists and is used to apply the deltas.
pub fn update_range_with_pretree(
&mut self,
bitstream: &mut Bitstream,
range: Range<usize>,
) -> Result<(), DecodeFailed> {
// > Each of the 17 possible values of (len[x] - prev_len[x]) mod 17, plus three
// > additional codes used for run-length encoding, are not output directly as 5-bit
// > numbers but are instead encoded via a Huffman tree called the pretree. The pretree
// > is generated dynamically according to the frequencies of the 20 allowable tree
// > codes. The structure of the pretree is encoded in a total of 80 bits by using 4 bits
// > to output the path length of each of the 20 pretree elements. Once again, a zero
// > path length indicates a zero-frequency element.
let pretree = {
let mut path_lengths = Vec::with_capacity(20);
for _ in 0..20 {
path_lengths.push(bitstream.read_bits(4)? as u8)
}
Tree::from_path_lengths(path_lengths)?
};
// > Tree elements are output in sequential order starting with the first element.
let mut i = range.start;
while i < range.end {
// > The "real" tree is then encoded using the pretree Huffman codes.
let code = pretree.decode_element(bitstream)?;
// > Elements can be encoded in one of two ways: if several consecutive elements have
// > the same path length, run-length encoding is employed; otherwise, the element is
// > output by encoding the difference between the current path length and the
// > previous path length of the tree, mod 17.
match code {
0..=16 => {
self.path_lengths[i] = (17 + self.path_lengths[i] - code as u8) % 17;
i += 1;
}
// > Codes 17, 18, and 19 are used to represent consecutive elements that have the
// > same path length.
17 => {
let zeros = bitstream.read_bits(4)?;
self.path_lengths
.get_mut(i..i + zeros as usize + 4)
.ok_or(DecodeFailed::InvalidPretreeRle)?
.iter_mut()
.for_each(|x| *x = 0);
i += zeros as usize + 4;
}
18 => {
let zeros = bitstream.read_bits(5)?;
self.path_lengths
.get_mut(i..i + zeros as usize + 20)
.ok_or(DecodeFailed::InvalidPretreeRle)?
.iter_mut()
.for_each(|x| *x = 0);
i += zeros as usize + 20;
}
19 => {
let same = bitstream.read_bits(1)?;
// "Decode new code" is used to parse the next code from the bitstream, which
// has a value range of [0, 16].
let code = pretree.decode_element(bitstream)?;
if code > 16 {
return Err(DecodeFailed::InvalidPretreeElement(code))?;
}
let value = (17 + self.path_lengths[i] - code as u8) % 17;
self.path_lengths
.get_mut(i..i + same as usize + 4)
.ok_or(DecodeFailed::InvalidPretreeRle)?
.iter_mut()
.for_each(|x| *x = value);
i += same as usize + 4;
}
_ => return Err(DecodeFailed::InvalidPretreeElement(code)),
};
}
Ok(())
}
}
impl Tree {
/// Create a new usable tree instance directly from known path lengths.
pub fn from_path_lengths(path_lengths: Vec<u8>) -> Result<Self, DecodeFailed> {
CanonicalTree { path_lengths }.create_instance()
}
pub fn decode_element(&self, bitstream: &mut Bitstream) -> Result<u16, DecodeFailed> {
// Perform the inverse translation, peeking as many bits as our tree is…
let code = self.huffman_tree[bitstream.peek_bits(self.largest_length.get()) as usize];
// …and advancing the stream for as many bits this code actually takes (read to seek).
bitstream.read_bits(self.path_lengths[code as usize])?;
Ok(code)
}
}
impl fmt::Debug for Tree {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("Tree")
.field("path_lengths", &self.path_lengths.len())
.field("largest_length", &self.largest_length)
.finish()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn decode_simple_table() {
// Based on some aligned offset tree
let tree = Tree::from_path_lengths(vec![6, 5, 1, 3, 4, 6, 2, 0]).unwrap();
let value_count = vec![(2, 32), (6, 16), (3, 8), (4, 4), (1, 2), (0, 1), (5, 1)];
let mut i = 0;
for (value, count) in value_count.into_iter() {
(0..count).for_each(|_| {
assert_eq!(tree.huffman_tree[i], value);
i += 1;
})
}
}
#[test]
fn decode_complex_table() {
// Based on the pretree of some length tree
let tree = Tree::from_path_lengths(vec![
1, 0, 0, 0, 0, 7, 3, 3, 4, 4, 5, 5, 5, 7, 8, 8, 0, 7, 0, 0,
])
.unwrap();
let value_count = vec![
(0, 128),
(6, 32),
(7, 32),
(8, 16),
(9, 16),
(10, 8),
(11, 8),
(12, 8),
(5, 2),
(13, 2),
(17, 2),
(14, 1),
(15, 1),
];
let mut i = 0;
for (value, count) in value_count.into_iter() {
(0..count).for_each(|_| {
assert_eq!(tree.huffman_tree[i], value);
i += 1;
})
}
}
#[test]
fn decode_elements() {
let tree = Tree::from_path_lengths(vec![6, 5, 1, 3, 4, 6, 2, 0]).unwrap();
let buffer = [0x5b, 0xda, 0x3f, 0xf8];
let mut bitstream = Bitstream::new(&buffer);
bitstream.read_bits(11).unwrap();
assert_eq!(tree.decode_element(&mut bitstream), Ok(3));
assert_eq!(tree.decode_element(&mut bitstream), Ok(5));
assert_eq!(tree.decode_element(&mut bitstream), Ok(6));
assert_eq!(tree.decode_element(&mut bitstream), Ok(2));
}
}