vpk0/format.rs
1//! Information and structures for `vpk0` files.
2//!
3//! There are three structures at the start of a `vpk0` file:
4//! 1. Header
5//! 2. Offset Bitsize Huffman Tree
6//! 3. Length Bitsize Huffman Tree
7//!
8//! ## Header
9//! There is a nine byte header at beginning of the file that includes the sample
10//! number for the encoded offsets, as well as the size of the decompressed data.
11//! The key data can be extracted into a [`VpkHeader`] by using [`vpk_info()`].
12//!
13//! | Byte Num | Description |
14//! | :------: | ----------- |
15//! | 0..4 | magic bytes ("vpk0") |
16//! | 4..8 | size in big endian bytes of decompressed data |
17//! | 9 | Sample Method (0 => One Sample; 1 => Two Sample ) |
18//!
19//! ## Huffman Trees
20//! After the header, there are two linearly encoded Huffman trees: one for the offsets,
21//! and one for the lengths.
22//! The two trees can be extracted into their `String` representations as [`TreeInfo`]
23//! by using [`vpk_info()`].
24//!
25//! Tree leafs are encoded as a `0` followed by an eight bit value leaf.
26//! Tree nodes are encoded by a `1`, and combine the most recent two nodes/leaves.
27//
28//! For example, the simple tree `(1, (4, 7))` would be encoded in binary as:
29//! ```text
30//! 0 00000001 0 00000100 0 00000111 1 1
31//! ```
32//! and it would give the following Huffman codes:
33//!
34//! | Bitsize | Code |
35//! | ------- | ---- |
36//! | 1 | 0 |
37//! | 4 | 10 |
38//! | 7 | 11 |
39//!
40//! So, let's say that you wanted to encode an offset of seven with the same tree as above:
41//! ```text
42//! ┌ read next four bits
43//! | ┌ value
44//! 10 0111
45//! ```
46//! Single leaf trees are valid, but the leaf will have a "zero length" huffman code.
47//! Existing decoders return of value of zero for a zero-length tree.
48//!
49//! Note that the trees do not store the actual offset or length value, but
50//! rather they store the number of bits to read for the actual offset or length.
51//!
52//! ### Offset Tree
53//! The offset tree stores the bit sizes for encoding an "LZSS" offset value. The offset tells
54//! the decoder how far to move back in the decoded output before copying back.
55//! While the `vpk0` format can single or double sample encoded offsets, those differences do not
56//! affect the tree.
57//!
58//! ### Length Tree
59//! The length tree stores the bit sizes for encoding an "LZSS" length value. The length tells
60//! the decoder how many bytes copy back from decoded output.
61//!
62//! ## An Example
63//! Let's encode the exciting and useful ascii string "YAAAAAAAAAAAAAA" into a one sample `vpk0` file.
64//! We'll use an inefficient offset and length tree of `(1, (4, 7))`.
65//! The LZSS encoding of the input will be ['Y', 'A', (1, 13)], where (1, 13) is an offset of 1
66//! and a length of 13.
67//! ```text
68//! Header
69//! 76706B30 <- "vpk0"
70//! 0000000F <- original file size of 15 bytes
71//! 00 <- one sample
72//!
73//! Offset Tree (see above for how this was generated)
74//! 0 00000001 0 00000100 0 00000111 1 1
75//! Length Tree
76//! 0 00000001 0 00000100 0 00000111 1 1
77//!
78//! Encoded Data
79//! 0 01011001 <- uncoded ascii 'Y'
80//! 0 01000001 <- uncoded ascii 'A'
81//! ┌ encoded
82//! | ┌ read next 1 bit
83//! | | ┌ offset value (1)
84//! | | | ┌ read next 7 bits
85//! | | | | ┌ length value (13)
86//! 1 0 1 11 0010011
87//! ```
88//! [`vpk_info()`]: crate::vpk_info
89
90use crate::errors::VpkError;
91use bitstream_io::{BitReader, BitWriter, BE};
92use std::convert::TryInto;
93use std::fmt;
94use std::io::{Read, Write};
95use std::str;
96
97// re-export the string representations of the Huffman trees
98// makes more sense to be here for users, imho
99pub use crate::decode::TreeInfo;
100
101/// Valid lookback methods for a VPK compressed file.
102///
103/// `OneSample` directly encodes the offset value in stream,
104/// while `TwoSample` encodes a modified form of the offset as either one
105/// or two values.
106///
107/// The two sample mode encodes an offset by adding eight to the original value,
108/// then dividing that value by four. If there is no remainder,
109/// the quotient is stored as a single sample. Otherwise,
110/// the `remainder - 1` is stored followed by the quotient
111#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
112pub enum VpkMethod {
113 OneSample = 0,
114 TwoSample = 1,
115}
116
117impl fmt::Display for VpkMethod {
118 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
119 match self {
120 Self::OneSample => write!(f, "Method 0 (One Sample)"),
121 Self::TwoSample => write!(f, "Method 1 (Two Sample)"),
122 }
123 }
124}
125
126/// The information stored at the start of a `vpk0` file
127#[derive(Debug, Clone, Copy, PartialEq, Eq)]
128pub struct VpkHeader {
129 /// size of decompressed data
130 pub size: u32,
131 pub method: VpkMethod,
132}
133impl VpkHeader {
134 /// Parse VPK header from a byte array
135 fn from_array(arr: &[u8; 9]) -> Result<Self, VpkError> {
136 let name = str::from_utf8(&arr[0..4]).map_err(VpkError::Utf8Error)?;
137 if name != "vpk0" {
138 return Err(VpkError::InvalidHeader(name.into()));
139 }
140
141 let size = u32::from_be_bytes(arr[4..8].try_into().unwrap());
142 let method = match arr[8] {
143 0 => Ok(VpkMethod::OneSample),
144 1 => Ok(VpkMethod::TwoSample),
145 unk => Err(VpkError::InvalidMethod(unk)),
146 }?;
147
148 Ok(Self { size, method })
149 }
150 /// Convenience function to read the `vpk0` header from a bitstream
151 pub(crate) fn from_bitreader<R: Read>(reader: &mut BitReader<R, BE>) -> Result<Self, VpkError> {
152 let mut header = [0u8; 9];
153 reader.read_bytes(&mut header)?;
154
155 Self::from_array(&header)
156 }
157 /// Write out `self` to the big endian `BitWriter` to match the vpk format
158 pub(crate) fn write<W: Write>(&self, wtr: &mut BitWriter<W, BE>) -> Result<(), VpkError> {
159 wtr.write_bytes(b"vpk0")?; // 0..4
160 wtr.write(32, self.size)?; // 4..8
161 let method = match self.method {
162 VpkMethod::OneSample => 0,
163 VpkMethod::TwoSample => 1,
164 };
165 wtr.write(8, method)?; // 8
166
167 Ok(())
168 }
169}
170
171/// A Huffman tree node or leaf designed to be stored in an array
172#[derive(Debug, Clone, Copy, PartialEq, Eq)]
173pub(crate) enum TreeEntry {
174 // left and right are indices into a `HufTree` array
175 Node { left: usize, right: usize },
176 Leaf(u8),
177}
178
179/// An array based huffman tree
180#[derive(Debug, Clone, PartialEq, Eq)]
181pub(crate) struct VpkTree {
182 entries: Vec<TreeEntry>,
183}
184
185impl VpkTree {
186 /// Create an empty tree.
187 /// This will be written to the output buffer as a single true bit (1)
188 pub(crate) fn empty() -> Self {
189 Self {
190 entries: Vec::new(),
191 }
192 }
193 pub(crate) fn from_bitreader<R: Read>(bits: &mut BitReader<R, BE>) -> Result<Self, VpkError> {
194 let mut entries: Vec<TreeEntry> = Vec::new();
195 let mut buf: Vec<usize> = Vec::new();
196
197 loop {
198 let new_entry_idx = entries.len();
199 // create a Node (1) or Leaf (0)
200 if bits.read_bit()? {
201 // if there are less than 2 "outstanding" entries, the tree is done
202 if buf.len() < 2 {
203 break;
204 }
205
206 entries.push(TreeEntry::Node {
207 right: buf.pop().unwrap(),
208 left: buf.pop().unwrap(),
209 });
210 } else {
211 // add a leaf node with an 8-bit value
212 entries.push(TreeEntry::Leaf(bits.read(8)?));
213 }
214 // store a reference to new leaf or node in the buf for later combination
215 buf.push(new_entry_idx);
216 }
217
218 Ok(Self { entries })
219 }
220 /// Use `BitReader` `bits` to read a value out from this `HuffTree`
221 pub(crate) fn read_value<R: Read>(&self, bits: &mut BitReader<R, BE>) -> Result<u32, VpkError> {
222 let tbl = &self.entries;
223 let len = tbl.len();
224 if len == 0 {
225 return Ok(0);
226 };
227 // tree starts from end
228 let mut idx = len - 1;
229 while let TreeEntry::Node { left, right } = tbl[idx] {
230 if bits.read_bit()? {
231 idx = right;
232 } else {
233 idx = left;
234 }
235 }
236 // make a loop -> match set to just return this?
237 match tbl[idx] {
238 TreeEntry::Leaf(size) => Ok(bits.read(size as u32)?),
239 _ => Err(VpkError::BadTreeEncoding),
240 }
241 }
242 /// Write `self` to the Big Endian `BitWriter` in the expected VPK format
243 pub(crate) fn write<W: Write>(&self, wtr: &mut BitWriter<W, BE>) -> Result<(), VpkError> {
244 for entry in &self.entries {
245 match entry {
246 TreeEntry::Leaf(val) => {
247 wtr.write_bit(false)?;
248 wtr.write(8, *val)?;
249 }
250 TreeEntry::Node { .. } => {
251 wtr.write_bit(true)?;
252 }
253 }
254 }
255 // end tree
256 wtr.write_bit(true).map_err(Into::into)
257 }
258
259 fn _format_entry(&self, entry: usize, f: &mut fmt::Formatter) -> fmt::Result {
260 match self.entries[entry] {
261 TreeEntry::Leaf(val) => write!(f, "{}", val),
262 TreeEntry::Node { left, right } => {
263 write!(f, "(")?;
264 self._format_entry(left, f)?;
265 write!(f, ", ")?;
266 self._format_entry(right, f)?;
267 write!(f, ")")
268 }
269 }
270 }
271}
272
273impl fmt::Display for VpkTree {
274 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
275 if self.entries.is_empty() {
276 write!(f, "()")
277 } else {
278 self._format_entry(self.entries.len() - 1, f)
279 }
280 }
281}
282
283impl From<Vec<TreeEntry>> for VpkTree {
284 fn from(entries: Vec<TreeEntry>) -> Self {
285 Self { entries }
286 }
287}