vpk0/
format.rs

1//! Information and structures for `vpk0` files.
2//!
3//! There are three structures at the start of a `vpk0` file:
4//! 1. Header
5//! 2. Offset Bitsize Huffman Tree
6//! 3. Length Bitsize Huffman Tree
7//!
8//! ## Header
9//! There is a nine byte header at beginning of the file that includes the sample
10//! number for the encoded offsets, as well as the size of the decompressed data.
11//! The key data can be extracted into a [`VpkHeader`] by using [`vpk_info()`].
12//!
13//! | Byte Num | Description |
14//! | :------: | ----------- |
15//! | 0..4     | magic bytes ("vpk0") |
16//! | 4..8     | size in big endian bytes of decompressed data |
17//! | 9        | Sample Method (0 => One Sample; 1 => Two Sample ) |
18//!
19//! ## Huffman Trees
20//! After the header, there are two linearly encoded Huffman trees: one for the offsets,
21//! and one for the lengths.
22//! The two trees can be extracted into their `String` representations as [`TreeInfo`]
23//! by using [`vpk_info()`].
24//!
25//! Tree leafs are encoded as a `0` followed by an eight bit value leaf.
26//! Tree nodes are encoded by a `1`, and combine the most recent two nodes/leaves.
27//
28//! For example, the simple tree `(1, (4, 7))` would be encoded in binary as:
29//! ```text
30//! 0 00000001 0 00000100 0 00000111 1 1
31//! ```
32//! and it would give the following Huffman codes:
33//!
34//! | Bitsize | Code |
35//! | ------- | ---- |
36//! |    1    |  0   |
37//! |    4    |  10  |
38//! |    7    |  11  |
39//!
40//! So, let's say that you wanted to encode an offset of seven with the same tree as above:
41//! ```text
42//! ┌ read next four bits
43//! |  ┌ value
44//! 10 0111
45//! ```
46//! Single leaf trees are valid, but the leaf will have a "zero length" huffman code.
47//! Existing decoders return of value of zero for a zero-length tree.
48//!
49//! Note that the trees do not store the actual offset or length value, but
50//! rather they store the number of bits to read for the actual offset or length.
51//!
52//! ### Offset Tree
53//! The offset tree stores the bit sizes for encoding an "LZSS" offset value. The offset tells
54//! the decoder how far to move back in the decoded output before copying back.
55//! While the `vpk0` format can single or double sample encoded offsets, those differences do not
56//! affect the tree.
57//!
58//! ### Length Tree
59//! The length tree stores the bit sizes for encoding an "LZSS" length value. The length tells
60//! the decoder how many bytes copy back from decoded output.
61//!
62//! ## An Example
63//! Let's encode the exciting and useful ascii string "YAAAAAAAAAAAAAA" into a one sample `vpk0` file.
64//! We'll use an inefficient offset and length tree of `(1, (4, 7))`.
65//! The LZSS encoding of the input will be ['Y', 'A', (1, 13)], where (1, 13) is an offset of 1
66//! and a length of 13.
67//! ```text
68//! Header
69//! 76706B30 <- "vpk0"
70//! 0000000F <- original file size of 15 bytes
71//! 00       <- one sample
72//!
73//! Offset Tree (see above for how this was generated)
74//! 0 00000001 0 00000100 0 00000111 1 1
75//! Length Tree
76//! 0 00000001 0 00000100 0 00000111 1 1
77//!
78//! Encoded Data
79//! 0 01011001 <- uncoded ascii 'Y'
80//! 0 01000001 <- uncoded ascii 'A'
81//! ┌ encoded
82//! | ┌ read next 1 bit
83//! | | ┌ offset value (1)
84//! | | | ┌ read next 7 bits
85//! | | | |  ┌ length value (13)
86//! 1 0 1 11 0010011
87//! ```
88//! [`vpk_info()`]: crate::vpk_info
89
90use crate::errors::VpkError;
91use bitstream_io::{BitReader, BitWriter, BE};
92use std::convert::TryInto;
93use std::fmt;
94use std::io::{Read, Write};
95use std::str;
96
97// re-export the string representations of the Huffman trees
98// makes more sense to be here for users, imho
99pub use crate::decode::TreeInfo;
100
101/// Valid lookback methods for a VPK compressed file.
102///
103/// `OneSample` directly encodes the offset value in stream,
104/// while `TwoSample` encodes a modified form of the offset as either one
105/// or two values.
106///
107/// The two sample mode encodes an offset by adding eight to the original value,
108/// then dividing that value by four. If there is no remainder,
109/// the quotient is stored as a single sample. Otherwise,
110/// the `remainder - 1` is stored followed by the quotient
111#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
112pub enum VpkMethod {
113    OneSample = 0,
114    TwoSample = 1,
115}
116
117impl fmt::Display for VpkMethod {
118    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
119        match self {
120            Self::OneSample => write!(f, "Method 0 (One Sample)"),
121            Self::TwoSample => write!(f, "Method 1 (Two Sample)"),
122        }
123    }
124}
125
126/// The information stored at the start of a `vpk0` file
127#[derive(Debug, Clone, Copy, PartialEq, Eq)]
128pub struct VpkHeader {
129    /// size of decompressed data
130    pub size: u32,
131    pub method: VpkMethod,
132}
133impl VpkHeader {
134    /// Parse VPK header from a byte array
135    fn from_array(arr: &[u8; 9]) -> Result<Self, VpkError> {
136        let name = str::from_utf8(&arr[0..4]).map_err(VpkError::Utf8Error)?;
137        if name != "vpk0" {
138            return Err(VpkError::InvalidHeader(name.into()));
139        }
140
141        let size = u32::from_be_bytes(arr[4..8].try_into().unwrap());
142        let method = match arr[8] {
143            0 => Ok(VpkMethod::OneSample),
144            1 => Ok(VpkMethod::TwoSample),
145            unk => Err(VpkError::InvalidMethod(unk)),
146        }?;
147
148        Ok(Self { size, method })
149    }
150    /// Convenience function to read the `vpk0` header from a bitstream
151    pub(crate) fn from_bitreader<R: Read>(reader: &mut BitReader<R, BE>) -> Result<Self, VpkError> {
152        let mut header = [0u8; 9];
153        reader.read_bytes(&mut header)?;
154
155        Self::from_array(&header)
156    }
157    /// Write out `self` to the big endian `BitWriter` to match the vpk format
158    pub(crate) fn write<W: Write>(&self, wtr: &mut BitWriter<W, BE>) -> Result<(), VpkError> {
159        wtr.write_bytes(b"vpk0")?; // 0..4
160        wtr.write(32, self.size)?; // 4..8
161        let method = match self.method {
162            VpkMethod::OneSample => 0,
163            VpkMethod::TwoSample => 1,
164        };
165        wtr.write(8, method)?; // 8
166
167        Ok(())
168    }
169}
170
171/// A Huffman tree node or leaf designed to be stored in an array
172#[derive(Debug, Clone, Copy, PartialEq, Eq)]
173pub(crate) enum TreeEntry {
174    // left and right are indices into a `HufTree` array
175    Node { left: usize, right: usize },
176    Leaf(u8),
177}
178
179/// An array based huffman tree
180#[derive(Debug, Clone, PartialEq, Eq)]
181pub(crate) struct VpkTree {
182    entries: Vec<TreeEntry>,
183}
184
185impl VpkTree {
186    /// Create an empty tree.
187    /// This will be written to the output buffer as a single true bit (1)
188    pub(crate) fn empty() -> Self {
189        Self {
190            entries: Vec::new(),
191        }
192    }
193    pub(crate) fn from_bitreader<R: Read>(bits: &mut BitReader<R, BE>) -> Result<Self, VpkError> {
194        let mut entries: Vec<TreeEntry> = Vec::new();
195        let mut buf: Vec<usize> = Vec::new();
196
197        loop {
198            let new_entry_idx = entries.len();
199            // create a Node (1) or Leaf (0)
200            if bits.read_bit()? {
201                // if there are less than 2 "outstanding" entries, the tree is done
202                if buf.len() < 2 {
203                    break;
204                }
205
206                entries.push(TreeEntry::Node {
207                    right: buf.pop().unwrap(),
208                    left: buf.pop().unwrap(),
209                });
210            } else {
211                // add a leaf node with an 8-bit value
212                entries.push(TreeEntry::Leaf(bits.read(8)?));
213            }
214            // store a reference to new leaf or node in the buf for later combination
215            buf.push(new_entry_idx);
216        }
217
218        Ok(Self { entries })
219    }
220    /// Use `BitReader` `bits` to read a value out from this `HuffTree`
221    pub(crate) fn read_value<R: Read>(&self, bits: &mut BitReader<R, BE>) -> Result<u32, VpkError> {
222        let tbl = &self.entries;
223        let len = tbl.len();
224        if len == 0 {
225            return Ok(0);
226        };
227        // tree starts from end
228        let mut idx = len - 1;
229        while let TreeEntry::Node { left, right } = tbl[idx] {
230            if bits.read_bit()? {
231                idx = right;
232            } else {
233                idx = left;
234            }
235        }
236        // make a loop -> match set to just return this?
237        match tbl[idx] {
238            TreeEntry::Leaf(size) => Ok(bits.read(size as u32)?),
239            _ => Err(VpkError::BadTreeEncoding),
240        }
241    }
242    /// Write `self` to the Big Endian `BitWriter` in the expected VPK format
243    pub(crate) fn write<W: Write>(&self, wtr: &mut BitWriter<W, BE>) -> Result<(), VpkError> {
244        for entry in &self.entries {
245            match entry {
246                TreeEntry::Leaf(val) => {
247                    wtr.write_bit(false)?;
248                    wtr.write(8, *val)?;
249                }
250                TreeEntry::Node { .. } => {
251                    wtr.write_bit(true)?;
252                }
253            }
254        }
255        // end tree
256        wtr.write_bit(true).map_err(Into::into)
257    }
258
259    fn _format_entry(&self, entry: usize, f: &mut fmt::Formatter) -> fmt::Result {
260        match self.entries[entry] {
261            TreeEntry::Leaf(val) => write!(f, "{}", val),
262            TreeEntry::Node { left, right } => {
263                write!(f, "(")?;
264                self._format_entry(left, f)?;
265                write!(f, ", ")?;
266                self._format_entry(right, f)?;
267                write!(f, ")")
268            }
269        }
270    }
271}
272
273impl fmt::Display for VpkTree {
274    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
275        if self.entries.is_empty() {
276            write!(f, "()")
277        } else {
278            self._format_entry(self.entries.len() - 1, f)
279        }
280    }
281}
282
283impl From<Vec<TreeEntry>> for VpkTree {
284    fn from(entries: Vec<TreeEntry>) -> Self {
285        Self { entries }
286    }
287}