crunch64/
yaz0.rs

1// Based on https://gist.github.com/Mr-Wiseguy/6cca110d74b32b5bb19b76cfa2d7ab4f
2
3use crate::{utils, Crunch64Error};
4
5fn parse_header(bytes: &[u8]) -> Result<usize, Crunch64Error> {
6    if bytes.len() < 0x10 {
7        return Err(Crunch64Error::InvalidYaz0Header);
8    }
9
10    if &bytes[0..4] != b"Yaz0" {
11        return Err(Crunch64Error::InvalidYaz0Header);
12    }
13
14    if bytes[8..0x10] != [0u8; 8] {
15        return Err(Crunch64Error::InvalidYaz0Header);
16    }
17
18    Ok(utils::read_u32(bytes, 4)? as usize)
19}
20
21fn write_header(dst: &mut Vec<u8>, uncompressed_size: usize) -> Result<(), Crunch64Error> {
22    dst.extend(b"Yaz0");
23    dst.extend((uncompressed_size as u32).to_be_bytes());
24    // padding
25    dst.extend(&[0u8; 8]);
26
27    Ok(())
28}
29
30pub fn decompress(bytes: &[u8]) -> Result<Box<[u8]>, Crunch64Error> {
31    let uncompressed_size = parse_header(bytes)?;
32
33    // Skip the header
34    let mut index_src = 0x10;
35    let mut index_dst = 0;
36
37    let mut ret = vec![0u8; uncompressed_size];
38
39    while index_src < bytes.len() {
40        let mut layout_bit_index = 0;
41        let mut layout_bits = bytes[index_src];
42        index_src += 1;
43
44        while (layout_bit_index < 8) && (index_src < bytes.len()) && (index_dst < uncompressed_size)
45        {
46            if (layout_bits & 0x80) != 0 {
47                ret[index_dst] = bytes[index_src];
48                index_src += 1;
49                index_dst += 1;
50            } else {
51                let first_byte = bytes[index_src];
52                index_src += 1;
53                let second_byte = bytes[index_src];
54                index_src += 1;
55                let byte_pair = ((first_byte as u16) << 8) | (second_byte as u16);
56                let offset = (byte_pair & 0x0FFF) + 1;
57                let mut length: usize;
58
59                // Check how the group length is encoded
60                if (first_byte & 0xF0) == 0 {
61                    // 3 byte encoding, 0RRRNN
62                    let third_byte = bytes[index_src];
63                    index_src += 1;
64                    length = (third_byte as usize) + 0x12;
65                } else {
66                    // 2 byte encoding, NRRR
67                    length = (((byte_pair & 0xF000) >> 12) + 2) as usize;
68                }
69
70                while length > 0 {
71                    ret[index_dst] = ret[index_dst - offset as usize];
72                    index_dst += 1;
73                    length -= 1;
74                }
75            }
76
77            layout_bit_index += 1;
78            layout_bits <<= 1;
79        }
80    }
81
82    Ok(ret.into_boxed_slice())
83}
84
85fn size_for_compressed_buffer(input_size: usize) -> Result<usize, Crunch64Error> {
86    // Worst-case size for output is zero compression on the input, meaning the input size plus the number of layout bytes plus the Yaz0 header.
87    // There would be one layout byte for every 8 input bytes, so the worst-case size is:
88    //   input_size + ROUND_UP_DIVIDE(input_size, 8) + 0x10
89    Ok(input_size + input_size.div_ceil(8) + 0x10)
90}
91
92pub fn compress(bytes: &[u8]) -> Result<Box<[u8]>, Crunch64Error> {
93    let input_size = bytes.len();
94
95    let mut output: Vec<u8> = Vec::with_capacity(size_for_compressed_buffer(input_size)?);
96    let mut window = utils::Window::new(bytes);
97
98    write_header(&mut output, input_size)?;
99
100    let mut index_cur_layout_byte: usize = 0x10;
101    let mut index_out_ptr: usize = index_cur_layout_byte;
102    let mut input_pos: usize = 0;
103    let mut cur_layout_bit: u8 = 1;
104
105    while input_pos < input_size {
106        // Advance to the next layout bit
107        cur_layout_bit >>= 1;
108
109        if cur_layout_bit == 0 {
110            cur_layout_bit = 0x80;
111            index_cur_layout_byte = index_out_ptr;
112            output.push(0);
113            index_out_ptr += 1;
114        }
115
116        let (mut group_pos, mut group_size) = window.search(input_pos, 0x111);
117
118        // If the group isn't larger than 2 bytes, copying the input without compression is smaller
119        if group_size <= 2 {
120            // Set the current layout bit to indicate that this is an uncompressed byte
121            output[index_cur_layout_byte] |= cur_layout_bit;
122            output.push(bytes[input_pos]);
123            input_pos += 1;
124            index_out_ptr += 1;
125        } else {
126            // Search for a new group after one position after the current one
127            let (new_position, new_size) = window.search(input_pos + 1, 0x111);
128
129            // If the new group is better than the current group by at least 2 bytes, use it instead
130            if new_size >= group_size + 2 {
131                // Mark the current layout bit to skip compressing this byte, as the next input position yielded better compression
132                output[index_cur_layout_byte] |= cur_layout_bit;
133                // Copy the input byte to the output
134                output.push(bytes[input_pos]);
135                input_pos += 1;
136                index_out_ptr += 1;
137
138                // Advance to the next layout bit
139                cur_layout_bit >>= 1;
140
141                if cur_layout_bit == 0 {
142                    cur_layout_bit = 0x80;
143                    index_cur_layout_byte = index_out_ptr;
144                    output.push(0);
145                    index_out_ptr += 1;
146                }
147
148                group_size = new_size;
149                group_pos = new_position;
150            }
151
152            // Calculate the offset for the current group
153            let group_offset = input_pos as u32 - group_pos - 1;
154
155            // Determine which encoding to use for the current group
156            if group_size >= 0x12 {
157                // Three bytes, 0RRRNN
158                output.push((group_offset >> 8) as u8);
159                index_out_ptr += 1;
160                output.push((group_offset & 0xFF) as u8);
161                index_out_ptr += 1;
162                output.push((group_size - 0x12) as u8);
163                index_out_ptr += 1;
164            } else {
165                // Two bytes, NRRR
166                output.push((group_offset >> 8) as u8 | ((group_size - 2) << 4) as u8);
167                index_out_ptr += 1;
168                output.push((group_offset & 0xFF) as u8);
169                index_out_ptr += 1;
170            }
171
172            // Move forward in the input by the size of the group
173            input_pos += group_size as usize;
174        }
175    }
176
177    Ok(output.into_boxed_slice())
178}
179
#[cfg(feature = "c_bindings")]
mod c_bindings {
    //! C ABI entry points for the Yaz0 codec.
    //!
    //! Every function reports its outcome as a `Crunch64Error` value, with
    //! `Crunch64Error::Okay` meaning success. Results are handed back through
    //! the output pointers supplied by the caller.

    use super::{utils, Crunch64Error};

    /// Reads the Yaz0 header found at `src` and stores the uncompressed size
    /// it declares in `*dst_size`.
    ///
    /// Returns `OutOfBounds` when `src_len` is smaller than the 0x10-byte
    /// header, `NullPointer` when `dst_size` or `src` is null, and otherwise
    /// any error produced while copying or parsing the header.
    #[no_mangle]
    pub extern "C" fn crunch64_yaz0_decompress_bound(
        dst_size: *mut usize,
        src_len: usize,
        src: *const u8,
    ) -> Crunch64Error {
        if src_len < 0x10 {
            return Crunch64Error::OutOfBounds;
        }

        if dst_size.is_null() || src.is_null() {
            return Crunch64Error::NullPointer;
        }

        // Only the header is needed, so just 0x10 bytes are requested.
        let declared_size = utils::u8_vec_from_pointer_array(0x10, src)
            .and_then(|header| super::parse_header(&header));

        match declared_size {
            Ok(size) => {
                // SAFETY: `dst_size` was checked to be non-null above. The
                // caller is responsible for it pointing to writable memory.
                unsafe { *dst_size = size };
                Crunch64Error::Okay
            }
            Err(code) => code,
        }
    }

    /// Decompresses the `src_len` bytes at `src` and hands the result to
    /// `utils::set_pointer_array_from_u8_array` together with `dst_len` and
    /// `dst`.
    ///
    /// Returns `NullPointer` when any pointer argument is null, and otherwise
    /// the first error reported while reading the input, decompressing it, or
    /// storing the output.
    #[no_mangle]
    pub extern "C" fn crunch64_yaz0_decompress(
        dst_len: *mut usize,
        dst: *mut u8,
        src_len: usize,
        src: *const u8,
    ) -> Crunch64Error {
        if dst_len.is_null() || dst.is_null() || src.is_null() {
            return Crunch64Error::NullPointer;
        }

        let outcome = utils::u8_vec_from_pointer_array(src_len, src)
            .and_then(|input| super::decompress(&input))
            .and_then(|decoded| utils::set_pointer_array_from_u8_array(dst_len, dst, &decoded));

        match outcome {
            Ok(_) => Crunch64Error::Okay,
            Err(code) => code,
        }
    }

    /// Stores in `*dst_size` the worst-case compressed size for an input of
    /// `src_len` bytes.
    ///
    /// `src` is only checked for null; its contents are not read. Returns
    /// `NullPointer` when `dst_size` or `src` is null.
    #[no_mangle]
    pub extern "C" fn crunch64_yaz0_compress_bound(
        dst_size: *mut usize,
        src_len: usize,
        src: *const u8,
    ) -> Crunch64Error {
        if dst_size.is_null() || src.is_null() {
            return Crunch64Error::NullPointer;
        }

        match super::size_for_compressed_buffer(src_len) {
            Ok(bound) => {
                // SAFETY: `dst_size` was checked to be non-null above. The
                // caller is responsible for it pointing to writable memory.
                unsafe { *dst_size = bound };
                Crunch64Error::Okay
            }
            Err(code) => code,
        }
    }

    /// Compresses the `src_len` bytes at `src` and hands the result to
    /// `utils::set_pointer_array_from_u8_array` together with `dst_len` and
    /// `dst`.
    ///
    /// Returns `NullPointer` when any pointer argument is null, and otherwise
    /// the first error reported while reading the input, compressing it, or
    /// storing the output.
    #[no_mangle]
    pub extern "C" fn crunch64_yaz0_compress(
        dst_len: *mut usize,
        dst: *mut u8,
        src_len: usize,
        src: *const u8,
    ) -> Crunch64Error {
        if dst_len.is_null() || dst.is_null() || src.is_null() {
            return Crunch64Error::NullPointer;
        }

        let outcome = utils::u8_vec_from_pointer_array(src_len, src)
            .and_then(|input| super::compress(&input))
            .and_then(|encoded| utils::set_pointer_array_from_u8_array(dst_len, dst, &encoded));

        match outcome {
            Ok(_) => Crunch64Error::Okay,
            Err(code) => code,
        }
    }
}
283
#[cfg(feature = "python_bindings")]
pub(crate) mod python_bindings {
    use pyo3::prelude::*;
    use std::borrow::Cow;

    /**
     * The input is taken as a `Cow` rather than a plain `&[u8]`: the latter only
     * accepts Python's `bytes` objects, while `Cow` accepts both `bytes` and
     * `bytearray`.
     * This matters because, on the Python side, an argument typed as `bytes`
     * may also be given a `bytearray` object.
     */

    #[pyfunction]
    pub(crate) fn decompress_yaz0(bytes: Cow<[u8]>) -> Result<Cow<[u8]>, super::Crunch64Error> {
        let decompressed = super::decompress(&bytes)?;

        // Hand the buffer back as an owned `Vec<u8>`.
        Ok(Cow::Owned(decompressed.into_vec()))
    }

    #[pyfunction]
    pub(crate) fn compress_yaz0(bytes: Cow<[u8]>) -> Result<Cow<[u8]>, super::Crunch64Error> {
        let compressed = super::compress(&bytes)?;

        // Hand the buffer back as an owned `Vec<u8>`.
        Ok(Cow::Owned(compressed.into_vec()))
    }
}
306
#[cfg(test)]
mod tests {
    use crate::Crunch64Error;
    use core::panic;
    use rstest::rstest;
    use std::{fs, path::PathBuf};

    /// Reads the whole file at `path` into memory.
    ///
    /// # Panics
    ///
    /// Panics, naming the path and the underlying I/O error, when the file
    /// cannot be opened or read. Read failures used to be ignored, which
    /// returned a truncated buffer and surfaced later as a misleading
    /// assertion failure.
    pub fn read_test_file(path: PathBuf) -> Vec<u8> {
        match fs::read(&path) {
            Ok(contents) => contents,
            Err(error) => {
                panic!("Failed to open or read file {}: {}", path.display(), error);
            }
        }
    }

    /// Decompressing each `*.Yaz0` fixture must reproduce the file of the same
    /// name without the extension.
    #[rstest]
    fn test_matching_decompression(
        #[files("../test_data/*.Yaz0")] path: PathBuf,
    ) -> Result<(), Crunch64Error> {
        let compressed_file = &read_test_file(path.clone());
        let decompressed_file = &read_test_file(path.with_extension(""));

        let decompressed: Box<[u8]> = super::decompress(compressed_file)?;
        assert_eq!(decompressed_file, decompressed.as_ref());
        Ok(())
    }

    /// Compressing each uncompressed fixture must reproduce the matching
    /// `*.Yaz0` file byte for byte.
    #[rstest]
    fn test_matching_compression(
        #[files("../test_data/*.Yaz0")] path: PathBuf,
    ) -> Result<(), Crunch64Error> {
        let compressed_file = &read_test_file(path.clone());
        let decompressed_file = &read_test_file(path.with_extension(""));

        let compressed = super::compress(decompressed_file.as_slice())?;
        assert_eq!(compressed_file, compressed.as_ref());
        Ok(())
    }

    /// Compressing and then decompressing an uncompressed fixture must give
    /// back the original bytes.
    #[rstest]
    fn test_cycle_decompressed(
        #[files("../test_data/*.Yaz0")] path: PathBuf,
    ) -> Result<(), Crunch64Error> {
        let decompressed_file = &read_test_file(path.with_extension(""));

        assert_eq!(
            decompressed_file,
            super::decompress(&super::compress(decompressed_file.as_ref())?)?.as_ref()
        );
        Ok(())
    }

    /// Decompressing and then recompressing a `*.Yaz0` fixture must give back
    /// the original bytes.
    #[rstest]
    fn test_cycle_compressed(
        #[files("../test_data/*.Yaz0")] path: PathBuf,
    ) -> Result<(), Crunch64Error> {
        let compressed_file = &read_test_file(path);

        assert_eq!(
            compressed_file,
            super::compress(&super::decompress(compressed_file.as_ref())?)?.as_ref()
        );
        Ok(())
    }
}
383}