sit-algos 0.3.0

Implementation of decompression algorithms used by StuffIt Expander and related applications
Documentation
use std::io;

use binrw::BinReaderExt;
use bitstream_io::{BitRead, BitReader, LE};
use cfor::cfor;

use super::installer::hufftree::HuffTree;

#[derive(Debug, thiserror::Error)]
pub enum Error {
    #[error(transparent)]
    BinRw(#[from] binrw::Error),

    #[error(transparent)]
    Io(#[from] io::Error),

    #[error(transparent)]
    Installer(#[from] super::installer::Error),

    #[error("Invalid Reference")]
    DictionaryOutOfBounds,

    #[error("Too Much Ouptut")]
    TooMuchOutput,
}

const fn init_lengths<const N: usize>() -> ([u16; N], [u8; N]) {
    let mut values = [0u16; N];
    let mut paths = [0u8; N];

    let mut k = 0;
    cfor! {let mut i = 0; i < N; i+=1; {
        let value = (i.saturating_sub(4) >> 2) as u8;

        values[i] = k;
        paths[i] = value;

        k = k.wrapping_add(1 << (value & 0x3f));
    }}

    (values, paths)
}

/// This initialization differs from the mainline installer compression offset by only subbing 1 and shiftin 1
const fn init_offsets<const N: usize>() -> ([u16; N], [u8; N]) {
    let mut paths = [0u16; N];
    let mut values = [0u8; N];

    let mut k = 1;
    cfor! {let mut i = 0; i < N; i+=1; {
        let value = (i.saturating_sub(1) >> 1) as u8;

        paths[i] = k;
        values[i] = value;

        k = k.wrapping_add(1 << (value & 0x3f));
    }};

    (paths, values)
}

pub fn decompress(
    mut reader: impl io::Read + io::Seek,
    uncompressed_size: usize,
    dict: &[u8],
) -> Result<Vec<u8>, Error> {
    let (offsets, offsets_bits) = init_offsets::<0x1f>();
    let (lengths, length_bits) = init_lengths::<0x24>();

    let dynamic_code_size: u16 = reader.read_be::<u8>()? as u16 * 2 - 1;
    assert!(dynamic_code_size <= 0x1f);
    let mut reader = BitReader::<_, LE>::new(&mut reader);
    let fixed = HuffTree::read_from(&mut reader, 0x124)?;
    let dynamic = HuffTree::read_from(&mut reader, dynamic_code_size as usize)?;

    let mut output = vec![0u8; uncompressed_size];
    let mut output_ptr = 0;
    while output_ptr < output.len() {
        let mut sym = fixed.read_code(0, &mut reader)? as usize;
        if sym < 0x100 {
            // Emit literal
            output[output_ptr] = (sym & 0xFF) as u8;
            output_ptr += 1;
            continue;
        }
        sym -= 0x100;

        let mut length = lengths[sym] as usize + 3;
        let bits = length_bits[sym] as u32;
        length += reader.read_var::<u32>(bits)? as usize;

        let sym = dynamic.read_code(0, &mut reader)? as usize;
        let mut offset = offsets[sym] as usize;
        let bits = offsets_bits[sym] as u32;
        offset += reader.read_var::<u32>(bits)? as usize;

        if output_ptr + length > output.len() {
            return Err(Error::TooMuchOutput);
        }

        if offset <= output_ptr {
            // Copy from output
            for _ in 0..length {
                output[output_ptr] = output[output_ptr - offset];
                output_ptr += 1;
            }
        } else {
            // Copy from dictionary
            if dict.len() <= (offset - output_ptr) {
                // TODO: Check if it is possible to copy parts from the dict and parts from output
                return Err(Error::DictionaryOutOfBounds);
            }

            let dictionary_offset = dict.len() - (offset - output_ptr);
            output[output_ptr..(output_ptr + length)]
                .copy_from_slice(&dict[dictionary_offset..(dictionary_offset + length)]);
            output_ptr += length;
        }
    }

    Ok(output)
}