sit-algos 0.3.0

Implementation of decompression algorithms used by StuffIt Expander and related applications
Documentation
use std::{io, marker::PhantomData};

use bitstream_io::{BitRead as _, BitReader, LE};
use cfor::cfor;

pub(crate) mod hufftree;
mod window;

use hufftree::HuffTree;

use crate::installer::window::Window;

pub struct InstallerReader<R> {
    inner: io::Cursor<Vec<u8>>,
    marker: PhantomData<R>,
}

impl<R: io::Read + io::Seek> InstallerReader<R> {
    pub fn try_new(inner: R) -> Result<Self, io::Error> {
        let data = decompress(inner).map_err(io::Error::other)?;
        let inner = io::Cursor::new(data);

        Ok(Self {
            inner,
            marker: PhantomData,
        })
    }
}

// TODO: Improve error handling
#[derive(thiserror::Error, Debug)]
pub enum Error {
    #[error(transparent)]
    BinRw(#[from] binrw::Error),

    #[error(transparent)]
    Io(#[from] io::Error),

    #[error("invalid")]
    InvalidTree,

    #[error("invalid")]
    TreeEncodingUnknown,

    #[error("invalid")]
    DictionaryOutOfBounds,

    #[error("invalid")]
    TooMuchOutput,
}

pub const fn init_lengths<const N: usize>() -> ([u16; N], [u8; N]) {
    let mut values = [0u16; N];
    let mut paths = [0u8; N];

    let mut k = 0;
    cfor! {let mut i = 0; i < N; i+=1; {
        let value = (i.saturating_sub(4) >> 2) as u8;

        values[i] = k;
        paths[i] = value;

        k = k.wrapping_add(1u16.wrapping_shl(value as u32));
    }}

    (values, paths)
}

pub const fn init_offsets<const N: usize>() -> ([u32; N], [u8; N]) {
    let mut paths = [0u32; N];
    let mut values = [0u8; N];

    let mut k = 1;
    cfor! {let mut i = 0; i < N; i+=1; {
        let value = (i.saturating_sub(3) >> 2) as u8;

        paths[i] = k;
        values[i] = value;

        k = k.wrapping_add(1u32.wrapping_shl(value as u32));
    }};

    (paths, values)
}

pub fn decompress(mut reader: impl io::Read + io::Seek) -> Result<Vec<u8>, Error> {
    // TODO: Decompress byte- or at least block-wise to avoid allocating too much memory
    let (offsets, offsets_bits) = init_offsets::<0x4b>();
    let (lengths, length_bits) = init_lengths::<0x34>();

    let mut total_output = Vec::new();
    let mut win = Window::<0x40000>::new();

    let mut reader = BitReader::<_, LE>::new(&mut reader);
    let block_count = reader.read_var::<u32>(16)? as usize;

    for _ in 0..block_count {
        let _compressed_block_size = reader.read_var::<u32>(32)? as usize;
        let uncompressed_block_size = reader.read_var::<u32>(32)? as usize;
        let syms_code = HuffTree::read_from(&mut reader, 0x134)?;
        let offsets_codes = HuffTree::read_from(&mut reader, 0x4b)?;

        let mut output = vec![0u8; uncompressed_block_size];
        let mut output_ptr = 0;
        while output_ptr < output.len() {
            let mut sym = syms_code.read_code(0, &mut reader)? as usize;
            if sym < 0x100 {
                // Emit literal
                output[output_ptr] = win.put((sym & 0xFF) as u8);
                output_ptr += 1;
                continue;
            }
            sym -= 0x100;

            let mut length = lengths[sym] as usize + 4;
            let bits = length_bits[sym] as u32;
            length += reader.read_var::<u32>(bits)? as usize;

            let sym = offsets_codes.read_code(0, &mut reader)? as usize;
            let mut offset = offsets[sym] as usize;
            let bits = offsets_bits[sym] as u32;
            offset += reader.read_var::<u32>(bits)? as usize;

            if output_ptr + length > output.len() {
                return Err(Error::TooMuchOutput);
            }

            // Copy from window
            for i in 0..length {
                let v = win.get(offset as u32);
                output[output_ptr + i] = win.put(v);
            }

            output_ptr += length;
        }

        reader.byte_align();
        total_output.append(&mut output);
    }

    Ok(total_output)
}

impl<R: io::Read + io::Seek> io::Read for InstallerReader<R> {
    #[inline]
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        self.inner.read(buf)
    }
}

impl<R: io::Seek> io::Seek for InstallerReader<R> {
    #[inline]
    fn seek(&mut self, pos: io::SeekFrom) -> io::Result<u64> {
        self.inner.seek(pos)
    }

    #[inline]
    fn stream_position(&mut self) -> io::Result<u64> {
        self.inner.stream_position()
    }

    #[inline]
    fn stream_len(&mut self) -> io::Result<u64> {
        self.inner.stream_len()
    }
}