sit-algos 0.3.0

Implementation of decompression algorithms used by StuffIt Expander and related applications
Documentation
use binrw::BinReaderExt;
use std::io;

#[derive(Debug, thiserror::Error)]
pub enum Error {
    #[error(transparent)]
    Io(#[from] io::Error),

    #[error(transparent)]
    BinRw(#[from] binrw::Error),

    #[error("Input data did not produce enough data and is likely cut-off or incomplete")]
    NotEnoughInstructions,

    #[error("Input would have produced more data than expected and was aborted")]
    TooMuchOutput,

    #[error("Invalid reference")]
    BookmarkHasNotBeenSet,

    #[error("Referenced dictionary entry does not exist")]
    DictionaryOutOfBounds,

    #[error("Unknown tree encoding")]
    TreeEncodingUnknown,

    #[error("Embedded tree is not properly encoded")]
    InvalidTree,
}

struct CheckpointMap {
    storage: [Option<usize>; 0x1000],
    next: u8,
}

impl CheckpointMap {
    fn new() -> CheckpointMap {
        Self {
            storage: [None; 0x1000],
            next: 0,
        }
    }

    fn add(&mut self, key: u16, value: usize) {
        let index = self.next as usize + key as usize;
        self.next = self.next.wrapping_add(1);
        self.storage[index] = Some(value);
    }

    fn get(&mut self, key: u16) -> Option<usize> {
        self.storage[key as usize]
    }
}

enum Command {
    Literal,
    Reference,
}

struct CommandBuffer {
    value: u16,
    bits: u8,
}

impl CommandBuffer {
    #[inline]
    pub fn new() -> Self {
        Self { value: 0, bits: 0 }
    }

    #[inline]
    pub fn next(&mut self, mut reader: impl io::Read + io::Seek) -> Result<Command, Error> {
        if self.is_empty() {
            match reader.read_le() {
                Ok(value) => {
                    self.value = value;
                    self.bits = 16
                }
                Err(e) if e.is_eof() => return Err(Error::NotEnoughInstructions),
                Err(e) => return Err(Error::BinRw(e)),
            }
        }

        let bitset = (self.value & 1) != 0;
        self.bits -= 1;
        self.value >>= 1;
        Ok(if bitset {
            Command::Reference
        } else {
            Command::Literal
        })
    }

    #[inline]
    fn is_empty(&self) -> bool {
        self.bits == 0
    }
}

pub fn decompress(
    uncompressed_len: usize,
    mut input: impl io::Read + io::Seek,
) -> Result<Vec<u8>, Error> {
    let mut output = vec![0u8; uncompressed_len];
    let mut output_ptr: usize = 0;
    let mut bookmark = CheckpointMap::new();
    let mut cmd = CommandBuffer::new();

    let mut streak = 0;

    fn hash(output: &[u8], i: usize) -> u16 {
        (output[i]
            .wrapping_add(output[i - 1])
            .wrapping_add(output[i - 2])
            & 0x1e)
            .cast_signed() as u16
            * 0x80
    }

    loop {
        if output_ptr == output.len() {
            return Ok(output);
        }

        match cmd.next(&mut input)? {
            Command::Literal => {
                output[output_ptr] = input.read_be()?;
                output_ptr += 1;
                streak += 1;

                if streak == 3 {
                    let key = hash(&output, output_ptr - 1);
                    bookmark.add(key, output_ptr - 3);
                    streak = 2;
                }
            }
            Command::Reference => {
                let [b1, b2]: [u8; 2] = input.read_le()?;
                let [b1, b2] = [b1 as u16, b2 as u16];
                let count = (b1 & 0x0F) as usize + 3;
                if output_ptr + count >= output.len() {
                    return Err(Error::TooMuchOutput);
                }

                let key = ((b1 & 0xF0) << 4) | b2;
                if let Some(offset) = bookmark.get(key) {
                    for i in 0..(count) {
                        output[output_ptr + i] = output[offset + i];
                    }
                } else {
                    output[output_ptr..(output_ptr + count)].fill(0x20);
                }

                if streak > 0 {
                    let key = hash(&output, output_ptr - streak + 2);
                    bookmark.add(key, output_ptr - streak);
                }

                if streak == 2 {
                    let key = hash(&output, output_ptr + 1);
                    bookmark.add(key, output_ptr - 1);
                }

                bookmark.add((b1 & 0xF0) << 4, output_ptr);
                output_ptr += count;
                streak = 0;
            }
        }
    }
}