//! sit-algos 0.3.0
//!
//! Implementation of decompression algorithms used by StuffIt Expander and related applications.
//!
//! Documentation
use std::io::{self, Read, Seek};

use bitstream_io::BitRead;

/// Number of slots in both the code dictionary (`dict`) and the expansion
/// stack (`stack`) of `LmzwReader`; the value is 2^14 + 1.
const DICTIONARY_SIZE: usize = 16385;

/// Streaming decompressor that turns variable-width codes read from `R` into
/// bytes (presumably the LZMW scheme, going by the type name — confirm against
/// the format documentation).
///
/// Codes below 256 are literal bytes. A code `c >= 256` expands to the
/// expansion of `dict[c - 1]` followed by the expansion of `dict[c]`.
pub struct LmzwReader<R: io::Read> {
    /// Bit-level reader over the compressed input, using little-endian bit order.
    inner: bitstream_io::BitReader<R, bitstream_io::LittleEndian>,
    /// Number of decompressed bytes handed out so far.
    position: u64,
    /// Total number of decompressed bytes to produce, as supplied to `new`.
    uncompressed_size: u64,

    /// `dict[k]` is the code that was read when dictionary slot `k` was filled;
    /// slot 255 holds the first code read after a dictionary reset.
    dict: [u16; DICTIONARY_SIZE],
    /// Codes whose expansion is still pending; the top of the stack is at
    /// `stack_ptr - 1`.
    stack: [u16; DICTIONARY_SIZE],
    /// Number of pending codes currently held in `stack`.
    stack_ptr: usize,
    /// Despite the name, this is the number of codes currently defined in
    /// `dict` (i.e. the next free dictionary slot). It is set to 256 on reset;
    /// a code read that is `>=` this value triggers a reset.
    stack_len: u32,

    /// The most recently read code. Starts at 1 (above the initial
    /// `stack_len` of 0) so the first decode step performs a reset.
    ptr: u32,
    /// Dictionary fill level at which the code width grows by one bit.
    more_bits_required: u32,
    /// Current width, in bits, of the codes read from `inner`.
    bits: u32,
}

impl<R: io::Read> LmzwReader<R> {
    /// Creates a decoder that reads compressed codes from `inner` and is
    /// expected to produce `uncompressed_size` bytes in total.
    ///
    /// `ptr` starts above `stack_len` (1 > 0) so that the very first decode
    /// step takes the dictionary-reset path, which sets up the 9-bit code
    /// width and the 256 literal codes.
    pub fn new(inner: R, uncompressed_size: u64) -> Self {
        Self {
            inner: bitstream_io::BitReader::<_, bitstream_io::LittleEndian>::new(inner),

            position: 0,
            uncompressed_size,
            dict: [0u16; DICTIONARY_SIZE],
            stack: [0u16; DICTIONARY_SIZE],
            stack_ptr: 0,
            stack_len: 0,

            bits: 0,
            ptr: 1,
            more_bits_required: 0,
        }
    }

    /// Consumes the decoder and returns the reader it was wrapping.
    pub fn into_inner(self) -> R {
        self.inner.into_reader()
    }

    /// Builds the error reported when the compressed data is malformed.
    fn corrupt(reason: &'static str) -> io::Error {
        io::Error::new(io::ErrorKind::InvalidData, reason)
    }

    /// Returns `true` once `position` has reached `uncompressed_size`.
    #[inline]
    fn is_exhausted(&self) -> bool {
        self.position >= self.uncompressed_size
    }

    /// Pops the top pending code and walks it down to a literal byte.
    ///
    /// While the code is `>= 256`, its second half (`dict[code]`) is pushed
    /// back for later and the walk continues with its first half
    /// (`dict[code - 1]`). The literal reached at the end is returned.
    ///
    /// # Panics
    ///
    /// Panics if the stack is empty; callers only invoke this after pushing a
    /// code or after checking `stack_ptr != 0`.
    #[inline]
    fn produce_byte_from_stack(&mut self) -> u8 {
        assert_ne!(self.stack_ptr, 0);
        self.stack_ptr -= 1;
        let mut ptr = self.stack[self.stack_ptr] as u32;
        while ptr >= 256 {
            self.stack[self.stack_ptr] = self.dict[ptr as usize];
            self.stack_ptr += 1;
            ptr = self.dict[ptr as usize - 1] as u32;
        }

        ptr as u8
    }

    /// Starts expanding the code in `self.ptr` and returns its first byte.
    #[inline]
    fn produce_byte_from_ptr(&mut self) -> io::Result<Option<u8>> {
        self.stack[0] = self.ptr as u16;
        self.stack_ptr = 1;
        Ok(Some(self.produce_byte_from_stack()))
    }

    /// Resets the dictionary to the 256 literal codes and the code width to
    /// 9 bits, then reads the first code of the new run and starts emitting it.
    ///
    /// Returns `Ok(None)` if all expected output has already been produced.
    ///
    /// # Errors
    ///
    /// Returns `InvalidData` if the first code after the reset is not a
    /// literal, and propagates any error from the underlying bit reader.
    fn restart_dictionary(&mut self) -> io::Result<Option<u8>> {
        self.stack_len = 256;
        self.more_bits_required = self.stack_len << 1;
        self.bits = 9;

        if self.is_exhausted() {
            return Ok(None);
        }

        self.ptr = self.inner.read_var(self.bits)?;
        if self.ptr >= self.stack_len {
            // Right after a reset only literal codes exist, so anything else
            // can only come from corrupt input.
            return Err(Self::corrupt(
                "corrupt compressed stream: first code after a dictionary reset is not a literal",
            ));
        }
        // Slot 255 is the "previous code" consulted when entry 256 is expanded.
        self.dict[255] = self.ptr as u16;
        self.produce_byte_from_ptr()
    }

    /// Produces the next decompressed byte, or `Ok(None)` once
    /// `uncompressed_size` bytes have been handed out and no expansion is
    /// pending.
    ///
    /// # Errors
    ///
    /// Returns `InvalidData` for malformed input (a non-literal code right
    /// after a reset, or a code that would overflow the dictionary) and
    /// propagates any error from the underlying bit reader.
    #[inline]
    fn produce_next_byte(&mut self) -> io::Result<Option<u8>> {
        // Finish the expansion already in progress before reading a new code.
        if self.stack_ptr != 0 {
            return Ok(Some(self.produce_byte_from_stack()));
        }

        // Only true before the first code has been read (ptr = 1, stack_len = 0)
        // or after a reset that was cut short.
        if self.ptr > self.stack_len {
            return self.restart_dictionary();
        }

        if self.is_exhausted() {
            return Ok(None);
        }

        self.ptr = self.inner.read_var(self.bits)?;
        if self.ptr >= self.stack_len {
            // A code that is not defined yet signals a dictionary reset.
            return self.restart_dictionary();
        }

        // Record the code in the next free slot; a full dictionary without a
        // preceding reset code means the input is corrupt.
        match self.dict.get_mut(self.stack_len as usize) {
            Some(slot) => *slot = self.ptr as u16,
            None => {
                return Err(Self::corrupt(
                    "corrupt compressed stream: dictionary overflow without a reset code",
                ))
            }
        }
        self.stack_len += 1;
        if self.stack_len == self.more_bits_required {
            self.more_bits_required <<= 1;
            self.bits += 1;
        }

        self.produce_byte_from_ptr()
    }
}

impl<R: io::Read> io::Read for LmzwReader<R> {
    /// Decompresses into `buf` and returns how many bytes were written.
    ///
    /// Never yields more than `uncompressed_size` bytes in total; once that
    /// many bytes have been produced every call returns `Ok(0)`.
    ///
    /// # Errors
    ///
    /// A decoding or I/O error is returned only when no byte was produced by
    /// this call. If some bytes were already decoded, they are returned and
    /// the failing step is left to the next call, so that an `Err` never
    /// hides data that was already consumed from the stream.
    #[inline]
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let remaining = self.uncompressed_size.saturating_sub(self.position);
        // `usize` is at most 64 bits wide, so widening `buf.len()` is lossless,
        // and the minimum is <= `buf.len()`, so narrowing it back is lossless too.
        let limit = remaining.min(buf.len() as u64) as usize;

        let mut written = 0;
        for slot in &mut buf[..limit] {
            match self.produce_next_byte() {
                Ok(Some(byte)) => {
                    *slot = byte;
                    self.position += 1;
                    written += 1;
                }
                Ok(None) => break,
                // Nothing delivered yet: report the failure directly.
                Err(err) if written == 0 => return Err(err),
                // Deliver what was decoded first.
                Err(_) => break,
            }
        }

        Ok(written)
    }
}

impl<R: io::Read> io::Seek for LmzwReader<R> {
    /// Moves the cursor of the decompressed stream and returns the new
    /// absolute position, measured from the start of the decompressed data.
    ///
    /// Only forward movement is possible: the target is reached by
    /// decompressing and discarding the bytes in between. A target past the
    /// end of the data stops at the end, and the position actually reached is
    /// returned.
    ///
    /// # Errors
    ///
    /// * `InvalidInput` if the requested position is negative or overflows.
    /// * `Unsupported` if the target lies before the current position.
    /// * Any error raised while decompressing the skipped bytes.
    fn seek(&mut self, pos: io::SeekFrom) -> io::Result<u64> {
        /// Applies a signed offset to `base`, or `None` on under/overflow.
        fn apply(base: u64, delta: i64) -> Option<u64> {
            if delta >= 0 {
                // Non-negative `i64` always fits in `u64`.
                base.checked_add(delta as u64)
            } else {
                base.checked_sub(delta.unsigned_abs())
            }
        }

        let target = match pos {
            io::SeekFrom::Start(n) => Some(n),
            io::SeekFrom::Current(delta) => apply(self.position, delta),
            io::SeekFrom::End(delta) => apply(self.uncompressed_size, delta),
        }
        .ok_or_else(|| {
            io::Error::new(
                io::ErrorKind::InvalidInput,
                "invalid seek to a negative or overflowing position",
            )
        })?;

        if target < self.position {
            return Err(io::Error::new(
                io::ErrorKind::Unsupported,
                "cannot seek backwards in a compressed stream",
            ));
        }

        // Skip forward through a small fixed buffer so the skip distance
        // does not dictate the allocation size.
        let mut scratch = [0u8; 4096];
        while self.position < target {
            let want = (target - self.position).min(scratch.len() as u64) as usize;
            if self.read(&mut scratch[..want])? == 0 {
                // End of the decompressed data reached before the target.
                break;
            }
        }

        Ok(self.position)
    }

    /// Returns the number of decompressed bytes produced so far.
    #[inline]
    fn stream_position(&mut self) -> io::Result<u64> {
        Ok(self.position)
    }

    /// Returns the total decompressed length given at construction.
    #[inline]
    fn stream_len(&mut self) -> io::Result<u64> {
        Ok(self.uncompressed_size)
    }
}