sit-rs 0.3.0

Rust-native extraction for StuffIt Expander archive files
Documentation
use std::{
    io::{self, Read as _, Seek as _},
    rc::Rc,
};

use binrw::BinReaderExt;
use crc::{self, Digest};

use crate::{
    Entry, Error,
    algos::EntryReader,
    structs::{
        v1,
        v5::{self, EntryBinReadArgs},
    },
};

/// Size of the scratch buffer that `VerifyingEntryReader::slurp` fills on each read.
const VERIFICATION_CHUNK_SIZE: usize = 1 << 20 /* bytes => 1 MiB */;
/// CRC-16 calculator built from the `crc` crate's `CRC_16_ARC` parameter set.
const CRC16: crc::Crc<u16> = crc::Crc::<u16>::new(&crc::CRC_16_ARC);

/// Iterator over the entries of an archive, yielding [`Entry`] values.
///
/// The traversal state lives in `next_offset` and `stack`; both are updated by
/// every call to `next()`.
pub struct VerifyingIterator<R: io::Read + io::Seek> {
    /// Absolute position the reader seeks to at the start of the next `next()` call.
    pub(crate) next_offset: u64,
    /// One counter per directory level that is currently open. `next()` decrements
    /// the top counter on each call and pops it once it has reached zero; iteration
    /// stops when the stack is empty.
    pub(crate) stack: Vec<u32>,
    /// When `true`, entries are parsed as `v1::Entry`; otherwise as `v5::Entry`.
    pub(crate) v1: bool,
    /// Not read by the iterator code visible in this file.
    pub(crate) _next_file_index: usize,

    /// Reference-counted handle to the underlying reader. `next()` obtains mutable
    /// access to it through the `Rc`.
    pub(crate) reader: Rc<R>,
}

impl<R: io::Read + io::Seek> Iterator for VerifyingIterator<R> {
    type Item = Entry;

    /// Reads the entry located at `self.next_offset` and advances the traversal state.
    ///
    /// Returns `None` when the directory stack is exhausted, and also when seeking,
    /// parsing an entry header, or querying the stream position fails — I/O problems
    /// end the iteration instead of panicking.
    ///
    /// For v5 archives an [`Entry::DirectoryEnd`] is synthesised once the child counter
    /// of the current directory has reached zero (and an enclosing level remains); for
    /// v1 archives it is produced from the explicit `v1::Entry::DirectoryEnd` record.
    fn next(&mut self) -> Option<Self::Item> {
        // NOTE(review): `Rc::get_mut_unchecked` is only sound while no other `Rc`/`Weak`
        // handle to this reader is dereferenced or borrowed for the duration of this
        // call. Nothing in this block enforces that — confirm it at the sites that
        // clone `reader`.
        let reader = unsafe { Rc::get_mut_unchecked(&mut self.reader) };

        match self.stack.last_mut() {
            None => return None,
            Some(0) => {
                // Every child of the current directory has been yielded: close it.
                self.stack.pop();

                return if self.stack.is_empty() {
                    None
                } else {
                    // A failing position query ends the iteration like every other
                    // I/O failure in this function, rather than panicking.
                    reader.stream_position().ok().map(Entry::DirectoryEnd)
                };
            }
            Some(remaining) => *remaining -= 1,
        }

        let Ok(entry_offset) = reader.seek(io::SeekFrom::Start(self.next_offset)) else {
            log::warn!("Failed seeking to next archive entry");
            return None;
        };

        if self.v1 {
            let Ok(entry) = reader.read_be::<v1::Entry>() else {
                return None;
            };

            // Position right after the entry header, i.e. where the payload starts.
            let Ok(payload_offset) = reader.stream_position() else {
                return None;
            };

            match entry {
                v1::Entry::Directory(dir) => {
                    self.next_offset = payload_offset;
                    // v1 directories are closed by an explicit end record, so the
                    // counter is set to the maximum instead of a real child count.
                    self.stack.push(u32::MAX);

                    Some(Entry::Directory(dir.into()))
                }
                v1::Entry::DirectoryEnd => {
                    self.next_offset = payload_offset;
                    self.stack.pop();

                    Some(Entry::DirectoryEnd(payload_offset))
                }
                v1::Entry::File(file) => {
                    // The next header follows both compressed forks of this file.
                    self.next_offset = payload_offset
                        + file.data_compressed_size as u64
                        + file.rsrc_compressed_size as u64;

                    Some(Entry::File(file.into()))
                }
            }
        } else {
            let Ok(entry) = reader.read_be_args::<v5::Entry>(
                EntryBinReadArgs::builder().offset(entry_offset).finalize(),
            ) else {
                return None;
            };

            // Position right after the entry header, i.e. where the payload starts.
            let Ok(payload_offset) = reader.stream_position() else {
                return None;
            };

            match entry {
                v5::Entry::Directory(dir) => {
                    if dir.marks_end() {
                        // An end marker is not a real directory: skip it and hand back
                        // whatever follows. Previously the result of this call was
                        // dropped, which lost the following entry and then emitted the
                        // marker itself as a directory.
                        // NOTE(review): the marker has consumed one slot of the current
                        // directory's counter above; confirm whether `child_count`
                        // accounts for end markers.
                        self.next_offset = payload_offset;
                        return self.next();
                    }

                    self.stack.push(dir.child_count);
                    self.next_offset = dir.first_child_offset;

                    Some(Entry::Directory(dir.into()))
                }
                v5::Entry::File(mut file) => {
                    // Nothing has been read since `payload_offset` was taken, so reuse
                    // it instead of issuing a second, panicking position query.
                    file.payload_offset = payload_offset;
                    self.next_offset = file.next_entry_offset as u64;

                    Some(Entry::File(file.into()))
                }
            }
        }
    }
}

/// Wrapper around an [`EntryReader`] whose `io::Read` implementation checks the
/// bytes it hands out against an expected CRC-16 value.
pub struct VerifyingEntryReader<'a, R: io::Read + io::Seek> {
    /// Reader the data is pulled from.
    inner: EntryReader<'a, R>,
    /// Expected CRC-16 value of the stream, as passed to `new`.
    checksum: u16,
    /// Running CRC-16 digest over the bytes returned by `read` so far.
    digest: Digest<'a, u16>,
    /// When `true`, `read` does not feed data into `digest`. Passed in by the
    /// caller of `new`, and also set by `read` once the CRC-16 check has passed.
    skip: bool,
    /// When `true`, `read` returns `Ok(0)` without touching `inner`. Set by `new`
    /// for a zero-length stream, and by `read` after the `Arsenic` reader's own
    /// checksum was found valid.
    empty: bool,
    /// Set once a checksum mismatch has been detected; every later `read`
    /// returns the mismatch error again.
    invalid: bool,
}

impl<'a, R: io::Seek + io::Read> VerifyingEntryReader<'a, R> {
    /// Wraps `inner` so that reads are checked against `checksum`.
    ///
    /// With `skip` set, the data is not fed into the CRC-16 digest by `read`.
    ///
    /// # Panics
    ///
    /// Panics if the length of `inner` cannot be determined.
    pub(crate) fn new(mut inner: EntryReader<'a, R>, checksum: u16, skip: bool) -> Self {
        // A zero-length stream is flagged up front so `read` can short-circuit.
        let empty = inner.stream_len().unwrap() == 0;
        let digest = CRC16.digest();

        Self {
            inner,
            checksum,
            digest,
            skip,
            empty,
            invalid: false,
        }
    }
}

impl<'a, R: io::Read + io::Seek> VerifyingEntryReader<'a, R> {
    /// Read all available data and verify its contents using the provided checksum.
    ///
    /// The data itself is discarded; only the outcome of the verification performed
    /// by this type's `read` implementation is reported.
    ///
    /// # Errors
    ///
    /// Returns an error as soon as a `read` call fails, which includes the
    /// checksum-mismatch error raised by `read`.
    pub fn slurp(mut self) -> Result<(), Error> {
        let mut chunk = vec![0u8; VERIFICATION_CHUNK_SIZE];
        // Widened to `u64` so the running total cannot overflow on 32-bit targets.
        let mut total: u64 = 0;

        loop {
            // `Read::read` may legitimately return fewer bytes than requested before
            // the stream is exhausted, so a short read must not be taken as
            // end-of-stream: stopping there could return `Ok(())` before the checksum
            // was ever evaluated. Only a zero-length read ends the loop.
            let read = self.read(&mut chunk)?;
            if read == 0 {
                log::info!("slurpded: {total}");
                return Ok(());
            }

            total += read as u64;
        }
    }
}

impl<'a, R: io::Read + io::Seek> io::Read for VerifyingEntryReader<'a, R> {
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        if self.empty {
            return Ok(0);
        }

        if self.invalid {
            return Err(io::Error::other(Error::ChecksumMismatch(
                crate::error::ChecksumLocation::DataStream,
            )));
        }

        let size = self.inner.read(buf)?;
        if self.skip {
            if self.inner.ended()?
                && let EntryReader::Arsenic { reader, .. } = &mut self.inner
            {
                if !reader.is_checksum_valid() {
                    self.invalid = true;
                    return Err(io::Error::other(Error::ChecksumMismatch(
                        crate::error::ChecksumLocation::DataStream,
                    )));
                } else {
                    self.empty = true;
                }
            }

            return Ok(size);
        }
        self.digest.update(&buf[0..size]);

        if self.inner.ended()? {
            self.digest.update(&[
                (self.checksum & 0xFF) as u8,
                ((self.checksum >> 8) & 0xFF) as u8,
            ]);

            log::info!(
                "Checking CRC16 checksum 0x{:04x} against 0x{:04x}",
                self.checksum,
                self.digest.clone().finalize(),
            );

            if self.digest.clone().finalize() != 0 {
                log::info!("Checksum is invalid");
                self.invalid = true;
                return Err(io::Error::other(Error::ChecksumMismatch(
                    crate::error::ChecksumLocation::DataStream,
                )));
            }

            log::info!("Checksum is valid");
            self.skip = true;
        }

        Ok(size)
    }
}