// sit-rs 0.3.0
//
// Rust-native extraction for StuffIt Expander archive files
// Documentation
use std::{cmp::min, io};

use binrw::{BinRead, BinReaderExt, binread};
use fourcc::{FourCC, fourcc};
use macintosh_utils::{FinderFlags, Fork, decode_string};

use super::{Algorithm, Version};

/// Classifies four-character file signatures.
///
/// `ArchiveHeader` uses this to reject input whose leading signature does not belong to the
/// header layout it parses.
trait SitIdentifier {
    /// Returns `true` when the code is one of the signatures accepted by `ArchiveHeader`.
    fn is_v1(&self) -> bool;
}

impl SitIdentifier for FourCC {
    /// Tests the code against every signature accepted for this header layout.
    fn is_v1(&self) -> bool {
        match self {
            // Signatures documented on `ArchiveHeader::file_code`
            fourcc!("SIT!") | fourcc!("SITD") | fourcc!("SIT2") | fourcc!("SIT5") => true,
            // StuffIt Installer Maker 3.1 and StuffIt Installer Maker 6.5
            fourcc!("STi3") | fourcc!("ST65") => true,
            _ => false,
        }
    }
}

/// Header found at the very start of an archive, parsed as big-endian data.
///
/// Parsing fails if the leading signature is not accepted by `SitIdentifier::is_v1` or if the
/// creator code is not `rLau`.
#[binread]
#[derive(Debug)]
#[br(big)]
pub struct ArchiveHeader {
    /// File identifier, this should be 'SIT!', 'SITD', 'SIT2' or 'SIT5'.
    /// The installer signatures 'STi3' and 'ST65' are accepted as well.
    ///
    /// If it says 'Stuf' you are probably dealing with a version 5 archive instead
    #[br(assert(file_code.is_v1()))]
    pub file_code: FourCC,
    /// Number of entries in the root of the archive
    pub entry_count: u16,
    /// Total size of the archive file in bytes (including header and everything)
    pub archive_len: u32,
    /// Must be 'rLau' – this is in reference to Raymond Lau who created the format.
    ///
    /// Only validated while parsing; being `temp`, it is not stored in the struct.
    #[br(assert(creator_code==fourcc!("rLau")), temp)]
    creator_code: FourCC,
    /// File format (sub-)version
    pub version: Version,
    /// Reserved for future use (in v1)
    /// - if `version==1` this is set to 00 00 00 00 00 00 00
    /// - if `version==2` this is set to XX NN NN NN NN XX XX
    ///    where NN NN NN NN is the offset to the first root entry from the start of the file in bytes
    ///
    /// See `first_entry_offset` for the decoded value.
    pub reserved: [u8; 7],
}

impl ArchiveHeader {
    /// Byte offset, counted from the start of the archive, of the first root entry header.
    ///
    /// For `Version::Later` the offset is stored big-endian in bytes 1 through 4 of
    /// `reserved`. Every other version uses the fixed offset `0x16`.
    pub fn first_entry_offset(&self) -> u64 {
        const FIXED_OFFSET: u64 = 0x16;

        match self.version {
            Version::Later => {
                // Byte 0 and the two trailing bytes are not part of the offset.
                let [_, b3, b2, b1, b0, ..] = self.reserved;
                u64::from(u32::from_be_bytes([b3, b2, b1, b0]))
            }
            Version::Early => FIXED_OFFSET,
            _ => FIXED_OFFSET,
        }
    }
}

/// Header of a file entry, parsed as big-endian data.
///
/// The `offset` import is the position of this header in the archive; it is only used to
/// compute `payload_offset`.
#[binread]
#[derive(Debug, Clone)]
#[br(big, import { offset: u64 })]
pub struct File {
    #[br(restore_position, map(|v:u8| v & 128 != 0))]
    /// Resource fork is encrypted.
    ///
    /// Taken from the high bit (128) of the byte at this position. The reader position is
    /// restored afterwards, so `rsrc_compression` is parsed starting at the same byte.
    pub rsrc_encrypted: bool,
    /// Method used to compress resource fork
    pub rsrc_compression: Algorithm,
    #[br(restore_position, map(|v:u8| v & 128 != 0))]
    /// Data fork is encrypted.
    ///
    /// Taken from the high bit (128) of the byte at this position. The reader position is
    /// restored afterwards, so `data_compression` is parsed starting at the same byte.
    pub data_encrypted: bool,
    /// Method used to compress data fork
    pub data_compression: Algorithm,
    /// Length of the name inside the 63-byte name field that follows (not stored in the struct)
    #[br(temp)]
    name_len: u8,
    #[br(map(|r: [u8; 63]| decode_string(r[0..min(name_len as usize,63)].to_vec())))]
    /// Name of the compressed file
    ///
    /// Always occupies 63 bytes on disk; only the first `name_len` bytes (capped at 63) are
    /// decoded.
    pub file_name: String,
    /// Mac OS file type
    pub file_code: FourCC,
    /// Mac OS file creator
    pub creator_code: FourCC,
    /// Attributes for file in Finder
    pub flags: FinderFlags,
    /// Creation date in classic Mac OS format (seconds since 1904)
    #[br(map(macintosh_utils::date))]
    pub created_at: chrono::DateTime<chrono::Utc>,
    /// Modification date in classic Mac OS format (seconds since 1904)
    #[br(map(macintosh_utils::date))]
    pub modified_at: chrono::DateTime<chrono::Utc>,
    /// Size of resource fork after decompression
    pub rsrc_uncompressed_size: u32,
    /// Size of data fork after decompression
    pub data_uncompressed_size: u32,
    /// Size of compressed resource fork data
    pub rsrc_compressed_size: u32,
    /// Size of compressed data fork data
    pub data_compressed_size: u32,
    /// CRC-16 checksum for decompressed resource data
    ///
    /// The CRC-16 configuration is `CRC-16-IBM` also known as `CRC-16-ARC` where the CRC of ASCII "123456789" is 0xbb3d.
    pub rsrc_crc: u16,
    /// CRC-16 checksum for decompressed data fork data
    ///
    /// The CRC-16 configuration is `CRC-16-IBM` also known as `CRC-16-ARC` where the CRC of ASCII "123456789" is 0xbb3d.
    pub data_crc: u16,
    /// Reserved for future use
    pub reserved: [u8; 6],
    /// CRC-16 checksum of the file header without these last two bytes
    ///
    /// The CRC-16 configuration is `CRC-16-IBM` also known as `CRC-16-ARC` where the CRC of ASCII "123456789" is 0xbb3d.
    pub header_crc: u16,

    /// Position directly after this header: the imported `offset` plus `Entry::HEADER_SIZE`.
    ///
    /// Not read from the stream. `File::offset` places the resource fork here and the data
    /// fork right after the compressed resource fork.
    #[br(calc(offset + Entry::HEADER_SIZE))]
    pub payload_offset: u64,

    #[br(ignore)]
    /// Index of the file entry determined by counting previous file entries in depth-first search
    /// order
    ///
    /// Not read from the stream; presumably assigned by the code that walks the archive
    /// (confirm against the caller).
    pub index: usize,
}

impl File {
    /// Size in bytes of the requested fork after decompression.
    #[inline]
    pub fn uncompressed_size(&self, fork: Fork) -> usize {
        let bytes = match fork {
            Fork::Resource => self.rsrc_uncompressed_size,
            Fork::Data => self.data_uncompressed_size,
        };
        bytes as usize
    }

    /// Size in bytes the requested fork occupies inside the archive.
    #[inline]
    pub fn compressed_size(&self, fork: Fork) -> usize {
        let bytes = match fork {
            Fork::Resource => self.rsrc_compressed_size,
            Fork::Data => self.data_compressed_size,
        };
        bytes as usize
    }

    /// Compression algorithm recorded for the requested fork.
    #[inline]
    pub fn compression_method(&self, fork: Fork) -> Algorithm {
        match fork {
            Fork::Resource => self.rsrc_compression,
            Fork::Data => self.data_compression,
        }
    }

    /// CRC-16 recorded for the decompressed contents of the requested fork.
    #[inline]
    pub fn checksum(&self, fork: Fork) -> u16 {
        match fork {
            Fork::Resource => self.rsrc_crc,
            Fork::Data => self.data_crc,
        }
    }

    /// Whether the requested fork is flagged as encrypted.
    #[inline]
    pub fn encrypted(&self, fork: Fork) -> bool {
        match fork {
            Fork::Resource => self.rsrc_encrypted,
            Fork::Data => self.data_encrypted,
        }
    }

    /// Offset at which the compressed bytes of the requested fork start.
    ///
    /// The resource fork begins at `payload_offset`; the data fork follows immediately after
    /// the compressed resource fork.
    #[inline]
    pub fn offset(&self, fork: Fork) -> u64 {
        let preceding = match fork {
            Fork::Resource => 0,
            Fork::Data => self.compressed_size(Fork::Resource) as u64,
        };
        self.payload_offset + preceding
    }

    /// Returns `true` if at least one of the two forks is flagged as encrypted.
    pub fn uses_encryption(&self) -> bool {
        self.data_encrypted || self.rsrc_encrypted
    }
}

/// Header of a directory entry, parsed as big-endian data.
///
/// The leading fields mirror the positions used by `File` headers; `Entry::read_options`
/// decides which of the two to parse from the first header byte.
// NOTE(review): `allow(unused)` presumably silences warnings for fields that are parsed only
// to advance the reader (e.g. `garbage`) — confirm before removing.
#[allow(unused)]
#[binread]
#[derive(Debug, Clone)]
#[br(big)]
pub struct Directory {
    /// First flag byte (not stored in the struct); bit 16 feeds `contains_encrypted_entries`.
    #[br(temp)]
    flags1: u8,
    /// Set when bit 16 of the first flag byte is set; exposed through `uses_encryption`.
    #[br(calc(flags1 & 16 != 0))]
    contains_encrypted_entries: bool,
    /// Second flag byte; its meaning is not evident from this file.
    pub flags2: u8,
    /// Length of the name inside the 63-byte name field that follows (not stored in the struct)
    #[br(temp)]
    name_len: u8,
    #[br(map(|r: [u8; 63]| decode_string(r[0..min(name_len as usize, 63)].to_vec())))]
    /// Name of the directory
    ///
    /// Always occupies 63 bytes on disk; only the first `name_len` bytes (capped at 63) are
    /// decoded.
    pub file_name: String,
    /// Position of Mac OS file type and creator code in file header, unused for directories
    garbage: [u8; 8],
    /// Attributes for directory in Finder
    pub flags: FinderFlags,
    /// Creation date in classic Mac OS format (seconds since 1904)
    #[br(map(macintosh_utils::date))]
    pub created_at: chrono::DateTime<chrono::Utc>,
    /// Modification date in classic Mac OS format (seconds since 1904)
    #[br(map(macintosh_utils::date))]
    pub modified_at: chrono::DateTime<chrono::Utc>,
}

impl Directory {
    /// Always `0`; a directory entry reports no fork contents.
    #[inline]
    pub fn uncompressed_size(&self, _fork: Fork) -> usize {
        0
    }

    /// Always `Algorithm::None`, whichever fork is asked for.
    #[inline]
    pub fn algorithm(&self, _fork: Fork) -> Algorithm {
        Algorithm::None
    }

    /// Always `0`; a directory entry reports no stored fork bytes.
    #[inline]
    pub fn compressed_size(&self, _fork: Fork) -> usize {
        0
    }

    /// Always `0`; no checksum is reported for a directory entry.
    #[inline]
    pub fn checksum(&self, _fork: Fork) -> u16 {
        0
    }

    /// Always `0`; a directory entry reports no fork offset.
    #[inline]
    pub fn offset(&self, _fork: Fork) -> u64 {
        0
    }

    /// Returns the flag parsed from the header that marks the directory as containing
    /// encrypted entries.
    pub fn uses_encryption(&self) -> bool {
        self.contains_encrypted_entries
    }
}

/// Represents an entry in the sit archive
///
/// Every variant corresponds to one fixed-size header of `Entry::HEADER_SIZE` bytes.
pub enum Entry {
    /// A file entry with its parsed header
    File(File),
    /// Start of a directory, with its parsed header
    Directory(Directory),
    /// Marker header that ends a directory; none of its fields are parsed
    DirectoryEnd,
}

impl Entry {
    /// Fixed size in bytes of an archive entry header on disk
    pub const HEADER_SIZE: u64 = 112;
}

impl BinRead for Entry {
    type Args<'a> = ();

    /// Reads one fixed-size entry header and classifies it by the flag bits of its first byte.
    ///
    /// - bits 32 and 1 both set: end-of-directory marker
    /// - bit 32 set, bit 1 clear: directory header
    /// - bit 32 clear: file header
    ///
    /// The whole header is consumed from `reader` in every case. Headers are always parsed
    /// as big-endian; the endianness passed in is ignored.
    fn read_options<R: io::Read + io::Seek>(
        reader: &mut R,
        _endian: binrw::Endian,
        _args: Self::Args<'_>,
    ) -> binrw::BinResult<Self> {
        // Position of the header itself; `File` derives its payload offset from it.
        let offset = reader.stream_position()?;

        let mut header = vec![0u8; Entry::HEADER_SIZE as usize];
        reader.read_exact(&mut header)?;

        let marker = header[0];
        let directory_bit = (marker & 32) != 0;
        let end_bit = (marker & 1) != 0;

        let entry = match (directory_bit, end_bit) {
            (true, true) => Entry::DirectoryEnd,
            (true, false) => Entry::Directory(io::Cursor::new(header).read_be()?),
            (false, _) => Entry::File(
                io::Cursor::new(header).read_be_args(FileBinReadArgs { offset })?,
            ),
        };
        Ok(entry)
    }
}

impl From<File> for Entry {
    fn from(val: File) -> Self {
        Entry::File(val)
    }
}

impl From<Directory> for Entry {
    fn from(val: Directory) -> Self {
        Entry::Directory(val)
    }
}