compact-pro 0.2.0

Read compressed files created by Compact Pro
Documentation
#![feature(seek_stream_len)]
#![feature(get_mut_unchecked)]
#![doc=include_str!("../README.md")]

use std::{
    fs,
    io::{self, Read as _, Seek as _},
    path::Path,
    rc::Rc,
};

use binrw::BinReaderExt;
pub use macintosh_utils::{chrono, fourcc, Fork, FourCC};

mod entry_iterator;
mod entry_reader;
pub mod error;
mod lzh;
mod rle;

/// On-disk structures of CPT files
pub mod structs;

use crate::{
    entry_iterator::EntryIterator, entry_reader::StreamDescription, error::Feature,
    error::VerificationError,
};
use entry_reader::EntryReader;
pub use error::Error;
use structs::ArchiveHeader;
pub use structs::{Entry, Flags};
/// CRC-32 algorithm, configured with the `crc` crate's `CRC_32_JAMCRC` parameters, used by
/// [`Archive::verify_entry`] to checksum the contents of an entry.
const CRC32: crc::Crc<u32> = crc::Crc::<u32>::new(&crc::CRC_32_JAMCRC);

/// A structure representing a CPT archive
///
/// Archives are created by [`opening`](`Archive::open`) a file on disk or by wrapping a seekable
/// reader using the [`try_from`](`Archive::try_from`) function.
pub struct Archive<R> {
    /// Reference-counted handle to the underlying reader; a clone of it is handed to every
    /// entry iterator created through `iter`.
    inner: Rc<R>,
    /// Stream position recorded right after the catalog header was read; entry iteration
    /// starts from here.
    entries_start: u64,
}

impl<R> Archive<R> {
    /// Macintosh type code used by CPT files (`PACT`)
    pub const TYPE_CODE: FourCC = fourcc!("PACT");
    /// Macintosh creator code used by CPT files (`CPCT`)
    pub const CREATOR_CODE: FourCC = fourcc!("CPCT");

    /// Returns the underlying reader, note that it's read position will have changed from when you
    /// passed it in
    pub fn into_inner(self) -> R {
        self.ensure_not_iterating();
        // SAFETY: Since no one is iterating, and any open entry would borrow the archive mutably
        // this is safe
        let Archive { inner, .. } = self;
        unsafe { Rc::try_unwrap(inner).unwrap_unchecked() }
    }

    /// Panics at runtime if the archive is being iterated
    ///
    /// This is used to assert our invariant that there can not be two or more iterators over the archive
    /// at the same time, as this would move the seek position of the underlying
    /// reader unpredictably.
    fn ensure_not_iterating(&self) {
        if Rc::strong_count(&self.inner) != 1 || Rc::weak_count(&self.inner) != 0 {
            panic!("Can not modify archive while an iterator is runnning")
        }
    }
}

#[cfg(feature = "macbinary")]
impl<A: io::Read + io::Seek> Archive<macbinary::Reader<A>> {
    /// Creates an [`Archive`] from the given reader
    ///
    /// The reader is first passed through `macbinary::MacBinary`, and the archive itself is
    /// then parsed from the resulting data fork.
    ///
    /// # Errors
    ///
    /// Returns [`Error::InvalidFile`] when the reported type or creator code is neither the
    /// default value nor the code used by CPT files. Any failure to read or seek while parsing
    /// the headers is returned as well.
    pub fn try_from(inner: A) -> Result<Self, Error> {
        use crate::structs::CatalogHeader;

        let envelope = macbinary::MacBinary::try_new(inner)?;

        // A code equal to the default value is accepted as-is; any other value has to match
        // the code Compact Pro uses.
        let unset = FourCC::default();
        let type_code = envelope.type_code();
        let creator_code = envelope.creator_code();
        let type_mismatch = type_code != unset && type_code != Archive::<()>::TYPE_CODE;
        let creator_mismatch =
            creator_code != unset && creator_code != Archive::<()>::CREATOR_CODE;
        if type_mismatch || creator_mismatch {
            return Err(Error::InvalidFile);
        }

        let mut data_fork = envelope.into_data_fork()?;
        let archive_header: ArchiveHeader = data_fork.read_be()?;

        // The archive header has already been consumed, so only the remaining distance to
        // the catalog header needs to be skipped.
        let remaining: i64 =
            archive_header.header_offset as i64 - ArchiveHeader::PACKED_SIZE as i64;
        data_fork.seek(io::SeekFrom::Current(remaining))?;

        // The catalog header is read only to advance past it; its contents are not kept.
        let _ = data_fork.read_be::<CatalogHeader>()?;
        let entries_start = data_fork.stream_position()?;

        Ok(Self {
            inner: Rc::new(data_fork),
            entries_start,
        })
    }
}

impl<R: io::Read + io::Seek> Archive<R> {
    #[cfg(not(feature = "macbinary"))]
    pub fn try_from(mut inner: R) -> Result<Self, Error> {
        let header: ArchiveHeader = inner.read_be()?;
        let header_count_offset: i64 =
            header.header_offset as i64 - ArchiveHeader::PACKED_SIZE as i64;
        inner.seek(std::io::SeekFrom::Current(header_count_offset))?;
        let header_cont = inner.read_be()?;

        let position = inner.stream_position()?;

        Ok(Self {
            inner: Rc::new(inner),
            archive_header: header,
            catalog: header_cont,
            entries_start: position,
        })
    }

    /// Creates an iterator over all archive entries.
    ///
    /// This method panics if another [`EntryIterator`] is currently alive for this archive to
    /// avoid over sharing the underlying reader's seek position.
    pub fn iter(&self) -> Result<EntryIterator<R>, Error> {
        self.ensure_not_iterating();

        EntryIterator::try_at(self.entries_start, self.inner.clone())
    }

    /// Verifies the given entry, returning a verification error if anything goes wrong
    pub fn verify_entry(&mut self, entry: &Entry) -> Result<(), VerificationError> {
        let Some(file) = entry.as_file() else {
            return Ok(());
        };

        let mut checksum = CRC32.digest();

        let mut resource = vec![0u8; file.rsrc_uncompressed_size as usize];
        let mut reader = self.open_entry(entry, Fork::Resource)?;
        reader.read_exact(&mut resource)?;
        checksum.update(&resource);

        let mut data = vec![0u8; file.data_uncompressed_size as usize];
        let mut reader = self.open_entry(entry, Fork::Data)?;
        reader.read_exact(&mut data)?;
        checksum.update(&data);

        if checksum.finalize() != file.crc32 {
            Err(VerificationError::InvalidChecksum)
        } else {
            Ok(())
        }
    }

    /// Opens the specified fork of an entry for reading
    pub fn open_entry(&mut self, entry: &Entry, fork: Fork) -> Result<EntryReader<'_, R>, Error> {
        if entry.as_directory().is_some() {
            return EntryReader::try_new(&mut self.inner, StreamDescription::default());
        }

        let file = entry.as_file().unwrap();
        if file.flags.contains(Flags::ENCRYPTED) {
            return Err(Error::UnsupportedFeature(Feature::Encryption));
        }

        EntryReader::try_new(&mut self.inner, entry.spec(fork))
    }
}

#[cfg(not(feature = "macbinary"))]
impl Archive<fs::File> {
    /// Opens the file at `path` and parses it as an [`Archive`]
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened or is not a readable archive.
    pub fn open(path: impl AsRef<Path>) -> Result<Self, Error> {
        let file = fs::File::open(path)?;
        Self::try_from(file)
    }
}

#[cfg(feature = "macbinary")]
impl Archive<macbinary::Reader<fs::File>> {
    /// Opens the file at `path` as an [`Archive`], transparently undoing MacBinary encoding of
    /// the archive (not its entries).
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened or is not a readable archive.
    pub fn open(path: impl AsRef<Path>) -> Result<Self, Error> {
        let file = fs::File::open(path)?;
        Archive::try_from(file)
    }
}

/// Detect if the given reader is a CPT archive
///
/// On success the creator code and the type code of CPT files are returned, in that order.
/// The reader is consumed either way.
///
/// ```
/// let mut file = std::fs::File::open("sample-files/FRED.CPT").unwrap();
/// assert!(cpt::probe(file).is_ok());
///
/// let mut file = std::fs::File::open("README.md").unwrap();
/// assert!(cpt::probe(file).is_err());
/// ```
pub fn probe<R: io::Read + io::Seek>(reader: R) -> Result<(FourCC, FourCC), Error> {
    // TODO: we should probably verify the header crc here to avoid false positives
    // Being able to parse the headers is all that is checked; the archive itself is dropped.
    Archive::try_from(reader).map(|_| (Archive::<()>::CREATOR_CODE, Archive::<()>::TYPE_CODE))
}

/// Verify the structure and checksums of the given reader
///
/// Every file entry of the archive is checked with [`Archive::verify_entry`]; verification
/// stops at the first entry that fails.
///
/// # Errors
///
/// Returns the error of the first failing step: opening the archive, creating the entry
/// iterator, or verifying an entry. A checksum mismatch is reported as
/// [`VerificationError::InvalidChecksum`].
///
/// ```rust
/// let mut file = std::fs::File::open("sample-files/FRED.CPT").unwrap();
/// assert!(cpt::verify(file).is_ok());
/// ```
pub fn verify<R: io::Read + io::Seek>(reader: R) -> Result<(), VerificationError> {
    let mut archive = Archive::try_from(reader)?;
    archive
        // Propagate a failure to start iterating instead of panicking in library code.
        .iter()?
        // NOTE(review): entries that fail to parse are still skipped silently here, as
        // before; consider surfacing them as verification failures.
        .filter_map(|entry| entry.ok())
        .filter(|entry| entry.is_file())
        // Stop at the first failing entry and hand its actual error to the caller rather
        // than collapsing every failure into `InvalidChecksum`.
        .try_for_each(|entry| archive.verify_entry(&entry))
}

#[cfg(test)]
mod test {
    use std::{
        fs::{exists, File},
        path::PathBuf,
    };

    use super::Archive;

    /// Opening the sample archives yields the expected number of entries.
    #[test]
    fn open_sample_file() {
        let archive = open_fixture("Deep Thoughts Quotes.cpt");
        assert_eq!(archive.iter().unwrap().count(), 91);

        let archive = open_fixture("Misc Quotes.cpt");
        assert_eq!(archive.iter().unwrap().count(), 80);
    }

    /// Every file entry of the sample archives passes checksum verification.
    #[test]
    fn verify_checksums() {
        verify_archive("Deep Thoughts Quotes.cpt");
        verify_archive("Compact Pro Package (English)");
        verify_archive("zipit.sea");
        verify_archive("Misc Quotes.cpt");
    }

    /// Archives can be opened directly from a path.
    #[test]
    fn open_file_backed_via_path() {
        assert!(Archive::open("sample-files/Deep Thoughts Quotes.cpt").is_ok());
    }

    /// MacBinary-wrapped archives are unwrapped transparently and verify correctly.
    #[test]
    fn can_open_macbinaries() {
        // Fixture names are relative to `sample-files/`; `open_fixture_raw` adds the directory.
        verify_archive("freddie1.cpt");
        verify_archive("sgnews49.cpt");
        verify_archive("sgnews02.cpt");
        verify_archive("giffer.cpt");
        verify_archive("h2chr308.cpt");
        verify_archive("h2dla250.cpt");
        verify_archive("h2lgm252.cpt");
        verify_archive("jpeg2gif.cpt");
    }

    /// Opens the fixture `name` from the crate's `sample-files/` directory.
    ///
    /// Panics with a descriptive message if the fixture is missing.
    fn open_fixture_raw(name: &'static str) -> File {
        let path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .join("sample-files/")
            .join(name);

        if !exists(&path).unwrap() {
            panic!("Test fixture {name} does not exist!");
        }

        std::fs::File::open(path).unwrap()
    }

    /// Opens the fixture `name` and parses it as an archive.
    fn open_fixture(name: &'static str) -> Archive<macbinary::Reader<File>> {
        let file = open_fixture_raw(name);
        Archive::try_from(file).unwrap()
    }

    /// Verifies the checksum of every file entry in the fixture `name`.
    fn verify_archive(name: &'static str) {
        // Open the requested fixture; a hard-coded file name here would make every caller
        // verify the same archive.
        let mut archive = open_fixture(name);
        archive
            .iter()
            .unwrap()
            .filter_map(|a| a.ok())
            .filter(|a| a.is_file())
            .for_each(|entry| {
                archive.verify_entry(&entry).unwrap_or_else(|_| {
                    panic!("Entry {} in {} should be valid", entry.name(), name)
                })
            });
    }
}