rpgcpf 0.1.0

GCPF archive compression and decompression library
Documentation
//! Types and functions for compressing and decompressing [Godot](https://godotengine.org/)'s GCPF files

use std::io::{Cursor, Seek, SeekFrom, Write};

pub use crate::{
    compression_mode::CompressionMode, compressor::Compressor, decompressor::Decompressor,
    error::Error,
};

/// Default block size, in bytes of uncompressed data per block, used by [`Gcpf::compress`]
pub const DEFAULT_BLOCK_SIZE: u32 = 4096;

/// A GCPF file's metadata and compressed data
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct Gcpf {
    /// Compression algorithm used for the data
    pub compression_mode: CompressionMode,

    /// Block size used during compression, in bytes of uncompressed data per block
    pub block_size: u32,

    /// Size of data when uncompressed, in bytes
    pub uncompressed_size: u32,

    /// Compressed data blocks, in the order they are stored in the file
    pub blocks: Vec<Vec<u8>>,
}

impl Gcpf {
    /// Read, parse, and verify a GCPF file
    ///
    /// # Errors
    ///
    /// Will return `Err` if:
    ///
    /// - `data` is truncated or corrupted ([`Error::GcpfTruncated`])
    /// - `data` doesn't have the `b"GCPF"` magic header and footer ([`Error::BadMagic`])
    pub fn read(data: &[u8]) -> Result<Self, Error> {
        // error early in case of obvious truncation
        // 20 bytes is the bare minimum size for an empty GCPF file
        // 4 bytes for magic header, footer, compression mode, block size, and uncompressed size
        if data.len() < 20 {
            return Err(Error::GcpfTruncated);
        }

        if &data[0..4] != b"GCPF" {
            if let Ok(magic) = data[0..4].try_into() {
                return Err(Error::BadMagic(magic));
            }
            return Err(Error::GcpfTruncated);
        }

        let compression_mode = if let Ok(bytes) = data[4..8].try_into() {
            CompressionMode::from(u32::from_le_bytes(bytes))
        } else {
            return Err(Error::GcpfTruncated);
        };

        let block_size = if let Ok(bytes) = data[8..12].try_into() {
            u32::from_le_bytes(bytes)
        } else {
            return Err(Error::GcpfTruncated);
        };

        let uncompressed_size = if let Ok(bytes) = data[12..16].try_into() {
            u32::from_le_bytes(bytes)
        } else {
            return Err(Error::GcpfTruncated);
        };

        let block_count = uncompressed_size.div_ceil(block_size);

        let mut block_sizes = Vec::with_capacity(block_count as usize);

        let mut offset = 16usize;

        if data.len() < offset + (block_count as usize * 4) {
            return Err(Error::GcpfTruncated);
        }

        for _ in 0..block_count {
            let block_size = if let Ok(bytes) = data[offset..offset + 4].try_into() {
                u32::from_le_bytes(bytes)
            } else {
                return Err(Error::GcpfTruncated);
            };

            block_sizes.push(block_size);
            offset += 4;
        }

        // check data len is large enough for all the blocks + footer
        if data.len() < offset + 4 + block_sizes.iter().fold(0, |a, b| a + *b as usize) {
            return Err(Error::GcpfTruncated);
        }

        let mut blocks: Vec<Vec<u8>> = Vec::with_capacity(block_count as usize);

        for block_size in block_sizes {
            blocks.push(data[offset..offset + block_size as usize].into());
            offset += block_size as usize;
        }

        if &data[offset..offset + 4] != b"GCPF" {
            if let Ok(magic) = data[offset..offset + 4].try_into() {
                return Err(Error::BadMagic(magic));
            }
            return Err(Error::GcpfTruncated);
        }

        Ok(Self {
            compression_mode,
            block_size,
            uncompressed_size,
            blocks,
        })
    }

    /// Decompress the data in a [`Gcpf`] instance
    ///
    /// # Errors
    ///
    /// Will return `Err` if:
    ///
    /// - [`Decompressor`] implementation is not available ([`Error::NoDecompressor`])
    /// - An error happens during decompression ([`Error::DecompressionError`])
    /// - An I/O error occurs ([`Error::IOError`])
    pub fn decompress(&self) -> Result<Vec<u8>, Error> {
        let mut buf = Vec::new();

        let decompressor = self.compression_mode.get_decompressor()?;

        for block in &self.blocks {
            buf.write_all(&decompressor.decompress(block)?)?;
        }

        Ok(buf)
    }

    /// Compress `data` to GCPF encoding with the default [`BLOCK_SIZE`]
    ///
    /// # Errors
    ///
    /// See [`Gcpf::Compress_blocksize`]
    pub fn compress(data: &[u8], compression_mode: CompressionMode) -> Result<Vec<u8>, Error> {
        let compressor = compression_mode.get_compressor()?;

        Self::compress_settings(
            data,
            compression_mode,
            DEFAULT_BLOCK_SIZE,
            compressor.as_ref(),
        )
    }

    /// Compress `data` to GCPF encoding with a chosen block size
    ///
    /// # Errors
    ///
    /// Will return `Err` if:
    ///
    /// - the `data` length is greater than 4GiB ([`Error::DataTooBig`])
    /// - an I/O error occurs ([`Error::IOError`])
    /// - [`Compressor`] implementation is not available ([`Error::NoCompressor`])
    /// - An error happens during compression ([`Error::CompressionError`])
    pub fn compress_settings(
        data: &[u8],
        compression_mode: CompressionMode,
        block_size: u32,
        compressor: &dyn Compressor,
    ) -> Result<Vec<u8>, Error> {
        if data.len() > u32::MAX as usize {
            return Err(Error::DataTooBig);
        }

        let mut buf = Vec::new();

        buf.write_all(b"GCPF")?;
        buf.write_all(&u32::from(compression_mode).to_le_bytes())?;
        buf.write_all(&block_size.to_le_bytes())?;

        if let Ok(data_len) = u32::try_from(data.len()) {
            buf.write_all(&data_len.to_le_bytes())?;
        } else {
            return Err(Error::GcpfTruncated);
        }

        let block_count = data.len().div_ceil(block_size as usize);

        let mut buf = Cursor::new(buf);
        buf.seek_relative(16)?;

        Self::write_blocks(data, block_count, block_size, &mut buf, compressor)?;

        buf.seek(SeekFrom::End(0))?;

        buf.write_all(b"GCPF")?;

        Ok(buf.into_inner())
    }

    /// Compress data blocks, write the block size, and then write the block. This method uses a seekable write buffer as an optimization to avoid iterating blocks twice.
    ///
    /// # Errors
    ///
    /// Will return `Err` if
    /// - the `data` length is greater than 4GiB ([`Error::DataTooBig`])
    /// - An error happens during compression ([`Error::CompressionError`])
    /// - an I/O error occurs ([`Error::IOError`])
    ///
    /// # Panics
    ///
    /// Will panic if any of the compressed blocks is larger than [`i64::MAX`] bytes
    fn write_blocks<W>(
        data: &[u8],
        block_count: usize,
        block_size: u32,
        buf: &mut W,
        compressor: &dyn Compressor,
    ) -> Result<(), Error>
    where
        W: Write + Seek,
    {
        // TODO: use a `Cursor` around `buf` to write blocks and block sizes without an
        // intermediary block vector. a previous attempt didn't work, and broke multiblock
        // compression.
        let mut blocks: Vec<Vec<u8>> = Vec::with_capacity(block_count);

        for index in 0..block_count {
            // the last block might be shorter than block_size, don't want to overread the buffer
            let data_slice = if data.len() <= (index + 1) * block_size as usize {
                &data[index * block_size as usize..]
            } else {
                &data[index * block_size as usize..(index + 1) * block_size as usize]
            };

            let compressed = compressor.compress(data_slice)?;

            if compressed.len() > u32::MAX as usize {
                return Err(Error::DataTooBig);
            }

            if let Ok(compressed_len) = u32::try_from(compressed.len()) {
                buf.write_all(&compressed_len.to_le_bytes())?;
                blocks.push(compressed);
            } else {
                return Err(Error::GcpfTruncated);
            }
        }

        for block in blocks {
            buf.write_all(&block)?;
        }

        Ok(())
    }
}