unity-asset-binary 0.2.0

Unity binary file format parser (AssetBundle, SerializedFile)
Documentation
//! Bundle compression handling
//!
//! This module provides compression and decompression functionality
//! for Unity AssetBundle blocks, supporting LZ4, LZMA, and Brotli.

use super::header::BundleHeader;
use crate::compression::{CompressionBlock, CompressionType, decompress};
use crate::error::{BinaryError, Result};
use crate::reader::{BinaryReader, ByteOrder};

/// Bundle compression handler
///
/// A stateless unit struct used purely as a namespace: every operation is an
/// associated function (no `self`), covering blocks-info decompression,
/// block-table parsing, data-block decompression, and block
/// statistics/validation helpers.
pub struct BundleCompression;

impl BundleCompression {
    /// Decompress the blocks-info section of a bundle with no size cap.
    ///
    /// Convenience wrapper that forwards to
    /// [`Self::decompress_blocks_info_limited`] with the limit set to `None`.
    ///
    /// # Errors
    ///
    /// Returns whatever error [`Self::decompress_blocks_info_limited`] produces.
    pub fn decompress_blocks_info(
        header: &BundleHeader,
        compressed_data: &[u8],
    ) -> Result<Vec<u8>> {
        let unlimited: Option<usize> = None;
        Self::decompress_blocks_info_limited(header, compressed_data, unlimited)
    }

    /// Decompress the blocks-info section, optionally capping the declared output size.
    ///
    /// The expected output size is read from `header.uncompressed_blocks_info_size`
    /// and the codec from the low six bits of `header.flags`:
    /// `0` = stored, `1` = LZMA, `2`/`3` = LZ4/LZ4HC, `4` = Brotli.
    ///
    /// For the stored case the input is copied as-is; its length is not compared
    /// against the declared size.
    ///
    /// # Errors
    ///
    /// * [`BinaryError::ResourceLimitExceeded`] when `max_uncompressed_size` is
    ///   `Some` and the declared size is larger than it.
    /// * An "unsupported" error for any other codec id.
    /// * Any error returned by the underlying `decompress` call.
    pub fn decompress_blocks_info_limited(
        header: &BundleHeader,
        compressed_data: &[u8],
        max_uncompressed_size: Option<usize>,
    ) -> Result<Vec<u8>> {
        let declared_size = header.uncompressed_blocks_info_size as usize;
        match max_uncompressed_size {
            Some(cap) if declared_size > cap => {
                return Err(BinaryError::ResourceLimitExceeded(format!(
                    "Blocks info uncompressed size {} exceeds limit {}",
                    declared_size, cap
                )));
            }
            _ => {}
        }

        // CompressionTypeMask: only the low six bits carry the codec id.
        let codec_id = header.flags & 0x3F;
        let codec = match codec_id {
            // Stored data needs no codec at all.
            0 => return Ok(compressed_data.to_vec()),
            1 => CompressionType::Lzma,
            2 | 3 => CompressionType::Lz4,
            // Brotli (newer Unity versions)
            4 => CompressionType::Brotli,
            unknown => {
                return Err(BinaryError::unsupported(format!(
                    "Unknown compression type: {}",
                    unknown
                )));
            }
        };

        decompress(compressed_data, codec, declared_size)
    }

    /// Parse the compression block table out of decompressed blocks-info bytes.
    ///
    /// Forwards to [`Self::parse_compression_blocks_limited`] using the limits
    /// from `BundleLoadOptions::fast()`.
    ///
    /// # Errors
    ///
    /// Returns whatever error [`Self::parse_compression_blocks_limited`] produces.
    pub fn parse_compression_blocks(data: &[u8]) -> Result<Vec<CompressionBlock>> {
        let fast_options = super::types::BundleLoadOptions::fast();
        Self::parse_compression_blocks_limited(data, &fast_options)
    }

    /// Parse the compression block table, enforcing `options.max_blocks`.
    ///
    /// `data` is read big-endian as: 16 bytes that are skipped (the uncompressed
    /// data hash), an `i32` block count, then one 10-byte entry per block made of
    /// a `u32` uncompressed size, a `u32` compressed size and `u16` flags.
    ///
    /// # Errors
    ///
    /// * An invalid-data error when the block count is negative or the table
    ///   size computation overflows.
    /// * [`BinaryError::ResourceLimitExceeded`] when the count exceeds
    ///   `options.max_blocks`.
    /// * A not-enough-data error when `data` is shorter than header plus table.
    /// * Any error raised by the reader while decoding a field.
    pub fn parse_compression_blocks_limited(
        data: &[u8],
        options: &super::types::BundleLoadOptions,
    ) -> Result<Vec<CompressionBlock>> {
        const HASH_LEN: usize = 16;
        const COUNT_LEN: usize = 4;
        const ENTRY_LEN: usize = 10;

        let mut cursor = BinaryReader::new(data, ByteOrder::Big);

        // The hash is not used, but it must be consumed so the count is read
        // from the right offset.
        cursor.read_bytes(HASH_LEN)?;

        let raw_count = cursor.read_i32()?;
        if raw_count < 0 {
            return Err(BinaryError::invalid_data(format!(
                "Negative compression block count: {}",
                raw_count
            )));
        }
        let entry_count = raw_count as usize;
        if entry_count > options.max_blocks {
            return Err(BinaryError::ResourceLimitExceeded(format!(
                "Compression block count {} exceeds limit {}",
                entry_count, options.max_blocks
            )));
        }

        // Reject a table that cannot fit in `data` before decoding any entry.
        let Some(needed) = entry_count
            .checked_mul(ENTRY_LEN)
            .and_then(|table_len| table_len.checked_add(HASH_LEN + COUNT_LEN))
        else {
            return Err(BinaryError::invalid_data(
                "Compression block table size overflow",
            ));
        };
        let available = data.len();
        if available < needed {
            return Err(BinaryError::not_enough_data(needed, available));
        }

        (0..entry_count)
            .map(|_| -> Result<CompressionBlock> {
                let uncompressed_size = cursor.read_u32()?;
                let compressed_size = cursor.read_u32()?;
                let flags = cursor.read_u16()?;
                Ok(CompressionBlock::new(
                    uncompressed_size,
                    compressed_size,
                    flags,
                ))
            })
            .collect()
    }

    /// Read and decompress every data block with no memory cap.
    ///
    /// Forwards to [`Self::decompress_data_blocks_limited`] with `max_memory`
    /// set to `None` and returns the concatenated decompressed bytes.
    ///
    /// # Errors
    ///
    /// Returns whatever error [`Self::decompress_data_blocks_limited`] produces.
    pub fn decompress_data_blocks(
        header: &BundleHeader,
        blocks: &[CompressionBlock],
        reader: &mut BinaryReader,
    ) -> Result<Vec<u8>> {
        let no_cap: Option<usize> = None;
        Self::decompress_data_blocks_limited(header, blocks, reader, no_cap)
    }

    /// Read and decompress every data block, optionally capping total output size.
    ///
    /// The declared uncompressed sizes of all `blocks` are summed first. When
    /// `max_memory` is `Some`, a total above that limit is rejected before any
    /// data is read. The output buffer is then reserved fallibly, so a block
    /// table that declares an unallocatable size yields an error instead of
    /// aborting the process.
    ///
    /// `reader` must already be positioned at the first byte of block data; the
    /// caller accounts for header alignment and `BlocksInfoAtEnd`. `header` is
    /// accepted for interface stability but not consulted.
    ///
    /// # Errors
    ///
    /// * An invalid-data error when the summed uncompressed size overflows `u64`.
    /// * [`BinaryError::ResourceLimitExceeded`] when the total exceeds
    ///   `max_memory`, does not fit in `usize`, or cannot be allocated.
    /// * Any error from reading a block's bytes or decompressing it.
    pub fn decompress_data_blocks_limited(
        header: &BundleHeader,
        blocks: &[CompressionBlock],
        reader: &mut BinaryReader,
        max_memory: Option<usize>,
    ) -> Result<Vec<u8>> {
        // The caller is responsible for positioning `reader` at the start of block data, taking
        // header alignment and `BlocksInfoAtEnd` into account.
        let _ = header;

        let total_uncompressed = blocks
            .iter()
            .try_fold(0u64, |acc, block| {
                acc.checked_add(block.uncompressed_size as u64)
            })
            .ok_or_else(|| BinaryError::invalid_data("Total uncompressed size overflow"))?;

        // Bounding the aggregate also bounds every individual block, so no
        // separate per-block limit check is needed in the loop below.
        if let Some(limit) = max_memory
            && total_uncompressed > limit as u64
        {
            return Err(BinaryError::ResourceLimitExceeded(format!(
                "Bundle decompressed size {} exceeds max_memory {}",
                total_uncompressed, limit
            )));
        }

        let total_uncompressed_usize = usize::try_from(total_uncompressed).map_err(|_| {
            BinaryError::ResourceLimitExceeded(format!(
                "Bundle decompressed size {} does not fit in usize",
                total_uncompressed
            ))
        })?;

        // The total comes from the block table in the file, so reserve fallibly:
        // an infallible `with_capacity` would panic or abort on a huge value.
        let mut decompressed_data: Vec<u8> = Vec::new();
        decompressed_data
            .try_reserve_exact(total_uncompressed_usize)
            .map_err(|_| {
                BinaryError::ResourceLimitExceeded(format!(
                    "Unable to allocate {} bytes for decompressed bundle data",
                    total_uncompressed_usize
                ))
            })?;

        for block in blocks {
            let compressed = reader.read_bytes(block.compressed_size as usize)?;
            // NOTE(review): the output length is trusted to match the declared
            // `uncompressed_size`; confirm `CompressionBlock::decompress` enforces it.
            let block_data = block.decompress(&compressed)?;
            decompressed_data.extend_from_slice(&block_data);
        }

        Ok(decompressed_data)
    }

    /// Summarise the declared sizes of `blocks`.
    ///
    /// Returns totals of compressed and uncompressed bytes, their ratio
    /// (compressed / uncompressed, or `1.0` when the uncompressed total is zero),
    /// the bytes saved (never negative), and the mean uncompressed block size
    /// (`0` for an empty slice).
    pub fn get_compression_stats(blocks: &[CompressionBlock]) -> CompressionStats {
        let mut compressed_sum: u64 = 0;
        let mut uncompressed_sum: u64 = 0;
        for entry in blocks {
            compressed_sum += entry.compressed_size as u64;
            uncompressed_sum += entry.uncompressed_size as u64;
        }

        let block_count = blocks.len();

        let compression_ratio = match uncompressed_sum {
            // Nothing to compare against: report a neutral ratio.
            0 => 1.0,
            total => compressed_sum as f64 / total as f64,
        };

        // `checked_div` yields `None` only for an empty slice.
        let average_block_size = uncompressed_sum
            .checked_div(block_count as u64)
            .unwrap_or(0);

        CompressionStats {
            block_count,
            total_compressed_size: compressed_sum,
            total_uncompressed_size: uncompressed_sum,
            compression_ratio,
            space_saved: uncompressed_sum.saturating_sub(compressed_sum),
            average_block_size,
        }
    }

    /// Run basic sanity checks over a compression block table.
    ///
    /// A table is accepted when it is non-empty, every block has non-zero
    /// compressed and uncompressed sizes, and no block larger than 1024
    /// uncompressed bytes has a compressed size of more than twice its
    /// uncompressed size.
    ///
    /// # Errors
    ///
    /// Returns an invalid-data error naming the first offending block, or
    /// stating that no blocks were found.
    pub fn validate_blocks(blocks: &[CompressionBlock]) -> Result<()> {
        if blocks.is_empty() {
            return Err(BinaryError::invalid_data("No compression blocks found"));
        }

        for (i, block) in blocks.iter().enumerate() {
            if block.compressed_size == 0 {
                return Err(BinaryError::invalid_data(format!(
                    "Block {} has zero compressed size",
                    i
                )));
            }

            if block.uncompressed_size == 0 {
                return Err(BinaryError::invalid_data(format!(
                    "Block {} has zero uncompressed size",
                    i
                )));
            }

            // Sanity check: compressed size shouldn't be much larger than uncompressed
            // (except for very small blocks or incompressible data).
            //
            // The comparison is done in u64 with a saturating double: doubling the
            // size in its native width overflows for blocks above 2 GiB, which
            // panics in debug builds and wraps in release builds, rejecting
            // perfectly valid large blocks.
            let compressed = block.compressed_size as u64;
            let uncompressed = block.uncompressed_size as u64;
            if uncompressed > 1024 && compressed > uncompressed.saturating_mul(2) {
                return Err(BinaryError::invalid_data(format!(
                    "Block {} has suspicious compression ratio: {}/{}",
                    i, block.compressed_size, block.uncompressed_size
                )));
            }
        }

        Ok(())
    }

    /// Estimate peak memory needed to decompress `blocks`.
    ///
    /// The estimate is the sum of all declared uncompressed sizes (the final
    /// output buffer) plus the largest single uncompressed size (the temporary
    /// buffer for one block). An empty slice yields `0`.
    ///
    /// All arithmetic saturates at `usize::MAX`, so a table whose total does not
    /// fit in `usize` (possible on 32-bit targets) reports the maximum instead
    /// of panicking or wrapping to a misleadingly small number.
    pub fn estimate_memory_usage(blocks: &[CompressionBlock]) -> usize {
        let (total_uncompressed, max_block_size) =
            blocks
                .iter()
                .fold((0usize, 0usize), |(total, largest), block| {
                    let size = block.uncompressed_size as usize;
                    (total.saturating_add(size), largest.max(size))
                });

        // Peak usage: total output + largest single block for temporary decompression
        total_uncompressed.saturating_add(max_block_size)
    }

    /// Report whether `compression_type` is a codec id this module can handle.
    ///
    /// Supported ids: `0` (none), `1` (LZMA), `2`/`3` (LZ4/LZ4HC) and
    /// `4` (Brotli). Every other value returns `false`.
    pub fn is_compression_supported(compression_type: u32) -> bool {
        // The supported ids form the contiguous range 0 through 4.
        matches!(compression_type, 0..=4)
    }
}

/// Aggregate size figures for a set of compression blocks.
///
/// Field meanings below describe the values as filled in by
/// `BundleCompression::get_compression_stats`.
#[derive(Debug, Clone)]
pub struct CompressionStats {
    /// Number of blocks summarised.
    pub block_count: usize,
    /// Sum of the blocks' compressed sizes, in bytes.
    pub total_compressed_size: u64,
    /// Sum of the blocks' uncompressed sizes, in bytes.
    pub total_uncompressed_size: u64,
    /// Compressed total divided by uncompressed total (`1.0` when the
    /// uncompressed total is zero); lower means better compression.
    pub compression_ratio: f64,
    /// Uncompressed total minus compressed total, floored at zero.
    pub space_saved: u64,
    /// Mean uncompressed size per block (`0` when there are no blocks).
    pub average_block_size: u64,
}

impl CompressionStats {
    /// Percentage of the original size removed by compression.
    ///
    /// Computed as `(1 - compression_ratio) * 100`; negative when the data grew.
    pub fn efficiency_percent(&self) -> f64 {
        let saved_fraction = 1.0 - self.compression_ratio;
        saved_fraction * 100.0
    }

    /// Whether the compressed data is below 90% of the original size.
    pub fn is_effective(&self) -> bool {
        const EFFECTIVE_BELOW: f64 = 0.9;
        self.compression_ratio < EFFECTIVE_BELOW
    }
}

/// Tunable settings for bundle (de)compression.
#[derive(Debug, Clone)]
pub struct CompressionOptions {
    /// Upper bound on memory used for decompression; `None` means no bound.
    pub max_memory: Option<usize>,
    /// Validate the block table before decompressing.
    pub validate_blocks: bool,
    /// Gather compression statistics while processing.
    pub collect_stats: bool,
    /// Codec to prefer when writing new bundles.
    pub preferred_compression: CompressionType,
}

impl Default for CompressionOptions {
    /// Balanced preset: 1 GiB memory cap, validation on, statistics off, LZ4.
    fn default() -> Self {
        const ONE_GIB: usize = 1024 * 1024 * 1024;
        Self {
            max_memory: Some(ONE_GIB),
            validate_blocks: true,
            collect_stats: false,
            preferred_compression: CompressionType::Lz4,
        }
    }
}

impl CompressionOptions {
    /// Preset tuned for speed: no memory cap, no validation, no statistics.
    pub fn fast() -> Self {
        Self {
            max_memory: None,
            validate_blocks: false,
            ..Self::default()
        }
    }

    /// Preset tuned for safety: 512 MiB memory cap, validation and statistics on.
    pub fn safe() -> Self {
        const HALF_GIB: usize = 512 * 1024 * 1024;
        Self {
            max_memory: Some(HALF_GIB),
            collect_stats: true,
            ..Self::default()
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Every codec id the decompression paths handle is reported as supported,
    /// and ids outside that set are rejected.
    #[test]
    fn test_compression_support() {
        assert!(BundleCompression::is_compression_supported(0)); // None
        assert!(BundleCompression::is_compression_supported(1)); // LZMA
        assert!(BundleCompression::is_compression_supported(2)); // LZ4
        assert!(BundleCompression::is_compression_supported(3)); // LZ4HC
        assert!(BundleCompression::is_compression_supported(4)); // Brotli
        assert!(!BundleCompression::is_compression_supported(5)); // First id past the supported set
        assert!(!BundleCompression::is_compression_supported(99)); // Unknown
    }

    /// Totals, ratio, savings and average are derived from the declared sizes.
    #[test]
    fn test_compression_stats() {
        let blocks = vec![
            CompressionBlock::new(1000, 500, 0),
            CompressionBlock::new(2000, 1000, 0),
        ];

        let stats = BundleCompression::get_compression_stats(&blocks);
        assert_eq!(stats.block_count, 2);
        assert_eq!(stats.total_compressed_size, 1500);
        assert_eq!(stats.total_uncompressed_size, 3000);
        // 1500 / 3000 is exactly representable, so exact float equality is safe.
        assert_eq!(stats.compression_ratio, 0.5);
        assert_eq!(stats.space_saved, 1500);
        assert_eq!(stats.average_block_size, 1500);
        assert_eq!(stats.efficiency_percent(), 50.0);
        assert!(stats.is_effective());
    }

    /// An empty block list yields neutral statistics rather than dividing by zero.
    #[test]
    fn test_compression_stats_empty() {
        let stats = BundleCompression::get_compression_stats(&[]);
        assert_eq!(stats.block_count, 0);
        assert_eq!(stats.total_compressed_size, 0);
        assert_eq!(stats.total_uncompressed_size, 0);
        assert_eq!(stats.compression_ratio, 1.0);
        assert_eq!(stats.space_saved, 0);
        assert_eq!(stats.average_block_size, 0);
        assert!(!stats.is_effective());
    }
}