firecloud-storage 0.2.0

Chunking, compression, and local storage for FireCloud distributed storage
Documentation
//! Adaptive compression using Zstd and LZ4

use crate::{StorageError, StorageResult};

/// Compression level presets accepted by [`compress`].
///
/// The default preset is [`CompressionLevel::Balanced`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum CompressionLevel {
    /// No compression: the input is copied unchanged
    /// (for already compressed data like images, videos)
    None,
    /// Fast compression using LZ4 (for real-time transfers)
    Fast,
    /// Balanced Zstd level 3
    #[default]
    Balanced,
    /// Best ratio Zstd level 9 (for storage)
    Best,
}

/// Compress `data` according to the requested preset.
///
/// * `CompressionLevel::None` returns an unmodified copy of the input.
/// * `CompressionLevel::Fast` uses LZ4 with the uncompressed size prepended.
/// * `CompressionLevel::Balanced` / `CompressionLevel::Best` use Zstd at
///   level 3 / level 9 respectively.
///
/// # Errors
///
/// Returns `StorageError::Compression` carrying the encoder's message when
/// Zstd encoding fails.
pub fn compress(data: &[u8], level: CompressionLevel) -> StorageResult<Vec<u8>> {
    // The non-Zstd presets finish immediately; the Zstd presets only differ
    // in the numeric level handed to the encoder.
    let zstd_level = match level {
        CompressionLevel::None => return Ok(data.to_vec()),
        CompressionLevel::Fast => return Ok(lz4_flex::compress_prepend_size(data)),
        CompressionLevel::Balanced => 3,
        CompressionLevel::Best => 9,
    };

    zstd::encode_all(data, zstd_level).map_err(|err| StorageError::Compression(err.to_string()))
}

/// Decompress data previously produced by [`compress`].
///
/// The format is NOT auto-detected: the caller selects the decoder through
/// `was_lz4`.
///
/// * `was_lz4 == true`  - `data` is decoded as size-prepended LZ4.
/// * `was_lz4 == false` - `data` is decoded as Zstd.
///
/// NOTE(review): there is no pass-through branch here, so callers presumably
/// have to skip this function for data stored with `CompressionLevel::None`.
///
/// # Errors
///
/// Returns `StorageError::Decompression` carrying the decoder's message when
/// the selected decoder rejects `data`.
pub fn decompress(data: &[u8], was_lz4: bool) -> StorageResult<Vec<u8>> {
    // Normalise both decoders to a `String` error first so the wrapping into
    // `StorageError` happens in exactly one place.
    let decoded = if was_lz4 {
        lz4_flex::decompress_size_prepended(data).map_err(|err| err.to_string())
    } else {
        zstd::decode_all(data).map_err(|err| err.to_string())
    };

    decoded.map_err(StorageError::Decompression)
}

/// Decide whether a payload is worth compressing.
///
/// Returns `false` when either the declared MIME type or the leading bytes of
/// `data` identify a format that is already compressed (images, audio, video,
/// archives); returns `true` otherwise.
///
/// # Arguments
///
/// * `mime_type` - Optional declared media type. It is compared as an ASCII
///   case-insensitive prefix, so `"video/"` covers every video subtype and
///   `"IMAGE/JPEG"` is treated like `"image/jpeg"`.
/// * `data` - The payload (or its beginning); at most the first 8 bytes are
///   inspected.
///
/// TODO: Use this heuristic in chunker.rs to avoid redundant compression
/// of already-compressed formats (images, videos, archives)
#[allow(dead_code)]
pub fn should_compress(mime_type: Option<&str>, data: &[u8]) -> bool {
    /// MIME type prefixes of formats that are already compressed.
    const SKIP_MIME_PREFIXES: [&str; 10] = [
        "image/jpeg",
        "image/png",
        "image/gif",
        "image/webp",
        "video/",
        "audio/",
        "application/zip",
        "application/gzip",
        "application/x-7z-compressed",
        "application/x-rar-compressed",
    ];

    /// Leading byte signatures of formats that are already compressed.
    const COMPRESSED_MAGIC: [&[u8]; 5] = [
        &[0xFF, 0xD8, 0xFF],       // JPEG
        &[0x89, 0x50, 0x4E, 0x47], // PNG
        b"GIF8",                   // GIF
        &[0x50, 0x4B, 0x03, 0x04], // ZIP/DOCX/XLSX
        &[0x1F, 0x8B],             // GZIP
    ];

    /// ASCII case-insensitive `starts_with`. Works on bytes so a multi-byte
    /// character in `text` can never cause a char-boundary panic.
    fn has_prefix_ignore_ascii_case(text: &str, prefix: &str) -> bool {
        match text.as_bytes().get(..prefix.len()) {
            Some(head) => head.eq_ignore_ascii_case(prefix.as_bytes()),
            None => false,
        }
    }

    // Check MIME type first. Media types are case-insensitive, so the
    // comparison must be as well.
    if let Some(mime) = mime_type {
        if SKIP_MIME_PREFIXES
            .iter()
            .any(|prefix| has_prefix_ignore_ascii_case(mime, prefix))
        {
            return false;
        }
    }

    // Check magic bytes. `starts_with` is already safe on short input, so
    // each signature only needs as many bytes as it is long.
    if COMPRESSED_MAGIC.iter().any(|magic| data.starts_with(magic)) {
        return false;
    }

    // MP4/MOV: the `ftyp` tag sits at byte offset 4 rather than at the start.
    if data.get(4..8) == Some(&b"ftyp"[..]) {
        return false;
    }

    true
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Zstd (`Balanced`) output must decode back to the input when
    /// `was_lz4` is `false`.
    #[test]
    fn test_zstd_roundtrip() {
        let payload = b"Hello, FireCloud! This is test data for compression.";

        let restored = compress(payload, CompressionLevel::Balanced)
            .and_then(|packed| decompress(&packed, false))
            .unwrap();

        assert_eq!(restored, payload);
    }

    /// LZ4 (`Fast`) output must decode back to the input when `was_lz4`
    /// is `true`.
    #[test]
    fn test_lz4_roundtrip() {
        let payload = b"Hello, FireCloud! This is test data for LZ4 compression.";

        let restored = compress(payload, CompressionLevel::Fast)
            .and_then(|packed| decompress(&packed, true))
            .unwrap();

        assert_eq!(restored, payload);
    }

    /// `CompressionLevel::None` must hand the bytes back untouched.
    #[test]
    fn test_no_compression() {
        let payload = b"Raw data";

        let passthrough = compress(payload, CompressionLevel::None).unwrap();

        assert_eq!(passthrough, payload);
    }

    /// Covers both detection paths of `should_compress`: content sniffing
    /// and the MIME hint.
    #[test]
    fn test_should_compress() {
        // Content sniffing only (no MIME hint): JPEG magic bytes are skipped,
        // plain text is compressed.
        let jpeg_header = [0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10];
        let plain_text = b"This is plain text that should be compressed";
        assert!(!should_compress(None, &jpeg_header));
        assert!(should_compress(None, plain_text));

        // MIME hint only (empty payload).
        let no_data: &[u8] = &[];
        assert!(!should_compress(Some("image/jpeg"), no_data));
        assert!(should_compress(Some("text/plain"), no_data));
    }
}