lcpfs 2026.1.102

LCP File System - A ZFS-inspired copy-on-write filesystem for Rust
// Copyright 2025 LunaOS Contributors
// SPDX-License-Identifier: Apache-2.0

//! # Compression Engine
//!
//! This module provides tiered compression for LCPFS with automatic algorithm
//! selection based on data temperature and access patterns.
//!
//! ## Compression Tiers
//!
//! LCPFS uses a thermodynamically-optimized compression strategy:
//!
//! | Tier | Algorithm | Ratio  | Speed     | Use Case          |
//! |------|-----------|--------|-----------|-------------------|
//! | Hot  | LZ4       | 2-3x   | ~3 GB/s   | Frequently accessed |
//! | Warm | ZSTD      | 4-10x  | ~500 MB/s | Moderate access   |
//! | Cold | LZMA      | 10-20x | ~100 MB/s | Archival data     |
//!
//! ## Feature Flags
//!
//! - **Default (no_std)**: LZ4 only - always available
//! - **std feature**: Enables ZSTD and LZMA compression
//!
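//! ## Usage
//!
//! Call `Compress::compress_with_type` with the `CompressionType` of the
//! target tier to pack a buffer, and `Compress::decompress_with_type` with the
//! same type to unpack it. `Compress::compress` and `Compress::decompress`
//! are LZ4-only shorthands for the same operations.
//!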
//! ## Compression Decisions
//!
//! The compression engine automatically skips compression when:
//! - Data is too small (< 64 bytes for LZ4, < 128 for ZSTD, < 256 for LZMA)
//! - Compressed size >= original size (incompressible data)

use alloc::vec::Vec;
use lz4_flex::{compress_prepend_size, decompress_size_prepended};

#[cfg(feature = "std")]
use std::io::{Read, Write};

/// Names the codec a buffer is (or should be) processed with.
///
/// Each variant maps to one storage tier:
/// - `None`: data is passed through untouched
/// - `Lz4`:  hot tier  - ~2-3x ratio, fastest; present in every build (no_std)
/// - `Zstd`: warm tier - ~4-10x ratio, balanced; only with the `std` feature
/// - `Lzma`: cold tier - ~10-20x ratio, archival; only with the `std` feature
///
/// The explicit discriminants give every variant a fixed numeric value (0-3).
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CompressionType {
    /// Pass-through: no codec is applied
    None = 0,
    /// LZ4: speed over ratio
    Lz4 = 1,
    /// ZSTD: middle ground between speed and ratio
    #[cfg(feature = "std")]
    Zstd = 2,
    /// LZMA: ratio over speed
    #[cfg(feature = "std")]
    Lzma = 3,
}

/// LZ4 compression implementation (hot tier).
///
/// Field-less unit type: it only serves as a namespace for the associated
/// `compress` / `decompress` functions and is available in every build.
pub struct Lz4Compressor;
/// ZSTD compression implementation (warm tier).
///
/// Field-less unit type used as a namespace for the associated `compress` /
/// `decompress` functions. Only compiled when the `std` feature is enabled.
#[cfg(feature = "std")]
pub struct ZstdCompressor;
/// LZMA compression implementation (cold tier), backed by the `xz2` crate.
///
/// Field-less unit type used as a namespace for the associated `compress` /
/// `decompress` functions. Only compiled when the `std` feature is enabled.
#[cfg(feature = "std")]
pub struct LzmaCompressor;

/// High-level compression API
///
/// Field-less unit type whose associated functions dispatch to the per-codec
/// compressor types and report failure as `None` instead of an error string.
pub struct Compress;

impl Compress {
    /// Run `data` through the codec named by `comp_type`.
    ///
    /// No codec selection happens here: the caller chooses `comp_type`,
    /// typically according to the data's tier:
    /// - `CompressionType::None`: returns a copy of `data`
    /// - `CompressionType::Lz4`: hot data (frequent access)
    /// - `CompressionType::Zstd`: warm data (requires std feature)
    /// - `CompressionType::Lzma`: cold data (requires std feature)
    ///
    /// Returns `None` when the codec reports an error.
    ///
    /// NOTE(review): each codec returns the input unchanged when it is below
    /// that codec's size threshold or does not shrink, so the result is not
    /// guaranteed to be in compressed form. Callers have to track that
    /// themselves before handing the buffer to `decompress_with_type`.
    pub fn compress_with_type(data: &[u8], comp_type: CompressionType) -> Option<Vec<u8>> {
        let outcome = match comp_type {
            // Pass-through never fails, so it bypasses the Result plumbing.
            CompressionType::None => return Some(data.to_vec()),
            CompressionType::Lz4 => Lz4Compressor::compress(data),
            #[cfg(feature = "std")]
            CompressionType::Zstd => ZstdCompressor::compress(data),
            #[cfg(feature = "std")]
            CompressionType::Lzma => LzmaCompressor::compress(data),
        };
        outcome.ok()
    }

    /// Reverse the codec named by `comp_type`.
    ///
    /// `CompressionType::None` yields a copy of `data`; every other variant
    /// decodes `data` with the matching codec. Returns `None` when decoding
    /// fails.
    pub fn decompress_with_type(data: &[u8], comp_type: CompressionType) -> Option<Vec<u8>> {
        let outcome = match comp_type {
            CompressionType::None => return Some(data.to_vec()),
            // The LZ4 decoder does not read its size argument, hence the 0.
            CompressionType::Lz4 => Lz4Compressor::decompress(data, 0),
            #[cfg(feature = "std")]
            CompressionType::Zstd => ZstdCompressor::decompress(data),
            #[cfg(feature = "std")]
            CompressionType::Lzma => LzmaCompressor::decompress(data),
        };
        outcome.ok()
    }

    /// LZ4 shorthand for `compress_with_type` (fast, ~2-3x ratio).
    ///
    /// Kept for backwards compatibility with the original API.
    pub fn compress(data: &[u8]) -> Option<Vec<u8>> {
        Self::compress_with_type(data, CompressionType::Lz4)
    }

    /// LZ4 shorthand for `decompress_with_type`.
    ///
    /// Kept for backwards compatibility with the original API.
    pub fn decompress(data: &[u8]) -> Option<Vec<u8>> {
        Self::decompress_with_type(data, CompressionType::Lz4)
    }
}

impl Lz4Compressor {
    /// LZ4-compress `data` (hot tier: speed over ratio, ~2-3x).
    ///
    /// Meant for frequently accessed data where decompression latency
    /// matters. The compressed form carries a length prefix written by
    /// `compress_prepend_size`.
    ///
    /// The input is returned as an unmodified copy when it is shorter than
    /// 64 bytes or when the compressed form would not be strictly smaller.
    /// This function never returns `Err`.
    pub fn compress(data: &[u8]) -> Result<Vec<u8>, &'static str> {
        // Tiny inputs are not worth the attempt.
        if data.len() >= 64 {
            let packed = compress_prepend_size(data);

            // Keep the compressed form only when it is a real saving.
            if packed.len() < data.len() {
                return Ok(packed);
            }
        }

        Ok(data.to_vec())
    }

    /// Decode a size-prefixed LZ4 buffer.
    ///
    /// `_original_size` is currently not read; the output length is taken
    /// from the prefix inside `compressed`. Any decoder error is reported as
    /// `"LZ4 Decompression Failed"`.
    ///
    /// NOTE(review): a buffer that `compress` returned unmodified has no size
    /// prefix and must not be passed here; callers need to record whether
    /// compression actually took effect.
    pub fn decompress(compressed: &[u8], _original_size: usize) -> Result<Vec<u8>, &'static str> {
        match decompress_size_prepended(compressed) {
            Ok(plain) => Ok(plain),
            Err(_) => Err("LZ4 Decompression Failed"),
        }
    }
}

// ═══════════════════════════════════════════════════════════════════════════════
// ZSTD COMPRESSION (requires std feature)
// Warm data - balanced compression ratio (~4-10x) with good speed
// ═══════════════════════════════════════════════════════════════════════════════

#[cfg(feature = "std")]
impl ZstdCompressor {
    /// ZSTD-compress `data` at level 3 (warm tier, ~4-10x ratio).
    ///
    /// Meant for moderately accessed data (hourly/daily access patterns),
    /// trading some speed for a better ratio than LZ4.
    ///
    /// The input is returned as an unmodified copy when it is shorter than
    /// 128 bytes or when the compressed form would not be strictly smaller.
    /// An encoder error is reported as `"ZSTD compression failed"`.
    pub fn compress(data: &[u8]) -> Result<Vec<u8>, &'static str> {
        // Tiny inputs are not worth the attempt.
        if data.len() >= 128 {
            // Level 3: balanced ratio with fast decompression.
            let packed = match zstd::encode_all(data, 3) {
                Ok(bytes) => bytes,
                Err(_) => return Err("ZSTD compression failed"),
            };

            // Keep the compressed form only when it is a real saving.
            if packed.len() < data.len() {
                return Ok(packed);
            }
        }

        Ok(data.to_vec())
    }

    /// Decode a ZSTD buffer.
    ///
    /// Any decoder error is reported as `"ZSTD decompression failed"`.
    ///
    /// NOTE(review): a buffer that `compress` returned unmodified is not a
    /// ZSTD stream and must not be passed here; callers need to record
    /// whether compression actually took effect.
    pub fn decompress(compressed: &[u8]) -> Result<Vec<u8>, &'static str> {
        match zstd::decode_all(compressed) {
            Ok(plain) => Ok(plain),
            Err(_) => Err("ZSTD decompression failed"),
        }
    }
}

// ═══════════════════════════════════════════════════════════════════════════════
// LZMA COMPRESSION (requires std feature)
// Cold data - maximum compression ratio (~10-20x) for archival
// ═══════════════════════════════════════════════════════════════════════════════

#[cfg(feature = "std")]
impl LzmaCompressor {
    /// LZMA-compress `data` at level 6 (cold tier, ~10-20x ratio).
    ///
    /// Ratio is favoured over speed; intended for archival data and the
    /// W_temporal consciousness ledger. Encoding goes through
    /// `xz2::write::XzEncoder`.
    ///
    /// The input is returned as an unmodified copy when it is shorter than
    /// 256 bytes or when the compressed form would not be strictly smaller.
    /// Encoder errors are reported as `"LZMA compression write failed"` or
    /// `"LZMA compression finish failed"`, depending on the failing step.
    pub fn compress(data: &[u8]) -> Result<Vec<u8>, &'static str> {
        let input_len = data.len();

        // Tiny inputs are not worth the attempt.
        if input_len < 256 {
            return Ok(data.to_vec());
        }

        let mut packed = Vec::new();
        let mut xz = xz2::write::XzEncoder::new(&mut packed, 6);
        if xz.write_all(data).is_err() {
            return Err("LZMA compression write failed");
        }
        // `finish` consumes the encoder and flushes the stream trailer.
        if xz.finish().is_err() {
            return Err("LZMA compression finish failed");
        }

        // Keep the compressed form only when it is a real saving.
        if packed.len() < input_len {
            Ok(packed)
        } else {
            Ok(data.to_vec())
        }
    }

    /// Decode a buffer produced by the LZMA encoder.
    ///
    /// Reads `compressed` to the end through `xz2::read::XzDecoder`; any
    /// error is reported as `"LZMA decompression failed"`.
    ///
    /// NOTE(review): a buffer that `compress` returned unmodified is not an
    /// encoded stream and must not be passed here; callers need to record
    /// whether compression actually took effect.
    pub fn decompress(compressed: &[u8]) -> Result<Vec<u8>, &'static str> {
        let mut plain = Vec::new();
        let mut xz = xz2::read::XzDecoder::new(compressed);
        match xz.read_to_end(&mut plain) {
            Ok(_) => Ok(plain),
            Err(_) => Err("LZMA decompression failed"),
        }
    }
}