vflight 0.9.2

Share files over the Veilid distributed network with content-addressable storage
Documentation
//! Zstandard compression for file transfers.
//!
//! Compression is applied to the entire file before chunking, so the
//! chunk pipeline (hashing, encryption, resume) is unaffected.

use anyhow::{Context, Result};
use std::time::Instant;
use tracing::{debug, instrument};

use crate::metrics::{global_metrics, MetricCategory};

/// Render the compressed/original size ratio as a percentage for logging.
///
/// Returns `"n/a"` when `input_len` is zero, because dividing by an empty
/// input would otherwise produce a non-finite value such as `inf%`.
fn format_ratio(input_len: usize, output_len: usize) -> String {
    if input_len == 0 {
        return String::from("n/a");
    }
    // Precision loss in the float conversion is irrelevant for a log-only figure.
    format!("{:.2}%", output_len as f64 / input_len as f64 * 100.0)
}

/// Compress `data` using zstd with the default compression level.
///
/// The elapsed time and the input size are recorded under
/// [`MetricCategory::FileIO`], and a debug event reports the input size,
/// the output size, and the compression ratio.
///
/// # Errors
///
/// Returns an error carrying the context `"zstd compression failed"` if the
/// zstd encoder reports a failure.
#[instrument(level = "debug", skip(data), fields(input_bytes = data.len()))]
pub fn compress(data: &[u8]) -> Result<Vec<u8>> {
    let start = Instant::now();
    // Level 0 asks zstd to use its built-in default compression level.
    let compressed = zstd::encode_all(data, 0).context("zstd compression failed")?;
    global_metrics().record(MetricCategory::FileIO, start.elapsed(), data.len() as u64);
    debug!(
        input_bytes = data.len(),
        output_bytes = compressed.len(),
        ratio = format_ratio(data.len(), compressed.len()),
        "Compression complete"
    );
    Ok(compressed)
}

/// Decompress a zstd-compressed buffer and return the decoded bytes.
///
/// The elapsed time and the decompressed size are recorded under
/// [`MetricCategory::FileIO`], and a debug event reports both the compressed
/// and decompressed sizes.
///
/// # Errors
///
/// Returns an error carrying the context `"zstd decompression failed"` if
/// `data` cannot be decoded.
// NOTE(review): the whole output is decoded into memory with no size cap;
// if `data` can come from an untrusted peer, confirm the caller bounds it.
#[instrument(level = "debug", skip(data), fields(input_bytes = data.len()))]
pub fn decompress(data: &[u8]) -> Result<Vec<u8>> {
    let timer = Instant::now();
    let output = zstd::decode_all(data).context("zstd decompression failed")?;
    let elapsed = timer.elapsed();
    let output_len = output.len();

    global_metrics().record(MetricCategory::FileIO, elapsed, output_len as u64);
    debug!(
        compressed_bytes = data.len(),
        decompressed_bytes = output_len,
        "Decompression complete"
    );
    Ok(output)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Produce `len` bytes from a fixed-seed xorshift32 generator.
    ///
    /// The stream is deterministic, so failures are reproducible, yet it has
    /// no short repeating period for the compressor to exploit.
    fn pseudo_random_bytes(len: usize) -> Vec<u8> {
        // Any non-zero seed works; xorshift32 never reaches zero from one.
        let mut state: u32 = 0x9E37_79B9;
        (0..len)
            .map(|_| {
                state ^= state << 13;
                state ^= state >> 17;
                state ^= state << 5;
                // Take the high byte of the generator state.
                state.to_be_bytes()[0]
            })
            .collect()
    }

    /// A short text payload survives a compress/decompress round trip.
    #[test]
    fn test_roundtrip() {
        let original = b"hello world, this is a compression test with some repeated data. \
                         repeated data repeated data repeated data.";
        let compressed = compress(original).unwrap();
        let decompressed = decompress(&compressed).unwrap();
        assert_eq!(decompressed, original);
    }

    /// Empty input compresses to a frame that decodes back to empty output.
    #[test]
    fn test_roundtrip_empty() {
        let compressed = compress(b"").unwrap();
        let decompressed = decompress(&compressed).unwrap();
        assert!(decompressed.is_empty());
    }

    /// A large run of identical bytes shrinks and round-trips intact.
    #[test]
    fn test_roundtrip_large_repeated() {
        let original = vec![42u8; 100_000];
        let compressed = compress(&original).unwrap();
        // Highly repetitive data should compress well
        assert!(compressed.len() < original.len());
        let decompressed = decompress(&compressed).unwrap();
        assert_eq!(decompressed, original);
    }

    /// Pseudo-random data (hard to compress) should still round-trip correctly.
    #[test]
    fn test_roundtrip_random_like() {
        let original = pseudo_random_bytes(10_000);
        let compressed = compress(&original).unwrap();
        let decompressed = decompress(&compressed).unwrap();
        assert_eq!(decompressed, original);
    }

    /// Bytes that are not a zstd frame are rejected with an error.
    #[test]
    fn test_decompress_invalid_data() {
        let result = decompress(b"this is not zstd data");
        assert!(result.is_err());
    }
}