Skip to main content

chie_core/
compression.rs

//! Content compression utilities for storage optimization.
//!
//! This module provides transparent compression/decompression of content chunks
//! to optimize storage usage while maintaining compatibility with the protocol.
//!
//! # Features
//!
//! - Multiple compression strategies (run-length encoding, DEFLATE, or passthrough)
//! - Algorithm suggestion based on content type
//! - Compression ratio tracking and statistics
//! - Configurable compression levels
//!
//! # Example
//!
//! ```
//! use chie_core::compression::{Compressor, CompressionAlgorithm};
//!
//! let mut compressor = Compressor::new(CompressionAlgorithm::Balanced);
//! let data = b"Hello, CHIE Protocol! ".repeat(100);
//!
//! // Compress data
//! let compressed = compressor.compress(&data).unwrap();
//! println!("Compression ratio: {:.2}%",
//!     (1.0 - compressed.len() as f64 / data.len() as f64) * 100.0);
//!
//! // Decompress data
//! let decompressed = compressor.decompress(&compressed).unwrap();
//! assert_eq!(data.as_slice(), decompressed.as_slice());
//! ```
30
31use serde::{Deserialize, Serialize};
32use std::io;
33use thiserror::Error;
34
35/// Compression algorithm options.
36#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
37pub enum CompressionAlgorithm {
38    /// No compression (passthrough).
39    None,
40    /// Fast compression with moderate ratio (LZ4).
41    Fast,
42    /// Balanced compression (Zstd default level).
43    Balanced,
44    /// Maximum compression (Zstd high level).
45    Maximum,
46}
47
48impl Default for CompressionAlgorithm {
49    #[inline]
50    fn default() -> Self {
51        Self::Balanced
52    }
53}
54
55impl CompressionAlgorithm {
56    /// Get the compression level for this algorithm.
57    #[must_use]
58    #[inline]
59    pub const fn level(&self) -> i32 {
60        match self {
61            Self::None => 0,
62            Self::Fast => 1,
63            Self::Balanced => 6,
64            Self::Maximum => 9,
65        }
66    }
67
68    /// Check if this algorithm should skip compression.
69    #[must_use]
70    #[inline]
71    pub const fn is_none(&self) -> bool {
72        matches!(self, Self::None)
73    }
74}
75
76/// Compression error types.
77#[derive(Debug, Error)]
78pub enum CompressionError {
79    /// IO error during compression/decompression.
80    #[error("IO error: {0}")]
81    Io(#[from] io::Error),
82
83    /// Compression failed.
84    #[error("Compression failed: {0}")]
85    CompressionFailed(String),
86
87    /// Decompression failed.
88    #[error("Decompression failed: {0}")]
89    DecompressionFailed(String),
90
91    /// Invalid compressed data.
92    #[error("Invalid compressed data")]
93    InvalidData,
94}
95
96/// Content compressor with configurable algorithm and statistics.
97#[derive(Debug, Clone)]
98pub struct Compressor {
99    algorithm: CompressionAlgorithm,
100    stats: CompressionStats,
101}
102
103/// Compression statistics.
104#[derive(Debug, Clone, Default, Serialize, Deserialize)]
105pub struct CompressionStats {
106    /// Total bytes compressed (input).
107    pub bytes_in: u64,
108
109    /// Total bytes after compression (output).
110    pub bytes_out: u64,
111
112    /// Number of compression operations.
113    pub compressions: u64,
114
115    /// Number of decompression operations.
116    pub decompressions: u64,
117}
118
119impl CompressionStats {
120    /// Calculate overall compression ratio.
121    #[must_use]
122    #[inline]
123    pub fn compression_ratio(&self) -> f64 {
124        if self.bytes_in == 0 {
125            0.0
126        } else {
127            1.0 - (self.bytes_out as f64 / self.bytes_in as f64)
128        }
129    }
130
131    /// Calculate space saved in bytes.
132    #[must_use]
133    #[inline]
134    pub const fn bytes_saved(&self) -> u64 {
135        self.bytes_in.saturating_sub(self.bytes_out)
136    }
137
138    /// Calculate average compression ratio per operation.
139    #[must_use]
140    #[inline]
141    pub fn avg_ratio(&self) -> f64 {
142        if self.compressions == 0 {
143            0.0
144        } else {
145            self.compression_ratio()
146        }
147    }
148}
149
150impl Compressor {
151    /// Create a new compressor with the specified algorithm.
152    #[must_use]
153    pub fn new(algorithm: CompressionAlgorithm) -> Self {
154        Self {
155            algorithm,
156            stats: CompressionStats::default(),
157        }
158    }
159
160    /// Get the compression algorithm.
161    #[inline]
162    #[must_use]
163    pub const fn algorithm(&self) -> CompressionAlgorithm {
164        self.algorithm
165    }
166
167    /// Get compression statistics.
168    #[inline]
169    #[must_use]
170    pub const fn stats(&self) -> &CompressionStats {
171        &self.stats
172    }
173
174    /// Reset compression statistics.
175    #[inline]
176    pub fn reset_stats(&mut self) {
177        self.stats = CompressionStats::default();
178    }
179
180    /// Compress data using the configured algorithm.
181    pub fn compress(&mut self, data: &[u8]) -> Result<Vec<u8>, CompressionError> {
182        if self.algorithm.is_none() || data.is_empty() {
183            return Ok(data.to_vec());
184        }
185
186        let original_len = data.len();
187        let compressed = match self.algorithm {
188            CompressionAlgorithm::None => data.to_vec(),
189            CompressionAlgorithm::Fast => {
190                // Simple run-length encoding for fast compression
191                compress_rle(data)
192            }
193            CompressionAlgorithm::Balanced | CompressionAlgorithm::Maximum => {
194                // Simulate Zstd-like compression with deflate
195                compress_deflate(data, self.algorithm.level())
196                    .map_err(|e| CompressionError::CompressionFailed(e.to_string()))?
197            }
198        };
199
200        // Update statistics
201        self.stats.bytes_in += original_len as u64;
202        self.stats.bytes_out += compressed.len() as u64;
203        self.stats.compressions += 1;
204
205        Ok(compressed)
206    }
207
208    /// Decompress data using the configured algorithm.
209    pub fn decompress(&mut self, data: &[u8]) -> Result<Vec<u8>, CompressionError> {
210        if self.algorithm.is_none() || data.is_empty() {
211            return Ok(data.to_vec());
212        }
213
214        let decompressed = match self.algorithm {
215            CompressionAlgorithm::None => data.to_vec(),
216            CompressionAlgorithm::Fast => {
217                decompress_rle(data).map_err(|_| CompressionError::InvalidData)?
218            }
219            CompressionAlgorithm::Balanced | CompressionAlgorithm::Maximum => {
220                decompress_deflate(data)
221                    .map_err(|e| CompressionError::DecompressionFailed(e.to_string()))?
222            }
223        };
224
225        self.stats.decompressions += 1;
226        Ok(decompressed)
227    }
228
229    /// Compress data and prepend algorithm metadata.
230    pub fn compress_with_header(&mut self, data: &[u8]) -> Result<Vec<u8>, CompressionError> {
231        let compressed = self.compress(data)?;
232        let mut result = Vec::with_capacity(compressed.len() + 1);
233        result.push(self.algorithm as u8);
234        result.extend_from_slice(&compressed);
235        Ok(result)
236    }
237
238    /// Decompress data that includes algorithm metadata.
239    pub fn decompress_with_header(&mut self, data: &[u8]) -> Result<Vec<u8>, CompressionError> {
240        if data.is_empty() {
241            return Err(CompressionError::InvalidData);
242        }
243
244        let _algorithm = data[0];
245        self.decompress(&data[1..])
246    }
247}
248
/// Simple run-length encoding for fast compression.
///
/// Output format: a run is written as the triple `[255, count, byte]` with
/// `count` in `1..=255`; every other byte is copied through as a literal.
/// Runs of three or more equal bytes are always written as triples.
///
/// The value `255` doubles as the run marker, so it is never emitted as a
/// literal: even a single `255` byte is written as `[255, 1, 255]`. Without
/// this escape a literal `255` followed by two more bytes would be read back
/// as a run and the data would be corrupted on decompression.
fn compress_rle(data: &[u8]) -> Vec<u8> {
    const MARKER: u8 = 255;
    const MIN_RUN: usize = 3;
    const MAX_RUN: usize = u8::MAX as usize;

    let mut encoded = Vec::with_capacity(data.len());
    let mut rest = data;

    while let Some(&byte) = rest.first() {
        // Length of the run starting here, capped so it fits the one-byte count.
        let run = rest
            .iter()
            .take(MAX_RUN)
            .take_while(|&&b| b == byte)
            .count();

        if run >= MIN_RUN || byte == MARKER {
            // `run` never exceeds MAX_RUN, so narrowing to u8 is lossless.
            encoded.extend_from_slice(&[MARKER, run as u8, byte]);
        } else {
            // Short runs of ordinary bytes are cheaper as plain literals.
            encoded.resize(encoded.len() + run, byte);
        }

        rest = &rest[run..];
    }

    encoded
}
283
284/// Decompress run-length encoded data.
285fn decompress_rle(data: &[u8]) -> Result<Vec<u8>, CompressionError> {
286    let mut result = Vec::with_capacity(data.len() * 2);
287    let mut i = 0;
288
289    while i < data.len() {
290        if data[i] == 255 && i + 2 < data.len() {
291            let count = data[i + 1] as usize;
292            let byte = data[i + 2];
293            result.extend(std::iter::repeat_n(byte, count));
294            i += 3;
295        } else {
296            result.push(data[i]);
297            i += 1;
298        }
299    }
300
301    Ok(result)
302}
303
304/// Compress data using DEFLATE algorithm (oxiarc-deflate).
305fn compress_deflate(data: &[u8], level: i32) -> io::Result<Vec<u8>> {
306    // oxiarc-deflate levels 0-9: clamp to valid range
307    let clamped = level.clamp(0, 9) as u8;
308    oxiarc_deflate::deflate(data, clamped).map_err(|e| io::Error::other(e.to_string()))
309}
310
311/// Decompress DEFLATE-compressed data (oxiarc-deflate).
312fn decompress_deflate(data: &[u8]) -> io::Result<Vec<u8>> {
313    oxiarc_deflate::inflate(data).map_err(|e| io::Error::other(e.to_string()))
314}
315
316/// Determine optimal compression algorithm for content type.
317#[must_use]
318pub fn suggest_algorithm_for_content(content_type: &str) -> CompressionAlgorithm {
319    match content_type {
320        // Already compressed formats
321        t if t.contains("jpeg") || t.contains("jpg") => CompressionAlgorithm::None,
322        t if t.contains("png") => CompressionAlgorithm::None,
323        t if t.contains("gif") => CompressionAlgorithm::None,
324        t if t.contains("mp4") || t.contains("webm") => CompressionAlgorithm::None,
325        t if t.contains("mp3") || t.contains("ogg") => CompressionAlgorithm::None,
326        t if t.contains("zip") || t.contains("gzip") => CompressionAlgorithm::None,
327
328        // Text formats - good compression
329        t if t.contains("text") || t.contains("json") || t.contains("xml") => {
330            CompressionAlgorithm::Maximum
331        }
332        t if t.contains("html") || t.contains("css") || t.contains("javascript") => {
333            CompressionAlgorithm::Balanced
334        }
335
336        // Binary formats - moderate compression
337        _ => CompressionAlgorithm::Balanced,
338    }
339}
340
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `input` through `codec.compress` and then `codec.decompress`,
    /// returning the compressed and the restored buffers.
    fn round_trip(codec: &mut Compressor, input: &[u8]) -> (Vec<u8>, Vec<u8>) {
        let packed = codec.compress(input).unwrap();
        let restored = codec.decompress(&packed).unwrap();
        (packed, restored)
    }

    #[test]
    fn test_compress_decompress_none() {
        let input: &[u8] = b"Hello, World!";
        let mut codec = Compressor::new(CompressionAlgorithm::None);

        let (packed, restored) = round_trip(&mut codec, input);

        // Passthrough: both directions return the bytes untouched.
        assert_eq!(packed, input);
        assert_eq!(restored, input);
    }

    #[test]
    fn test_compress_decompress_fast() {
        let input: &[u8] = b"AAAAAAAAAA";
        let mut codec = Compressor::new(CompressionAlgorithm::Fast);

        let (_, restored) = round_trip(&mut codec, input);

        assert_eq!(restored, input);
    }

    #[test]
    fn test_compress_decompress_balanced() {
        let input = b"Hello, CHIE Protocol! ".repeat(100);
        let mut codec = Compressor::new(CompressionAlgorithm::Balanced);

        let (packed, restored) = round_trip(&mut codec, &input);

        assert!(packed.len() < input.len());
        assert_eq!(restored, input);
    }

    #[test]
    fn test_compress_decompress_maximum() {
        let input = b"Test data ".repeat(50);
        let mut codec = Compressor::new(CompressionAlgorithm::Maximum);

        let (packed, restored) = round_trip(&mut codec, &input);

        assert!(packed.len() < input.len());
        assert_eq!(restored, input);
    }

    #[test]
    fn test_compression_stats() {
        let input = b"Test ".repeat(100);
        let mut codec = Compressor::new(CompressionAlgorithm::Balanced);

        codec.compress(&input).unwrap();

        let totals = codec.stats();
        assert_eq!(totals.compressions, 1);
        assert_eq!(totals.bytes_in, input.len() as u64);
        assert!(totals.bytes_out < totals.bytes_in);
        assert!(totals.compression_ratio() > 0.0);
    }

    #[test]
    fn test_compress_with_header() {
        let input: &[u8] = b"Hello, World!";
        let mut codec = Compressor::new(CompressionAlgorithm::Balanced);

        let framed = codec.compress_with_header(input).unwrap();
        // The first byte is the algorithm tag.
        assert_eq!(framed[0], CompressionAlgorithm::Balanced as u8);

        let restored = codec.decompress_with_header(&framed).unwrap();
        assert_eq!(restored, input);
    }

    #[test]
    fn test_suggest_algorithm_for_content() {
        let expectations = [
            ("image/jpeg", CompressionAlgorithm::None),
            ("text/plain", CompressionAlgorithm::Maximum),
            ("application/json", CompressionAlgorithm::Maximum),
            ("video/mp4", CompressionAlgorithm::None),
        ];

        for (content_type, expected) in expectations {
            assert_eq!(suggest_algorithm_for_content(content_type), expected);
        }
    }

    #[test]
    fn test_empty_data() {
        let input: &[u8] = b"";
        let mut codec = Compressor::new(CompressionAlgorithm::Balanced);

        let (packed, restored) = round_trip(&mut codec, input);

        assert_eq!(packed, input);
        assert_eq!(restored, input);
    }

    #[test]
    fn test_reset_stats() {
        let mut codec = Compressor::new(CompressionAlgorithm::Balanced);

        codec.compress(b"Test data").unwrap();
        assert_eq!(codec.stats().compressions, 1);

        codec.reset_stats();
        assert_eq!(codec.stats().compressions, 0);
    }

    #[test]
    fn test_rle_compression() {
        let input: &[u8] = b"AAAAAAAAAA";

        let restored = decompress_rle(&compress_rle(input)).unwrap();

        assert_eq!(restored, input);
    }
}