llm_cost_ops/compression/
codec.rs

1// Compression/decompression codec implementation
2
3use super::{CompressionAlgorithm, CompressionError, CompressionLevel, CompressionResult, CompressionStats};
4use flate2::read::{GzDecoder, ZlibDecoder};
5use flate2::write::{GzEncoder, ZlibEncoder};
6use std::io::{Read, Write};
7use std::time::Instant;
8
9/// Compressor trait for different algorithms
10pub trait Compressor: Send + Sync {
11    /// Compress data
12    fn compress(&self, data: &[u8], level: CompressionLevel) -> CompressionResult<Vec<u8>>;
13
14    /// Decompress data
15    fn decompress(&self, data: &[u8]) -> CompressionResult<Vec<u8>>;
16
17    /// Get algorithm
18    fn algorithm(&self) -> CompressionAlgorithm;
19}
20
21/// Gzip compressor
22#[derive(Debug, Clone, Default)]
23pub struct GzipCompressor;
24
25impl Compressor for GzipCompressor {
26    fn compress(&self, data: &[u8], level: CompressionLevel) -> CompressionResult<Vec<u8>> {
27        let compression_level = flate2::Compression::new(level.gzip_level());
28        let mut encoder = GzEncoder::new(Vec::new(), compression_level);
29
30        encoder
31            .write_all(data)
32            .map_err(|e| CompressionError::CompressionFailed(e.to_string()))?;
33
34        encoder
35            .finish()
36            .map_err(|e| CompressionError::CompressionFailed(e.to_string()))
37    }
38
39    fn decompress(&self, data: &[u8]) -> CompressionResult<Vec<u8>> {
40        let mut decoder = GzDecoder::new(data);
41        let mut decompressed = Vec::new();
42
43        decoder
44            .read_to_end(&mut decompressed)
45            .map_err(|e| CompressionError::DecompressionFailed(e.to_string()))?;
46
47        Ok(decompressed)
48    }
49
50    fn algorithm(&self) -> CompressionAlgorithm {
51        CompressionAlgorithm::Gzip
52    }
53}
54
55/// Brotli compressor
56#[derive(Debug, Clone, Default)]
57pub struct BrotliCompressor;
58
59impl Compressor for BrotliCompressor {
60    fn compress(&self, data: &[u8], level: CompressionLevel) -> CompressionResult<Vec<u8>> {
61        let quality = level.brotli_level() as i32;
62        let mut output = Vec::new();
63
64        brotli::BrotliCompress(
65            &mut std::io::Cursor::new(data),
66            &mut output,
67            &brotli::enc::BrotliEncoderParams {
68                quality,
69                ..Default::default()
70            },
71        )
72        .map_err(|e| CompressionError::CompressionFailed(e.to_string()))?;
73
74        Ok(output)
75    }
76
77    fn decompress(&self, data: &[u8]) -> CompressionResult<Vec<u8>> {
78        let mut output = Vec::new();
79
80        brotli::BrotliDecompress(&mut std::io::Cursor::new(data), &mut output)
81            .map_err(|e| CompressionError::DecompressionFailed(e.to_string()))?;
82
83        Ok(output)
84    }
85
86    fn algorithm(&self) -> CompressionAlgorithm {
87        CompressionAlgorithm::Brotli
88    }
89}
90
91/// Deflate compressor
92#[derive(Debug, Clone, Default)]
93pub struct DeflateCompressor;
94
95impl Compressor for DeflateCompressor {
96    fn compress(&self, data: &[u8], level: CompressionLevel) -> CompressionResult<Vec<u8>> {
97        let compression_level = flate2::Compression::new(level.gzip_level());
98        let mut encoder = ZlibEncoder::new(Vec::new(), compression_level);
99
100        encoder
101            .write_all(data)
102            .map_err(|e| CompressionError::CompressionFailed(e.to_string()))?;
103
104        encoder
105            .finish()
106            .map_err(|e| CompressionError::CompressionFailed(e.to_string()))
107    }
108
109    fn decompress(&self, data: &[u8]) -> CompressionResult<Vec<u8>> {
110        let mut decoder = ZlibDecoder::new(data);
111        let mut decompressed = Vec::new();
112
113        decoder
114            .read_to_end(&mut decompressed)
115            .map_err(|e| CompressionError::DecompressionFailed(e.to_string()))?;
116
117        Ok(decompressed)
118    }
119
120    fn algorithm(&self) -> CompressionAlgorithm {
121        CompressionAlgorithm::Deflate
122    }
123}
124
125/// Get compressor for algorithm
126pub fn get_compressor(algorithm: CompressionAlgorithm) -> CompressionResult<Box<dyn Compressor>> {
127    match algorithm {
128        CompressionAlgorithm::Gzip => Ok(Box::new(GzipCompressor)),
129        CompressionAlgorithm::Brotli => Ok(Box::new(BrotliCompressor)),
130        CompressionAlgorithm::Deflate => Ok(Box::new(DeflateCompressor)),
131        CompressionAlgorithm::Identity => Err(CompressionError::UnsupportedAlgorithm(
132            "Identity encoding does not require compression".to_string(),
133        )),
134    }
135}
136
137/// Compress data with specified algorithm and level
138pub fn compress(
139    data: &[u8],
140    algorithm: CompressionAlgorithm,
141    level: CompressionLevel,
142) -> CompressionResult<(Vec<u8>, CompressionStats)> {
143    if algorithm == CompressionAlgorithm::Identity {
144        return Ok((
145            data.to_vec(),
146            CompressionStats::new(data.len(), data.len(), algorithm, 0.0),
147        ));
148    }
149
150    let start = Instant::now();
151    let compressor = get_compressor(algorithm)?;
152    let compressed = compressor.compress(data, level)?;
153    let duration_ms = start.elapsed().as_secs_f64() * 1000.0;
154
155    let stats = CompressionStats::new(data.len(), compressed.len(), algorithm, duration_ms);
156
157    Ok((compressed, stats))
158}
159
160/// Decompress data with specified algorithm
161pub fn decompress(
162    data: &[u8],
163    algorithm: CompressionAlgorithm,
164) -> CompressionResult<(Vec<u8>, CompressionStats)> {
165    if algorithm == CompressionAlgorithm::Identity {
166        return Ok((
167            data.to_vec(),
168            CompressionStats::new(data.len(), data.len(), algorithm, 0.0),
169        ));
170    }
171
172    let start = Instant::now();
173    let compressor = get_compressor(algorithm)?;
174    let decompressed = compressor.decompress(data)?;
175    let duration_ms = start.elapsed().as_secs_f64() * 1000.0;
176
177    let stats = CompressionStats::new(decompressed.len(), data.len(), algorithm, duration_ms);
178
179    Ok((decompressed, stats))
180}
181
182/// Compress data with automatic algorithm selection
183pub fn compress_auto(
184    data: &[u8],
185    level: CompressionLevel,
186) -> CompressionResult<(Vec<u8>, CompressionAlgorithm, CompressionStats)> {
187    // Try brotli first (best compression ratio)
188    match compress(data, CompressionAlgorithm::Brotli, level) {
189        Ok((compressed, stats)) => Ok((compressed, CompressionAlgorithm::Brotli, stats)),
190        Err(_) => {
191            // Fallback to gzip
192            let (compressed, stats) = compress(data, CompressionAlgorithm::Gzip, level)?;
193            Ok((compressed, CompressionAlgorithm::Gzip, stats))
194        }
195    }
196}
197
#[cfg(test)]
mod tests {
    use super::*;

    /// Repetitive sample text, so every codec is expected to shrink it.
    const TEST_DATA: &str = "Hello, World! This is a test string that should compress well because it has repetition. Hello, World! This is a test string that should compress well because it has repetition.";

    /// Encodes `input` with `codec` at the default level, checks that the
    /// output is smaller than the input, and checks that decoding restores
    /// the input exactly.
    fn assert_shrinking_round_trip(codec: &dyn Compressor, input: &[u8]) {
        let packed = codec.compress(input, CompressionLevel::Default).unwrap();
        assert!(packed.len() < input.len());

        let unpacked = codec.decompress(&packed).unwrap();
        assert_eq!(unpacked, input);
    }

    #[test]
    fn test_gzip_compress_decompress() {
        assert_shrinking_round_trip(&GzipCompressor, TEST_DATA.as_bytes());
    }

    #[test]
    fn test_brotli_compress_decompress() {
        assert_shrinking_round_trip(&BrotliCompressor, TEST_DATA.as_bytes());
    }

    #[test]
    fn test_deflate_compress_decompress() {
        assert_shrinking_round_trip(&DeflateCompressor, TEST_DATA.as_bytes());
    }

    #[test]
    fn test_compress_function() {
        let input = TEST_DATA.as_bytes();

        let (packed, stats) =
            compress(input, CompressionAlgorithm::Gzip, CompressionLevel::Default).unwrap();

        // The payload shrank and the stats describe exactly this run.
        assert!(packed.len() < input.len());
        assert_eq!(stats.original_size, input.len());
        assert_eq!(stats.compressed_size, packed.len());
        assert!(stats.compression_ratio < 1.0);
        assert_eq!(stats.algorithm, Some(CompressionAlgorithm::Gzip));
    }

    #[test]
    fn test_decompress_function() {
        let input = TEST_DATA.as_bytes();

        let (packed, _) =
            compress(input, CompressionAlgorithm::Gzip, CompressionLevel::Default).unwrap();
        let (unpacked, stats) = decompress(&packed, CompressionAlgorithm::Gzip).unwrap();

        assert_eq!(unpacked, input);
        // Decompression stats keep "original" as the uncompressed size.
        assert_eq!(stats.original_size, input.len());
        assert_eq!(stats.compressed_size, packed.len());
    }

    #[test]
    fn test_compression_levels() {
        // A large input keeps fixed per-stream overhead from dominating the
        // comparison; on tiny inputs "best" can come out larger than "fastest".
        let large_text = TEST_DATA.repeat(100);
        let input = large_text.as_bytes();

        let (fastest, _) =
            compress(input, CompressionAlgorithm::Gzip, CompressionLevel::Fastest).unwrap();
        let (best, _) =
            compress(input, CompressionAlgorithm::Gzip, CompressionLevel::Best).unwrap();

        assert!(best.len() <= fastest.len());
    }

    #[test]
    fn test_identity_compression() {
        let input = TEST_DATA.as_bytes();

        let (output, stats) =
            compress(input, CompressionAlgorithm::Identity, CompressionLevel::Default).unwrap();

        // Identity passes the bytes through untouched.
        assert_eq!(output, input);
        assert_eq!(stats.compression_ratio, 1.0);
    }

    #[test]
    fn test_compress_auto() {
        let input = TEST_DATA.as_bytes();

        let (packed, chosen, stats) = compress_auto(input, CompressionLevel::Default).unwrap();

        assert!(packed.len() < input.len());
        // Brotli is the first choice and is expected to succeed here.
        assert_eq!(chosen, CompressionAlgorithm::Brotli);
        assert!(stats.compression_ratio < 1.0);
    }

    #[test]
    fn test_brotli_better_than_gzip() {
        let input = TEST_DATA.repeat(10).into_bytes();

        let (via_gzip, _) =
            compress(&input, CompressionAlgorithm::Gzip, CompressionLevel::Best).unwrap();
        let (via_brotli, _) =
            compress(&input, CompressionAlgorithm::Brotli, CompressionLevel::Best).unwrap();

        // Brotli should generally produce output no larger than gzip's.
        assert!(via_brotli.len() <= via_gzip.len());
    }

    #[test]
    fn test_round_trip_all_algorithms() {
        let input = TEST_DATA.as_bytes();
        let codecs = [
            CompressionAlgorithm::Gzip,
            CompressionAlgorithm::Brotli,
            CompressionAlgorithm::Deflate,
        ];

        for algorithm in codecs {
            let (packed, _) = compress(input, algorithm, CompressionLevel::Default).unwrap();
            let (unpacked, _) = decompress(&packed, algorithm).unwrap();
            assert_eq!(unpacked, input, "Failed for algorithm: {:?}", algorithm);
        }
    }
}