llm_cost_ops/compression/
codec.rs1use super::{CompressionAlgorithm, CompressionError, CompressionLevel, CompressionResult, CompressionStats};
4use flate2::read::{GzDecoder, ZlibDecoder};
5use flate2::write::{GzEncoder, ZlibEncoder};
6use std::io::{Read, Write};
7use std::time::Instant;
8
9pub trait Compressor: Send + Sync {
11 fn compress(&self, data: &[u8], level: CompressionLevel) -> CompressionResult<Vec<u8>>;
13
14 fn decompress(&self, data: &[u8]) -> CompressionResult<Vec<u8>>;
16
17 fn algorithm(&self) -> CompressionAlgorithm;
19}
20
/// Stateless handle for the gzip codec.
///
/// The type has no fields; all behaviour lives in its `Compressor` implementation.
#[derive(Clone, Debug, Default)]
pub struct GzipCompressor;
24
25impl Compressor for GzipCompressor {
26 fn compress(&self, data: &[u8], level: CompressionLevel) -> CompressionResult<Vec<u8>> {
27 let compression_level = flate2::Compression::new(level.gzip_level());
28 let mut encoder = GzEncoder::new(Vec::new(), compression_level);
29
30 encoder
31 .write_all(data)
32 .map_err(|e| CompressionError::CompressionFailed(e.to_string()))?;
33
34 encoder
35 .finish()
36 .map_err(|e| CompressionError::CompressionFailed(e.to_string()))
37 }
38
39 fn decompress(&self, data: &[u8]) -> CompressionResult<Vec<u8>> {
40 let mut decoder = GzDecoder::new(data);
41 let mut decompressed = Vec::new();
42
43 decoder
44 .read_to_end(&mut decompressed)
45 .map_err(|e| CompressionError::DecompressionFailed(e.to_string()))?;
46
47 Ok(decompressed)
48 }
49
50 fn algorithm(&self) -> CompressionAlgorithm {
51 CompressionAlgorithm::Gzip
52 }
53}
54
/// Stateless handle for the Brotli codec.
///
/// The type has no fields; all behaviour lives in its `Compressor` implementation.
#[derive(Clone, Debug, Default)]
pub struct BrotliCompressor;
58
59impl Compressor for BrotliCompressor {
60 fn compress(&self, data: &[u8], level: CompressionLevel) -> CompressionResult<Vec<u8>> {
61 let quality = level.brotli_level() as i32;
62 let mut output = Vec::new();
63
64 brotli::BrotliCompress(
65 &mut std::io::Cursor::new(data),
66 &mut output,
67 &brotli::enc::BrotliEncoderParams {
68 quality,
69 ..Default::default()
70 },
71 )
72 .map_err(|e| CompressionError::CompressionFailed(e.to_string()))?;
73
74 Ok(output)
75 }
76
77 fn decompress(&self, data: &[u8]) -> CompressionResult<Vec<u8>> {
78 let mut output = Vec::new();
79
80 brotli::BrotliDecompress(&mut std::io::Cursor::new(data), &mut output)
81 .map_err(|e| CompressionError::DecompressionFailed(e.to_string()))?;
82
83 Ok(output)
84 }
85
86 fn algorithm(&self) -> CompressionAlgorithm {
87 CompressionAlgorithm::Brotli
88 }
89}
90
/// Stateless handle for the deflate codec.
///
/// The type has no fields; all behaviour lives in its `Compressor` implementation.
#[derive(Clone, Debug, Default)]
pub struct DeflateCompressor;
94
95impl Compressor for DeflateCompressor {
96 fn compress(&self, data: &[u8], level: CompressionLevel) -> CompressionResult<Vec<u8>> {
97 let compression_level = flate2::Compression::new(level.gzip_level());
98 let mut encoder = ZlibEncoder::new(Vec::new(), compression_level);
99
100 encoder
101 .write_all(data)
102 .map_err(|e| CompressionError::CompressionFailed(e.to_string()))?;
103
104 encoder
105 .finish()
106 .map_err(|e| CompressionError::CompressionFailed(e.to_string()))
107 }
108
109 fn decompress(&self, data: &[u8]) -> CompressionResult<Vec<u8>> {
110 let mut decoder = ZlibDecoder::new(data);
111 let mut decompressed = Vec::new();
112
113 decoder
114 .read_to_end(&mut decompressed)
115 .map_err(|e| CompressionError::DecompressionFailed(e.to_string()))?;
116
117 Ok(decompressed)
118 }
119
120 fn algorithm(&self) -> CompressionAlgorithm {
121 CompressionAlgorithm::Deflate
122 }
123}
124
125pub fn get_compressor(algorithm: CompressionAlgorithm) -> CompressionResult<Box<dyn Compressor>> {
127 match algorithm {
128 CompressionAlgorithm::Gzip => Ok(Box::new(GzipCompressor)),
129 CompressionAlgorithm::Brotli => Ok(Box::new(BrotliCompressor)),
130 CompressionAlgorithm::Deflate => Ok(Box::new(DeflateCompressor)),
131 CompressionAlgorithm::Identity => Err(CompressionError::UnsupportedAlgorithm(
132 "Identity encoding does not require compression".to_string(),
133 )),
134 }
135}
136
137pub fn compress(
139 data: &[u8],
140 algorithm: CompressionAlgorithm,
141 level: CompressionLevel,
142) -> CompressionResult<(Vec<u8>, CompressionStats)> {
143 if algorithm == CompressionAlgorithm::Identity {
144 return Ok((
145 data.to_vec(),
146 CompressionStats::new(data.len(), data.len(), algorithm, 0.0),
147 ));
148 }
149
150 let start = Instant::now();
151 let compressor = get_compressor(algorithm)?;
152 let compressed = compressor.compress(data, level)?;
153 let duration_ms = start.elapsed().as_secs_f64() * 1000.0;
154
155 let stats = CompressionStats::new(data.len(), compressed.len(), algorithm, duration_ms);
156
157 Ok((compressed, stats))
158}
159
160pub fn decompress(
162 data: &[u8],
163 algorithm: CompressionAlgorithm,
164) -> CompressionResult<(Vec<u8>, CompressionStats)> {
165 if algorithm == CompressionAlgorithm::Identity {
166 return Ok((
167 data.to_vec(),
168 CompressionStats::new(data.len(), data.len(), algorithm, 0.0),
169 ));
170 }
171
172 let start = Instant::now();
173 let compressor = get_compressor(algorithm)?;
174 let decompressed = compressor.decompress(data)?;
175 let duration_ms = start.elapsed().as_secs_f64() * 1000.0;
176
177 let stats = CompressionStats::new(decompressed.len(), data.len(), algorithm, duration_ms);
178
179 Ok((decompressed, stats))
180}
181
182pub fn compress_auto(
184 data: &[u8],
185 level: CompressionLevel,
186) -> CompressionResult<(Vec<u8>, CompressionAlgorithm, CompressionStats)> {
187 match compress(data, CompressionAlgorithm::Brotli, level) {
189 Ok((compressed, stats)) => Ok((compressed, CompressionAlgorithm::Brotli, stats)),
190 Err(_) => {
191 let (compressed, stats) = compress(data, CompressionAlgorithm::Gzip, level)?;
193 Ok((compressed, CompressionAlgorithm::Gzip, stats))
194 }
195 }
196}
197
#[cfg(test)]
mod tests {
    use super::*;

    /// Sample input: one sentence repeated twice, so every codec can shrink it.
    const TEST_DATA: &str = "Hello, World! This is a test string that should compress well because it has repetition. Hello, World! This is a test string that should compress well because it has repetition.";

    /// Compresses the sample at the default level with `codec`, checks that the
    /// output is smaller than the input, and checks that decompression restores it.
    fn assert_codec_round_trip(codec: &dyn Compressor) {
        let input = TEST_DATA.as_bytes();

        let packed = codec.compress(input, CompressionLevel::Default).unwrap();
        assert!(packed.len() < input.len());

        let unpacked = codec.decompress(&packed).unwrap();
        assert_eq!(unpacked, input);
    }

    #[test]
    fn test_gzip_compress_decompress() {
        assert_codec_round_trip(&GzipCompressor);
    }

    #[test]
    fn test_brotli_compress_decompress() {
        assert_codec_round_trip(&BrotliCompressor);
    }

    #[test]
    fn test_deflate_compress_decompress() {
        assert_codec_round_trip(&DeflateCompressor);
    }

    /// The free `compress` function fills in every statistics field.
    #[test]
    fn test_compress_function() {
        let input = TEST_DATA.as_bytes();

        let (packed, stats) =
            compress(input, CompressionAlgorithm::Gzip, CompressionLevel::Default).unwrap();

        assert!(packed.len() < input.len());
        assert_eq!(stats.original_size, input.len());
        assert_eq!(stats.compressed_size, packed.len());
        assert!(stats.compression_ratio < 1.0);
        assert_eq!(stats.algorithm, Some(CompressionAlgorithm::Gzip));
    }

    /// The free `decompress` function restores the input and reports both sizes.
    #[test]
    fn test_decompress_function() {
        let input = TEST_DATA.as_bytes();

        let (packed, _) =
            compress(input, CompressionAlgorithm::Gzip, CompressionLevel::Default).unwrap();
        let (unpacked, stats) = decompress(&packed, CompressionAlgorithm::Gzip).unwrap();

        assert_eq!(unpacked, input);
        assert_eq!(stats.original_size, input.len());
        assert_eq!(stats.compressed_size, packed.len());
    }

    /// The best gzip level never produces more bytes than the fastest level.
    #[test]
    fn test_compression_levels() {
        let large_data = TEST_DATA.repeat(100);
        let input = large_data.as_bytes();

        let (fast, _) =
            compress(input, CompressionAlgorithm::Gzip, CompressionLevel::Fastest).unwrap();
        let (best, _) = compress(input, CompressionAlgorithm::Gzip, CompressionLevel::Best).unwrap();

        assert!(best.len() <= fast.len());
    }

    /// Identity "compression" returns the input untouched with a ratio of 1.0.
    #[test]
    fn test_identity_compression() {
        let input = TEST_DATA.as_bytes();

        let (output, stats) =
            compress(input, CompressionAlgorithm::Identity, CompressionLevel::Default).unwrap();

        assert_eq!(output, input);
        assert_eq!(stats.compression_ratio, 1.0);
    }

    /// Automatic selection picks Brotli when Brotli succeeds.
    #[test]
    fn test_compress_auto() {
        let input = TEST_DATA.as_bytes();

        let (packed, chosen, stats) = compress_auto(input, CompressionLevel::Default).unwrap();

        assert!(packed.len() < input.len());
        assert_eq!(chosen, CompressionAlgorithm::Brotli);
        assert!(stats.compression_ratio < 1.0);
    }

    /// At the best level Brotli output is no larger than gzip output for this sample.
    #[test]
    fn test_brotli_better_than_gzip() {
        let input = TEST_DATA.repeat(10).into_bytes();

        let (gzip_out, _) =
            compress(&input, CompressionAlgorithm::Gzip, CompressionLevel::Best).unwrap();
        let (brotli_out, _) =
            compress(&input, CompressionAlgorithm::Brotli, CompressionLevel::Best).unwrap();

        assert!(brotli_out.len() <= gzip_out.len());
    }

    /// Every real codec survives a compress/decompress round trip via the free functions.
    #[test]
    fn test_round_trip_all_algorithms() {
        let input = TEST_DATA.as_bytes();
        let algorithms = [
            CompressionAlgorithm::Gzip,
            CompressionAlgorithm::Brotli,
            CompressionAlgorithm::Deflate,
        ];

        for algo in algorithms {
            let (packed, _) = compress(input, algo, CompressionLevel::Default).unwrap();
            let (unpacked, _) = decompress(&packed, algo).unwrap();
            assert_eq!(unpacked, input, "Failed for algorithm: {:?}", algo);
        }
    }
}