base_d/features/
compression.rs

1use std::io::{Read, Write};
2
/// Upper bound, in bytes, on decompressed output (100MB).
///
/// Used by the decompression routines in this file to guard against
/// decompression bombs.
const MAX_DECOMPRESS_SIZE: usize = 100 << 20;
5
/// The compression algorithms this module can dispatch to.
///
/// The type is a plain tag: it is `Copy`, comparable, and carries no
/// per-algorithm settings (the level is passed separately to `compress`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompressionAlgorithm {
    /// gzip.
    Gzip,
    /// Zstandard.
    Zstd,
    /// Brotli.
    Brotli,
    /// LZ4.
    Lz4,
    /// Snappy.
    Snappy,
    /// LZMA / xz.
    Lzma,
}
16
17impl CompressionAlgorithm {
18    /// Returns all available compression algorithms.
19    pub fn all() -> Vec<CompressionAlgorithm> {
20        vec![
21            CompressionAlgorithm::Gzip,
22            CompressionAlgorithm::Zstd,
23            CompressionAlgorithm::Brotli,
24            CompressionAlgorithm::Lz4,
25            CompressionAlgorithm::Snappy,
26            CompressionAlgorithm::Lzma,
27        ]
28    }
29
30    /// Select a random compression algorithm.
31    pub fn random() -> CompressionAlgorithm {
32        use rand::prelude::IndexedRandom;
33        let all = Self::all();
34        *all.choose(&mut rand::rng()).unwrap()
35    }
36
37    /// Get default compression level for this algorithm.
38    pub fn default_level(&self) -> u32 {
39        match self {
40            CompressionAlgorithm::Gzip => 6,
41            CompressionAlgorithm::Zstd => 3,
42            CompressionAlgorithm::Brotli => 6,
43            CompressionAlgorithm::Lz4 => 0,    // LZ4 ignores level
44            CompressionAlgorithm::Snappy => 0, // Snappy ignores level
45            CompressionAlgorithm::Lzma => 6,
46        }
47    }
48
49    /// Parse compression algorithm from string.
50    #[allow(clippy::should_implement_trait)]
51    pub fn from_str(s: &str) -> Result<Self, String> {
52        match s.to_lowercase().as_str() {
53            "gzip" | "gz" => Ok(CompressionAlgorithm::Gzip),
54            "zstd" | "zst" => Ok(CompressionAlgorithm::Zstd),
55            "brotli" | "br" => Ok(CompressionAlgorithm::Brotli),
56            "lz4" => Ok(CompressionAlgorithm::Lz4),
57            "snappy" | "snap" => Ok(CompressionAlgorithm::Snappy),
58            "lzma" | "xz" => Ok(CompressionAlgorithm::Lzma),
59            _ => Err(format!("Unknown compression algorithm: {}", s)),
60        }
61    }
62
63    pub fn as_str(&self) -> &str {
64        match self {
65            CompressionAlgorithm::Gzip => "gzip",
66            CompressionAlgorithm::Zstd => "zstd",
67            CompressionAlgorithm::Brotli => "brotli",
68            CompressionAlgorithm::Lz4 => "lz4",
69            CompressionAlgorithm::Snappy => "snappy",
70            CompressionAlgorithm::Lzma => "lzma",
71        }
72    }
73}
74
75/// Compress data using the specified algorithm and level.
76pub fn compress(
77    data: &[u8],
78    algorithm: CompressionAlgorithm,
79    level: u32,
80) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
81    match algorithm {
82        CompressionAlgorithm::Gzip => compress_gzip(data, level),
83        CompressionAlgorithm::Zstd => compress_zstd(data, level),
84        CompressionAlgorithm::Brotli => compress_brotli(data, level),
85        CompressionAlgorithm::Lz4 => compress_lz4(data, level),
86        CompressionAlgorithm::Snappy => compress_snappy(data, level),
87        CompressionAlgorithm::Lzma => compress_lzma(data, level),
88    }
89}
90
91/// Decompress data using the specified algorithm.
92pub fn decompress(
93    data: &[u8],
94    algorithm: CompressionAlgorithm,
95) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
96    match algorithm {
97        CompressionAlgorithm::Gzip => decompress_gzip(data),
98        CompressionAlgorithm::Zstd => decompress_zstd(data),
99        CompressionAlgorithm::Brotli => decompress_brotli(data),
100        CompressionAlgorithm::Lz4 => decompress_lz4(data),
101        CompressionAlgorithm::Snappy => decompress_snappy(data),
102        CompressionAlgorithm::Lzma => decompress_lzma(data),
103    }
104}
105
106fn compress_gzip(data: &[u8], level: u32) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
107    use flate2::Compression;
108    use flate2::write::GzEncoder;
109
110    let mut encoder = GzEncoder::new(Vec::new(), Compression::new(level));
111    encoder.write_all(data)?;
112    Ok(encoder.finish()?)
113}
114
115fn decompress_gzip(data: &[u8]) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
116    use flate2::read::GzDecoder;
117
118    let mut decoder = GzDecoder::new(data).take(MAX_DECOMPRESS_SIZE as u64);
119    let mut result = Vec::new();
120    let bytes_read = decoder.read_to_end(&mut result)?;
121
122    // Check if we hit the limit (possible decompression bomb)
123    if bytes_read == MAX_DECOMPRESS_SIZE {
124        return Err("Decompressed output exceeds 100MB limit (possible decompression bomb)".into());
125    }
126
127    Ok(result)
128}
129
/// Zstd-compresses `data` at the given `level` (native backend).
///
/// Levels above `i32::MAX` are saturated to `i32::MAX` before being passed
/// to the encoder.
#[cfg(feature = "native-compression")]
fn compress_zstd(data: &[u8], level: u32) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
    // The zstd API takes an `i32`. A plain `as` cast would wrap very large
    // levels around to negative numbers, silently selecting a different level
    // than the caller asked for, so saturate instead.
    let level = i32::try_from(level).unwrap_or(i32::MAX);
    Ok(zstd::encode_all(data, level)?)
}
134
/// Decompresses zstd `data` (native backend), refusing output larger than
/// `MAX_DECOMPRESS_SIZE`.
///
/// # Errors
///
/// Returns an error if the decoder cannot be created, the stream cannot be
/// decoded, or the decompressed output exceeds the limit.
#[cfg(feature = "native-compression")]
fn decompress_zstd(data: &[u8]) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
    use std::io::Cursor;

    // Read at most one byte past the limit: that extra byte is what tells
    // "exactly at the limit" (allowed) apart from "over the limit" (rejected).
    let mut decoder =
        zstd::Decoder::new(Cursor::new(data))?.take(MAX_DECOMPRESS_SIZE as u64 + 1);
    let mut result = Vec::new();
    decoder.read_to_end(&mut result)?;

    // More than the limit came out (possible decompression bomb)
    if result.len() > MAX_DECOMPRESS_SIZE {
        return Err("Decompressed output exceeds 100MB limit (possible decompression bomb)".into());
    }

    Ok(result)
}
150
/// Zstd compression stub for WASM builds; always returns an error.
#[cfg(all(feature = "wasm", not(feature = "native-compression")))]
fn compress_zstd(_data: &[u8], _level: u32) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
    // The WASM build relies on ruzstd, which only decodes, so there is no
    // encoder to call here.
    let reason = "Zstd compression not supported in WASM (ruzstd is decode-only)";
    Err(Box::from(reason))
}
156
/// Decompresses zstd `data` (WASM backend via ruzstd), refusing output larger
/// than `MAX_DECOMPRESS_SIZE`.
///
/// # Errors
///
/// Returns an error if the decoder cannot be created, the stream cannot be
/// decoded, or the decompressed output exceeds the limit.
#[cfg(all(feature = "wasm", not(feature = "native-compression")))]
fn decompress_zstd(data: &[u8]) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
    use std::io::Cursor;

    let decoder = ruzstd::StreamingDecoder::new(Cursor::new(data))?;
    let mut result = Vec::new();

    // Read at most one byte past the limit: that extra byte is what tells
    // "exactly at the limit" (allowed) apart from "over the limit" (rejected).
    decoder
        .take(MAX_DECOMPRESS_SIZE as u64 + 1)
        .read_to_end(&mut result)?;

    // More than the limit came out (possible decompression bomb)
    if result.len() > MAX_DECOMPRESS_SIZE {
        return Err("Decompressed output exceeds 100MB limit (possible decompression bomb)".into());
    }

    Ok(result)
}
174
175fn compress_brotli(data: &[u8], level: u32) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
176    let mut result = Vec::new();
177    let mut reader = brotli::CompressorReader::new(data, 4096, level, 22);
178    reader.read_to_end(&mut result)?;
179    Ok(result)
180}
181
182fn decompress_brotli(data: &[u8]) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
183    let mut result = Vec::new();
184    let mut reader = brotli::Decompressor::new(data, 4096).take(MAX_DECOMPRESS_SIZE as u64);
185    let bytes_read = reader.read_to_end(&mut result)?;
186
187    // Check if we hit the limit (possible decompression bomb)
188    if bytes_read == MAX_DECOMPRESS_SIZE {
189        return Err("Decompressed output exceeds 100MB limit (possible decompression bomb)".into());
190    }
191
192    Ok(result)
193}
194
/// LZ4-compresses `data` as a single block (native backend).
///
/// The `_level` argument is accepted for signature parity with the other
/// encoders and is not used.
#[cfg(feature = "native-compression")]
fn compress_lz4(data: &[u8], _level: u32) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
    // Default compression mode, and no size prefix on the output.
    let block = lz4::block::compress(data, None, false)?;
    Ok(block)
}
200
/// Decompresses an LZ4 block (native backend), allowing at most
/// `MAX_DECOMPRESS_SIZE` bytes of output.
///
/// # Errors
///
/// Returns an error if the decoder reports a failure.
#[cfg(feature = "native-compression")]
fn decompress_lz4(data: &[u8]) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
    // The real uncompressed size is not known here, so hand the decoder the
    // module-wide limit as the upper bound instead of a separate literal.
    let max_len = i32::try_from(MAX_DECOMPRESS_SIZE)?;
    Ok(lz4::block::decompress(data, Some(max_len))?)
}
207
/// LZ4-compresses `data` with a size prefix (WASM backend via lz4_flex).
///
/// The `_level` argument is accepted for signature parity with the other
/// encoders and is not used.
#[cfg(all(feature = "wasm", not(feature = "native-compression")))]
fn compress_lz4(data: &[u8], _level: u32) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
    // The uncompressed size is written in front of the block by lz4_flex.
    let framed = lz4_flex::compress_prepend_size(data);
    Ok(framed)
}
213
/// Decompresses size-prefixed LZ4 `data` (WASM backend via lz4_flex),
/// refusing payloads that declare more than `MAX_DECOMPRESS_SIZE` bytes.
///
/// # Errors
///
/// Returns an error if the declared size exceeds the limit or if lz4_flex
/// fails to decode the payload.
#[cfg(all(feature = "wasm", not(feature = "native-compression")))]
fn decompress_lz4(data: &[u8]) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
    // The size-prepended format starts with the uncompressed length as a
    // little-endian u32. That value comes from the (untrusted) input, so check
    // it against the limit before handing the payload to the decoder.
    // Inputs shorter than the prefix fall through and let lz4_flex report
    // the error.
    let declared_len = data
        .get(..4)
        .and_then(|prefix| <[u8; 4]>::try_from(prefix).ok())
        .map(u32::from_le_bytes);

    if let Some(declared_len) = declared_len {
        let too_large = usize::try_from(declared_len).map_or(true, |n| n > MAX_DECOMPRESS_SIZE);
        if too_large {
            return Err(
                "Decompressed output exceeds 100MB limit (possible decompression bomb)".into(),
            );
        }
    }

    // lz4_flex reads the prepended size automatically
    Ok(lz4_flex::decompress_size_prepended(data)?)
}
219
220fn compress_snappy(data: &[u8], _level: u32) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
221    // Snappy doesn't support compression levels
222    let mut encoder = snap::raw::Encoder::new();
223    Ok(encoder.compress_vec(data)?)
224}
225
226fn decompress_snappy(data: &[u8]) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
227    let mut decoder = snap::raw::Decoder::new();
228    let result = decoder.decompress_vec(data)?;
229
230    // Check if output exceeds limit (possible decompression bomb)
231    if result.len() > MAX_DECOMPRESS_SIZE {
232        return Err("Decompressed output exceeds 100MB limit (possible decompression bomb)".into());
233    }
234
235    Ok(result)
236}
237
/// Compresses `data` into an xz stream at the given preset `level`
/// (native backend).
///
/// # Errors
///
/// Returns an error if the encoder cannot be created for `level` or if
/// writing/finishing the stream fails.
#[cfg(feature = "native-compression")]
fn compress_lzma(data: &[u8], level: u32) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
    use xz2::stream::{Check, Stream};
    use xz2::write::XzEncoder;

    // `XzEncoder::new` unwraps the stream creation internally, so an
    // unsupported preset would panic. Build the stream explicitly (same
    // CRC64 integrity check) and surface the failure as an error instead.
    let stream = Stream::new_easy_encoder(level, Check::Crc64)?;
    let mut encoder = XzEncoder::new_stream(Vec::new(), stream);
    encoder.write_all(data)?;
    Ok(encoder.finish()?)
}
246
/// Decompresses xz `data` (native backend), refusing output larger than
/// `MAX_DECOMPRESS_SIZE`.
///
/// # Errors
///
/// Returns an error if the stream cannot be decoded or if the decompressed
/// output exceeds the limit.
#[cfg(feature = "native-compression")]
fn decompress_lzma(data: &[u8]) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
    use xz2::read::XzDecoder;

    // Read at most one byte past the limit: that extra byte is what tells
    // "exactly at the limit" (allowed) apart from "over the limit" (rejected).
    let mut decoder = XzDecoder::new(data).take(MAX_DECOMPRESS_SIZE as u64 + 1);
    let mut result = Vec::new();
    decoder.read_to_end(&mut result)?;

    // More than the limit came out (possible decompression bomb)
    if result.len() > MAX_DECOMPRESS_SIZE {
        return Err("Decompressed output exceeds 100MB limit (possible decompression bomb)".into());
    }

    Ok(result)
}
262
/// Compresses `data` with lzma_rs (WASM backend).
///
/// The `_level` argument is accepted for signature parity with the other
/// encoders and is not used.
#[cfg(all(feature = "wasm", not(feature = "native-compression")))]
fn compress_lzma(data: &[u8], _level: u32) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
    let mut source = std::io::Cursor::new(data);
    let mut compressed: Vec<u8> = Vec::new();
    lzma_rs::lzma_compress(&mut source, &mut compressed)?;
    Ok(compressed)
}
271
/// Decompresses LZMA `data` with lzma_rs (WASM backend), refusing output
/// larger than `MAX_DECOMPRESS_SIZE`.
///
/// # Errors
///
/// Returns an error if the stream cannot be decoded or if the decompressed
/// output would exceed the limit.
#[cfg(all(feature = "wasm", not(feature = "native-compression")))]
fn decompress_lzma(data: &[u8]) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
    use std::io::Cursor;

    /// Output buffer that rejects writes which would take it past `limit`.
    struct BoundedSink {
        bytes: Vec<u8>,
        limit: usize,
    }

    impl std::io::Write for BoundedSink {
        fn write(&mut self, chunk: &[u8]) -> std::io::Result<usize> {
            // `bytes.len()` never exceeds `limit`, so this cannot underflow.
            let remaining = self.limit - self.bytes.len();
            if chunk.len() > remaining {
                return Err(std::io::Error::new(
                    std::io::ErrorKind::Other,
                    "Decompressed output exceeds 100MB limit (possible decompression bomb)",
                ));
            }
            self.bytes.extend_from_slice(chunk);
            Ok(chunk.len())
        }

        fn flush(&mut self) -> std::io::Result<()> {
            Ok(())
        }
    }

    // Enforce the limit while decoding: checking only after the decoder has
    // finished would let an oversized payload fill memory first.
    let mut sink = BoundedSink {
        bytes: Vec::new(),
        limit: MAX_DECOMPRESS_SIZE,
    };
    lzma_rs::lzma_decompress(&mut Cursor::new(data), &mut sink)?;

    Ok(sink.bytes)
}
286
#[cfg(test)]
mod tests {
    use super::*;

    /// Compresses `input` with `algorithm` at `level`, decompresses the
    /// result, and asserts the bytes survive the round trip unchanged.
    fn assert_roundtrip(algorithm: CompressionAlgorithm, level: u32, input: &[u8]) {
        let packed = compress(input, algorithm, level).unwrap();
        let unpacked = decompress(&packed, algorithm).unwrap();
        assert_eq!(input, unpacked.as_slice());
    }

    #[test]
    fn test_gzip_roundtrip() {
        assert_roundtrip(
            CompressionAlgorithm::Gzip,
            6,
            b"Hello, world! This is a test of gzip compression.",
        );
    }

    #[test]
    fn test_zstd_roundtrip() {
        assert_roundtrip(
            CompressionAlgorithm::Zstd,
            3,
            b"Hello, world! This is a test of zstd compression.",
        );
    }

    #[test]
    fn test_brotli_roundtrip() {
        assert_roundtrip(
            CompressionAlgorithm::Brotli,
            6,
            b"Hello, world! This is a test of brotli compression.",
        );
    }

    #[test]
    fn test_lz4_roundtrip() {
        assert_roundtrip(
            CompressionAlgorithm::Lz4,
            0,
            b"Hello, world! This is a test of lz4 compression.",
        );
    }

    #[test]
    fn test_snappy_roundtrip() {
        assert_roundtrip(
            CompressionAlgorithm::Snappy,
            0,
            b"Hello, world! This is a test of snappy compression.",
        );
    }

    #[test]
    fn test_lzma_roundtrip() {
        assert_roundtrip(
            CompressionAlgorithm::Lzma,
            6,
            b"Hello, world! This is a test of lzma compression.",
        );
    }
}
338}