Skip to main content

pulith_fetch/codec/
decompress.rs

//! Stream decompression functionality.
//!
//! This module provides stream transformations for decompressing
//! downloaded content on the fly.
5
6use crate::error::{Error, Result};
7use std::io::Read;
8
/// Compression types supported by the fetcher.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompressionType {
    /// No compression
    None,
    /// Gzip compression
    Gzip,
    /// Deflate compression
    Deflate,
    /// Brotli compression (feature-gated)
    #[cfg(feature = "brotli")]
    Brotli,
}

impl CompressionType {
    /// Detect the compression type from a single `Content-Encoding` token.
    ///
    /// Matching is ASCII case-insensitive and ignores surrounding whitespace,
    /// so values such as `" GZip "` are recognised. Anything that is not a
    /// known token (including `"identity"`, the empty string, and
    /// comma-separated lists of several codings) maps to
    /// [`CompressionType::None`].
    pub fn from_encoding(encoding: &str) -> Self {
        // Header values may arrive padded with optional whitespace; the known
        // tokens are pure ASCII, so an ASCII-only case fold is sufficient.
        match encoding.trim().to_ascii_lowercase().as_str() {
            "gzip" | "x-gzip" => CompressionType::Gzip,
            "deflate" => CompressionType::Deflate,
            #[cfg(feature = "brotli")]
            "br" => CompressionType::Brotli,
            _ => CompressionType::None,
        }
    }

    /// Get the `Content-Encoding` header value for this compression type.
    ///
    /// [`CompressionType::None`] is reported as `"identity"`.
    pub fn as_encoding(self) -> &'static str {
        match self {
            CompressionType::None => "identity",
            CompressionType::Gzip => "gzip",
            CompressionType::Deflate => "deflate",
            #[cfg(feature = "brotli")]
            CompressionType::Brotli => "br",
        }
    }
}
46
47/// Error type for stream transformations.
48#[derive(Debug, thiserror::Error)]
49pub enum TransformError {
50    #[error("Transformation error: {0}")]
51    Transform(String),
52    #[error("Invalid compressed data: {0}")]
53    InvalidData(String),
54    #[error("Unsupported compression type: {0:?}")]
55    UnsupportedType(CompressionType),
56}
57
58/// Stream transform trait for decompression.
59pub trait StreamTransform {
60    /// Transform the input bytes.
61    fn transform(&mut self, input: &[u8]) -> Result<Vec<u8>>;
62
63    /// Finalize the transformation (for stream-based decoders).
64    fn finalize(&mut self) -> Result<Vec<u8>> {
65        Ok(Vec::new())
66    }
67
68    /// Reset the transformer state.
69    fn reset(&mut self) -> Result<()>;
70}
71
72/// Gzip decompressor implementation.
73pub struct GzipDecoder {
74    decoder: Option<flate2::read::GzDecoder<std::io::Cursor<Vec<u8>>>>,
75    buffer: Vec<u8>,
76}
77
78impl Default for GzipDecoder {
79    fn default() -> Self {
80        Self::new()
81    }
82}
83
84impl GzipDecoder {
85    /// Create a new Gzip decoder.
86    pub fn new() -> Self {
87        Self {
88            decoder: None,
89            buffer: Vec::new(),
90        }
91    }
92}
93
94impl StreamTransform for GzipDecoder {
95    fn transform(&mut self, input: &[u8]) -> Result<Vec<u8>> {
96        self.buffer.extend_from_slice(input);
97
98        if self.decoder.is_none() {
99            self.decoder = Some(flate2::read::GzDecoder::new(std::io::Cursor::new(
100                self.buffer.clone(),
101            )));
102        }
103
104        let mut output = Vec::new();
105        if let Some(ref mut decoder) = self.decoder {
106            decoder
107                .read_to_end(&mut output)
108                .map_err(|e| Error::Transform(TransformError::InvalidData(e.to_string())))?;
109        }
110
111        Ok(output)
112    }
113
114    fn finalize(&mut self) -> Result<Vec<u8>> {
115        if self.decoder.is_none() && !self.buffer.is_empty() {
116            // Try to decode remaining data
117            self.transform(&[])
118        } else {
119            Ok(Vec::new())
120        }
121    }
122
123    fn reset(&mut self) -> Result<()> {
124        self.decoder = None;
125        self.buffer.clear();
126        Ok(())
127    }
128}
129
130/// Deflate decompressor implementation.
131pub struct DeflateDecoder {
132    decoder: Option<flate2::read::DeflateDecoder<std::io::Cursor<Vec<u8>>>>,
133    buffer: Vec<u8>,
134}
135
136impl Default for DeflateDecoder {
137    fn default() -> Self {
138        Self::new()
139    }
140}
141
142impl DeflateDecoder {
143    /// Create a new Deflate decoder.
144    pub fn new() -> Self {
145        Self {
146            decoder: None,
147            buffer: Vec::new(),
148        }
149    }
150}
151
152impl StreamTransform for DeflateDecoder {
153    fn transform(&mut self, input: &[u8]) -> Result<Vec<u8>> {
154        self.buffer.extend_from_slice(input);
155
156        if self.decoder.is_none() {
157            self.decoder = Some(flate2::read::DeflateDecoder::new(std::io::Cursor::new(
158                self.buffer.clone(),
159            )));
160        }
161
162        let mut output = Vec::new();
163        if let Some(ref mut decoder) = self.decoder {
164            decoder
165                .read_to_end(&mut output)
166                .map_err(|e| Error::Transform(TransformError::InvalidData(e.to_string())))?;
167        }
168
169        Ok(output)
170    }
171
172    fn finalize(&mut self) -> Result<Vec<u8>> {
173        if self.decoder.is_none() && !self.buffer.is_empty() {
174            // Try to decode remaining data
175            self.transform(&[])
176        } else {
177            Ok(Vec::new())
178        }
179    }
180
181    fn reset(&mut self) -> Result<()> {
182        self.decoder = None;
183        self.buffer.clear();
184        Ok(())
185    }
186}
187
/// Brotli decompressor implementation (feature-gated).
///
/// All input received so far is kept in `buffer` and decoded from the start
/// on every call; `emitted` records how much of the decoded output has
/// already been handed to the caller so that no byte is returned twice.
#[cfg(feature = "brotli")]
pub struct BrotliDecoder {
    /// Every compressed byte received since construction or the last reset.
    buffer: Vec<u8>,
    /// Number of decompressed bytes already returned by earlier calls.
    emitted: usize,
}

#[cfg(feature = "brotli")]
impl Default for BrotliDecoder {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(feature = "brotli")]
impl BrotliDecoder {
    /// Create a new Brotli decoder.
    pub fn new() -> Self {
        Self {
            buffer: Vec::new(),
            emitted: 0,
        }
    }
}

#[cfg(feature = "brotli")]
impl StreamTransform for BrotliDecoder {
    /// Append `input` to the buffered stream, decode the whole buffer, and
    /// return only the decompressed bytes not returned by a previous call.
    ///
    /// # Errors
    ///
    /// Returns `TransformError::InvalidData` when the buffered input cannot
    /// be decoded. NOTE(review): because the whole buffer is decoded each
    /// time, a chunk that ends mid-stream is presumably reported as an error
    /// as well -- confirm against the brotli reader's end-of-input behaviour.
    fn transform(&mut self, input: &[u8]) -> Result<Vec<u8>> {
        self.buffer.extend_from_slice(input);

        let mut decoder = brotli::Decompressor::new(std::io::Cursor::new(&self.buffer), 4096);
        let mut decoded = Vec::new();
        decoder
            .read_to_end(&mut decoded)
            .map_err(|e| Error::Transform(TransformError::InvalidData(e.to_string())))?;

        // Skip the prefix that earlier calls already returned. Without this,
        // `finalize` (which re-runs the decode) would hand the complete
        // payload back a second time.
        let fresh = decoded.split_off(self.emitted.min(decoded.len()));
        self.emitted += fresh.len();
        Ok(fresh)
    }

    /// Decode whatever is buffered one last time and return any output that
    /// has not been returned yet.
    ///
    /// Returns an empty vector when no input was ever buffered.
    fn finalize(&mut self) -> Result<Vec<u8>> {
        if self.buffer.is_empty() {
            Ok(Vec::new())
        } else {
            self.transform(&[])
        }
    }

    /// Forget all buffered input and the count of emitted output.
    fn reset(&mut self) -> Result<()> {
        self.buffer.clear();
        self.emitted = 0;
        Ok(())
    }
}
236
237/// Factory function to create appropriate decoder for compression type.
238pub fn create_decoder(compression_type: CompressionType) -> Result<Box<dyn StreamTransform>> {
239    match compression_type {
240        CompressionType::None => Err(Error::Transform(TransformError::UnsupportedType(
241            CompressionType::None,
242        ))),
243        CompressionType::Gzip => Ok(Box::new(GzipDecoder::new())),
244        CompressionType::Deflate => Ok(Box::new(DeflateDecoder::new())),
245        #[cfg(feature = "brotli")]
246        CompressionType::Brotli => Ok(Box::new(BrotliDecoder::new())),
247    }
248}
249
250/// Convenience function to decompress data in one go.
251pub fn decompress(data: &[u8], compression_type: CompressionType) -> Result<Vec<u8>> {
252    let mut decoder = create_decoder(compression_type)?;
253    let result = decoder.transform(data)?;
254    let final_data = decoder.finalize()?;
255
256    // Combine result and final data
257    let mut output = result;
258    output.extend_from_slice(&final_data);
259
260    Ok(output)
261}
262
#[cfg(test)]
mod tests {
    use super::*;
    use flate2::write::{DeflateEncoder, GzEncoder};
    use flate2::Compression as FlateCompression;
    use std::io::Write;

    /// Gzip-compress `data` at the default compression level.
    fn create_gzip_data(data: &[u8]) -> Vec<u8> {
        let mut sink = GzEncoder::new(Vec::new(), FlateCompression::default());
        sink.write_all(data).unwrap();
        sink.finish().unwrap()
    }

    /// Raw-deflate-compress `data` at the default compression level.
    fn create_deflate_data(data: &[u8]) -> Vec<u8> {
        let mut sink = DeflateEncoder::new(Vec::new(), FlateCompression::default());
        sink.write_all(data).unwrap();
        sink.finish().unwrap()
    }

    #[test]
    fn test_compression_type_detection() {
        let cases = [
            ("gzip", CompressionType::Gzip),
            ("GZIP", CompressionType::Gzip),
            ("x-gzip", CompressionType::Gzip),
            ("deflate", CompressionType::Deflate),
            ("unknown", CompressionType::None),
            ("", CompressionType::None),
        ];

        for &(encoding, expected) in &cases {
            assert_eq!(CompressionType::from_encoding(encoding), expected);
        }
    }

    #[test]
    fn test_gzip_decompression() {
        let plain = b"Hello, World! This is a test string for gzip compression.";
        let packed = create_gzip_data(plain);

        let unpacked = GzipDecoder::new().transform(&packed).unwrap();

        assert_eq!(unpacked, plain);
    }

    #[test]
    fn test_deflate_decompression() {
        let plain = b"Hello, World! This is a test string for deflate compression.";
        let packed = create_deflate_data(plain);

        let unpacked = DeflateDecoder::new().transform(&packed).unwrap();

        assert_eq!(unpacked, plain);
    }

    #[test]
    fn test_convenience_function() {
        let plain = b"Test data for convenience function.";

        // Gzip round trip through the one-shot helper.
        let from_gzip = decompress(&create_gzip_data(plain), CompressionType::Gzip).unwrap();
        assert_eq!(from_gzip, plain);

        // Deflate round trip through the one-shot helper.
        let from_deflate =
            decompress(&create_deflate_data(plain), CompressionType::Deflate).unwrap();
        assert_eq!(from_deflate, plain);
    }

    #[test]
    fn test_decoder_factory() {
        assert!(create_decoder(CompressionType::Gzip).is_ok());
        assert!(create_decoder(CompressionType::Deflate).is_ok());
        assert!(create_decoder(CompressionType::None).is_err());
    }

    #[test]
    fn test_invalid_gzip_data() {
        let garbage = b"This is not valid gzip data";

        let outcome = GzipDecoder::new().transform(garbage);

        assert!(outcome.is_err());
    }

    #[test]
    fn test_invalid_deflate_data() {
        let garbage = b"This is not valid deflate data";

        let outcome = DeflateDecoder::new().transform(garbage);

        assert!(outcome.is_err());
    }

    #[test]
    fn test_decoder_reset() {
        let plain = b"Test data for reset.";
        let packed = create_gzip_data(plain);
        let mut decoder = GzipDecoder::new();

        // Decode once with a fresh decoder.
        let first = decoder.transform(&packed).unwrap();
        assert_eq!(first, plain);

        // After a reset the same decoder must decode the stream again.
        decoder.reset().unwrap();
        let second = decoder.transform(&packed).unwrap();
        assert_eq!(second, plain);
    }

    #[test]
    fn test_empty_data() {
        let outcome = GzipDecoder::new().transform(&[]);

        // Either outcome is accepted for empty input: the decoder may reject
        // it for lacking a gzip header, or succeed with no output.
        if let Ok(bytes) = outcome {
            assert!(bytes.is_empty());
        }
    }

    #[test]
    fn test_large_data() {
        // 10_000 bytes cycling through every byte value 0..=255.
        let plain: Vec<u8> = (0..=255u8).cycle().take(10000).collect();
        let packed = create_gzip_data(&plain);

        let unpacked = GzipDecoder::new().transform(&packed).unwrap();

        assert_eq!(unpacked, plain);
    }
}