//! Streaming zlib decompression built on top of `flate2`.
//!
//! Incoming frames are buffered until the buffer ends with the `00 00 FF FF`
//! signature and are then inflated through one long-lived inflate state.

use flate2::DecompressError;
use flate2::{Decompress, FlushDecompress, Status};
use thiserror::Error;

/// Errors returned by [`ZlibStreamDecompressor::decompress`].
#[derive(Error, Debug)]
pub enum ZlibDecompressionError {
    /// The underlying `flate2` inflater reported an error for the buffered input.
    #[error("An error occurred when trying to decompress the input: {0}")]
    DecompressError(#[from] DecompressError),
    /// The buffered input does not end with the `ZLIB_END_BUF` signature yet,
    /// so nothing was decompressed; the data stays buffered for the next call.
    #[error("Stream is not completed; Waiting for more data...")]
    NeedMoreData,
}

/// Byte suffix (`00 00 FF FF`) the buffered input must end with before
/// `decompress` attempts to inflate it.
const ZLIB_END_BUF: [u8; 4] = [0, 0, 255, 255];
/// Output buffer capacity in bytes used by `ZlibStreamDecompressor::new` and
/// as the fallback inside `generate_output_buffer`.
const DEFAULT_OUTPUT_BUFFER_SIZE: usize = 1024 * 128; // 128 KiB

/// Stateful decompressor for a zlib stream that arrives in frames.
///
/// The inflate state is kept between calls to `decompress`, so every frame of
/// one stream has to go through the same instance, in order.
pub struct ZlibStreamDecompressor {
    /// Inflate state shared by all frames; constructed via `Decompress::new(true)`.
    inflate: Decompress,
    /// Input collected across `decompress` calls until it ends with `ZLIB_END_BUF`.
    read_buf: Vec<u8>,
    /// When `Some`, the output buffer capacity is the read buffer length
    /// multiplied by this factor.
    output_buffer_factor: Option<usize>,
    /// Fixed output buffer capacity, used when no factor is configured.
    output_buffer_size: Option<usize>,
}

impl ZlibStreamDecompressor {
    /// Creates a new `ZlibStreamDecompressor` with the default configuration.
    ///
    /// Uses a fixed output buffer of `DEFAULT_OUTPUT_BUFFER_SIZE` (128 KiB).
    pub fn new() -> Self {
        Self::with_buffer_size(DEFAULT_OUTPUT_BUFFER_SIZE)
    }

    /// Creates a new `ZlibStreamDecompressor` with the given output buffer factor.
    ///
    /// The output buffer capacity is the current read buffer length multiplied
    /// by `output_buffer_factor`. A factor of `0`, or a product that overflows
    /// `usize`, falls back to the default buffer size.
    ///
    /// This is a possible attack vector if your input is not verified, as it can
    /// easily consume a lot of memory if there is no ZLIB_END signature for a
    /// long time.
    pub fn with_buffer_factor(output_buffer_factor: usize) -> Self {
        Self {
            inflate: Decompress::new(true),
            read_buf: Vec::new(),
            output_buffer_factor: Some(output_buffer_factor),
            output_buffer_size: None,
        }
    }

    /// Creates a new `ZlibStreamDecompressor` with the given output buffer size.
    ///
    /// The buffer size is fixed at the given value; `0` falls back to the
    /// default buffer size because the inflater cannot make progress without
    /// any output space.
    pub fn with_buffer_size(output_buffer_size: usize) -> Self {
        Self {
            inflate: Decompress::new(true),
            read_buf: Vec::new(),
            output_buffer_factor: None,
            output_buffer_size: Some(output_buffer_size),
        }
    }

    /// Appends `frame` to the read buffer and decompresses the buffer if it
    /// ends with the ZLIB_END signature.
    ///
    /// On success the read buffer is emptied and the decompressed bytes are
    /// returned.
    ///
    /// # Errors
    ///
    /// * `ZlibDecompressionError::NeedMoreData` if the buffered input does not
    ///   end with the ZLIB_END signature yet. The data stays buffered.
    /// * `ZlibDecompressionError::DecompressError` if the inflater rejects the
    ///   input. This most likely means the state of the entire decompressor
    ///   went out of sync and it should be recreated.
    pub fn decompress(
        &mut self,
        mut frame: Vec<u8>,
    ) -> Result<Vec<u8>, ZlibDecompressionError> {
        if self.read_buf.is_empty() {
            // Reuse the frame's allocation instead of copying it.
            self.read_buf = frame;
        } else {
            self.read_buf.append(&mut frame);
        }
        if !self.read_buf.ends_with(&ZLIB_END_BUF) {
            return Err(ZlibDecompressionError::NeedMoreData);
        }

        let size_in = self.read_buf.len();
        let mut read_offset = 0usize;
        // One chunk buffer for the whole call: `Vec::append` below leaves it
        // empty but keeps its capacity, so it can be refilled every iteration.
        let mut out = self.generate_output_buffer(size_in);
        let mut output_buf = Vec::new();

        loop {
            let bytes_before = self.inflate.total_in();
            let status = self.inflate.decompress_vec(
                &self.read_buf[read_offset..],
                &mut out,
                FlushDecompress::Sync,
            )?;
            // The inflater cannot consume more than the slice it was handed,
            // so the difference always fits into `usize`.
            read_offset += (self.inflate.total_in() - bytes_before) as usize;

            // A completely filled chunk means the inflater may still hold
            // output it had no room for, even if all input was consumed.
            let out_was_filled = out.len() == out.capacity();
            output_buf.append(&mut out);

            let finished = match status {
                Status::Ok => read_offset >= size_in && !out_was_filled,
                // `BufError`: no further progress is possible;
                // `StreamEnd`: the zlib stream itself is finished.
                Status::BufError | Status::StreamEnd => true,
            };
            if finished {
                break;
            }
        }

        self.read_buf.clear();
        log::trace!(
            "Decompression bytes - Input {}b -> Output {}b | Factor: x{:.2}",
            size_in,
            output_buf.len(),
            output_buf.len() as f64 / size_in as f64
        );
        Ok(output_buf)
    }

    /// Generates a new, empty output buffer based on the configuration.
    ///
    /// `frame_size` is only used when a buffer factor is configured. The
    /// returned buffer always has a non-zero capacity.
    fn generate_output_buffer(&self, frame_size: usize) -> Vec<u8> {
        let requested = match self.output_buffer_factor {
            // An overflowing product could never be allocated anyway; the
            // decompress loop handles output larger than one chunk.
            Some(buffer_factor) => frame_size
                .checked_mul(buffer_factor)
                .unwrap_or(DEFAULT_OUTPUT_BUFFER_SIZE),
            None => self
                .output_buffer_size
                .unwrap_or(DEFAULT_OUTPUT_BUFFER_SIZE),
        };
        // With zero capacity the inflater reports `BufError` immediately and
        // the frame would be dropped without producing any output.
        let capacity = if requested == 0 {
            DEFAULT_OUTPUT_BUFFER_SIZE
        } else {
            requested
        };
        Vec::with_capacity(capacity)
    }
}

impl Default for ZlibStreamDecompressor {
    /// Equivalent to [`ZlibStreamDecompressor::new`].
    fn default() -> Self {
        Self::new()
    }
}