lzma_rust2/
lzma2_reader.rs

1use super::{
2    decoder::LzmaDecoder,
3    error_invalid_input,
4    lz::LzDecoder,
5    range_dec::{RangeDecoder, RangeDecoderBuffer},
6    Read,
7};
8use crate::ByteReader;
9
/// Largest compressed payload of a single LZMA2 chunk, in bytes.
///
/// The chunk header stores the compressed size minus one in a 16-bit field,
/// so the largest value that can be encoded is `u16::MAX + 1`.
pub const COMPRESSED_SIZE_MAX: u32 = u16::MAX as u32 + 1;
11
/// A single-threaded LZMA2 decompressor.
///
/// Wraps a reader that supplies LZMA2-compressed bytes and exposes the
/// decompressed data through its own `Read` implementation.
///
/// # Examples
/// ```
/// use std::io::Read;
///
/// use lzma_rust2::{Lzma2Reader, LzmaOptions};
///
/// let compressed: Vec<u8> = vec![
///     1, 0, 12, 72, 101, 108, 108, 111, 44, 32, 119, 111, 114, 108, 100, 33, 0,
/// ];
/// let mut reader = Lzma2Reader::new(compressed.as_slice(), LzmaOptions::DICT_SIZE_DEFAULT, None);
/// let mut decompressed = Vec::new();
/// reader.read_to_end(&mut decompressed).unwrap();
/// assert_eq!(&decompressed[..], b"Hello, world!");
/// ```
pub struct Lzma2Reader<R> {
    /// Source of the compressed LZMA2 stream.
    inner: R,
    /// LZ decoder that receives decoded bytes and is flushed into the
    /// caller's buffer; reset whenever a chunk requests a dictionary reset.
    lz: LzDecoder,
    /// Range decoder; loaded with the compressed payload of each LZMA chunk.
    rc: RangeDecoder<RangeDecoderBuffer>,
    /// LZMA decoder built from the most recent props byte; `None` until a
    /// chunk carrying props has been read.
    lzma: Option<LzmaDecoder>,
    /// Bytes of the current chunk that have not been handed to the caller
    /// yet. Zero means the next chunk header has to be read.
    uncompressed_size: usize,
    /// `true` while the current chunk is LZMA-compressed, `false` while it is
    /// an uncompressed chunk.
    is_lzma_chunk: bool,
    /// `true` until a chunk that resets the dictionary has been seen. Starts
    /// as `false` only when a non-empty preset dictionary was supplied.
    need_dict_reset: bool,
    /// `true` when the next LZMA chunk must carry new props (initially, and
    /// after every dictionary reset).
    need_props: bool,
    /// Set once the end-of-stream control byte (`0x00`) has been read.
    end_reached: bool,
}
39
40/// Calculates the memory usage in KiB required for LZMA2 decompression.
41#[inline]
42pub fn get_memory_usage(dict_size: u32) -> u32 {
43    40 + COMPRESSED_SIZE_MAX / 1024 + get_dict_size(dict_size) / 1024
44}
45
/// Rounds `dict_size` up to the next multiple of 16 bytes.
///
/// Inputs of `u32::MAX - 15` and above cannot be rounded without leaving the
/// `u32` range and yield `u32::MAX` instead.
#[inline]
fn get_dict_size(dict_size: u32) -> u32 {
    const ALIGN_MASK: u32 = 15;

    if dict_size < u32::MAX - ALIGN_MASK {
        (dict_size + ALIGN_MASK) & !ALIGN_MASK
    } else {
        u32::MAX
    }
}
54
55impl<R> Lzma2Reader<R> {
56    /// Unwraps the reader, returning the underlying reader.
57    pub fn into_inner(self) -> R {
58        self.inner
59    }
60
61    /// Returns a reference to the inner reader.
62    pub fn inner(&self) -> &R {
63        &self.inner
64    }
65
66    /// Returns a mutable reference to the inner reader.
67    pub fn inner_mut(&mut self) -> &mut R {
68        &mut self.inner
69    }
70}
71
72impl<R: Read> Lzma2Reader<R> {
73    /// Create a new LZMA2 reader.
74    /// `inner` is the reader to read compressed data from.
75    /// `dict_size` is the dictionary size in bytes.
76    pub fn new(inner: R, dict_size: u32, preset_dict: Option<&[u8]>) -> Self {
77        let has_preset = preset_dict.as_ref().map(|a| !a.is_empty()).unwrap_or(false);
78        let lz = LzDecoder::new(get_dict_size(dict_size) as _, preset_dict);
79        let rc = RangeDecoder::new_buffer(COMPRESSED_SIZE_MAX as _);
80        Self {
81            inner,
82            lz,
83            rc,
84            lzma: None,
85            uncompressed_size: 0,
86            is_lzma_chunk: false,
87            need_dict_reset: !has_preset,
88            need_props: true,
89            end_reached: false,
90        }
91    }
92
93    // ### LZMA2 Control Byte Meaning
94    //
95    //  Control Byte    | Chunk Type      | Formal Action
96    //  --------------- | --------------- | ----------------------------
97    //  0x00            | End of Stream   | Terminates the LZMA2 stream.
98    //  0x01            | Uncompressed    | Resets Dictionary.
99    //  0x02            | Uncompressed    | Preserves Dictionary.
100    //  0x03 – 0x7F     | Reserved        | Invalid stream.
101    //  0x80 – 0xFF     | LZMA Compressed | Varies based on bits 6 and 5
102    //
103    // ### Detailed Breakdown of LZMA Compressed Chunks (0x80 - 0xFF)
104    //
105    //  Bits | Control Byte | Reset Action            | Suitable for Parallel Start? |
106    //  ---- | ------------ | ----------------------- | ---------------------------- |
107    //  00   | 0x80 – 0x9F  | None                    | No
108    //  01   | 0xA0 – 0xBF  | Reset State             | No
109    //  10   | 0xC0 – 0xDF  | Reset State & Props     | No
110    //  11   | 0xE0 – 0xFF  | Reset Everything        | Yes
111    fn decode_chunk_header(&mut self) -> crate::Result<()> {
112        let control = self.inner.read_u8()?;
113
114        if control == 0x00 {
115            self.end_reached = true;
116            return Ok(());
117        }
118
119        if control >= 0xE0 || control == 0x01 {
120            self.need_props = true;
121            self.need_dict_reset = false;
122            // Reset dictionary
123            self.lz.reset();
124        } else if self.need_dict_reset {
125            return Err(error_invalid_input("corrupted input data (LZMA2:0)"));
126        }
127        if control >= 0x80 {
128            self.is_lzma_chunk = true;
129            self.uncompressed_size = ((control & 0x1F) as usize) << 16;
130            self.uncompressed_size += self.inner.read_u16_be()? as usize + 1;
131            let compressed_size = self.inner.read_u16_be()? as usize + 1;
132
133            if control >= 0xC0 {
134                // Reset props and state (by re-creating it)
135                self.need_props = false;
136                self.decode_props()?;
137            } else if self.need_props {
138                return Err(error_invalid_input("corrupted input data (LZMA2:1)"));
139            } else if control >= 0xA0 {
140                // Reset state
141                if let Some(l) = self.lzma.as_mut() {
142                    l.reset()
143                }
144            }
145
146            self.rc.prepare(&mut self.inner, compressed_size)?;
147        } else if control > 0x02 {
148            return Err(error_invalid_input("corrupted input data (LZMA2:2)"));
149        } else {
150            self.is_lzma_chunk = false;
151            self.uncompressed_size = (self.inner.read_u16_be()? as usize) + 1;
152        }
153        Ok(())
154    }
155
156    /// Reads the next props and re-creates the state by creating a new decoder.
157    fn decode_props(&mut self) -> crate::Result<()> {
158        let props = self.inner.read_u8()?;
159        if props > (4 * 5 + 4) * 9 + 8 {
160            return Err(error_invalid_input("corrupted input data (LZMA2:3)"));
161        }
162        let pb = props / (9 * 5);
163        let props = props - pb * 9 * 5;
164        let lp = props / 9;
165        let lc = props - lp * 9;
166        if lc + lp > 4 {
167            return Err(error_invalid_input("corrupted input data (LZMA2:4)"));
168        }
169        self.lzma = Some(LzmaDecoder::new(lc as _, lp as _, pb as _));
170
171        Ok(())
172    }
173}
174
175impl<R: Read> Read for Lzma2Reader<R> {
176    fn read(&mut self, buf: &mut [u8]) -> crate::Result<usize> {
177        if buf.is_empty() {
178            return Ok(0);
179        }
180
181        if self.end_reached {
182            return Ok(0);
183        }
184        let mut size = 0;
185        let mut len = buf.len();
186        let mut off = 0;
187        while len > 0 {
188            if self.uncompressed_size == 0 {
189                self.decode_chunk_header()?;
190                if self.end_reached {
191                    return Ok(size);
192                }
193            }
194
195            let copy_size_max = self.uncompressed_size.min(len);
196            if !self.is_lzma_chunk {
197                self.lz.copy_uncompressed(&mut self.inner, copy_size_max)?;
198            } else {
199                self.lz.set_limit(copy_size_max);
200                if let Some(lzma) = self.lzma.as_mut() {
201                    lzma.decode(&mut self.lz, &mut self.rc)?;
202                }
203            }
204
205            {
206                let copied_size = self.lz.flush(buf, off)?;
207                off = off.saturating_add(copied_size);
208                len = len.saturating_sub(copied_size);
209                size = size.saturating_add(copied_size);
210                self.uncompressed_size = self.uncompressed_size.saturating_sub(copied_size);
211                if self.uncompressed_size == 0 && (!self.rc.is_finished() || self.lz.has_pending())
212                {
213                    return Err(error_invalid_input("rc not finished or lz has pending"));
214                }
215            }
216        }
217
218        Ok(size)
219    }
220}