lzma_rust2/
lzma_reader.rs

1use super::{
2    decoder::LzmaDecoder, error_invalid_data, error_invalid_input, error_out_of_memory,
3    lz::LzDecoder, range_dec::RangeDecoder, ByteReader, Read, DICT_SIZE_MAX,
4};
5
6/// Calculates the memory usage in KiB required for LZMA decompression from properties byte.
7pub fn get_memory_usage_by_props(dict_size: u32, props_byte: u8) -> crate::Result<u32> {
8    if dict_size > DICT_SIZE_MAX {
9        return Err(error_invalid_input("dict size too large"));
10    }
11    if props_byte > (4 * 5 + 4) * 9 + 8 {
12        return Err(error_invalid_input("invalid props byte"));
13    }
14    let props = props_byte % (9 * 5);
15    let lp = props / 9;
16    let lc = props - lp * 9;
17    get_memory_usage(dict_size, lc as u32, lp as u32)
18}
19
20/// Calculates the memory usage in KiB required for LZMA decompression.
21pub fn get_memory_usage(dict_size: u32, lc: u32, lp: u32) -> crate::Result<u32> {
22    if lc > 8 || lp > 4 {
23        return Err(error_invalid_input("invalid lc or lp"));
24    }
25    Ok(10 + get_dict_size(dict_size)? / 1024 + ((2 * 0x300) << (lc + lp)) / 1024)
26}
27
28fn get_dict_size(dict_size: u32) -> crate::Result<u32> {
29    if dict_size > DICT_SIZE_MAX {
30        return Err(error_invalid_input("dict size too large"));
31    }
32    let dict_size = dict_size.max(4096);
33    Ok((dict_size + 15) & !15)
34}
35
/// A single-threaded LZMA decompressor.
///
/// The compressed input is pulled from the wrapped reader through an internal range
/// decoder, and decompressed bytes are handed out through `read`.
///
/// # Examples
/// ```
/// use std::io::Read;
///
/// use lzma_rust2::LzmaReader;
///
/// let compressed: Vec<u8> = vec![
///     93, 0, 0, 128, 0, 255, 255, 255, 255, 255, 255, 255, 255, 0, 36, 25, 73, 152, 111, 22, 2,
///     140, 232, 230, 91, 177, 71, 198, 206, 183, 99, 255, 255, 60, 172, 0, 0,
/// ];
/// let mut reader = LzmaReader::new_mem_limit(compressed.as_slice(), u32::MAX, None).unwrap();
/// let mut buf = [0; 1024];
/// let mut out = Vec::new();
/// loop {
///     let n = reader.read(&mut buf).unwrap();
///     if n == 0 {
///         break;
///     }
///     out.extend_from_slice(&buf[..n]);
/// }
/// assert_eq!(out, b"Hello, world!");
/// ```
pub struct LzmaReader<R> {
    // Dictionary-backed output stage, built from the normalized dictionary size and the
    // optional preset dictionary; decoded bytes are flushed from it into the caller's buffer.
    lz: LzDecoder,
    // Range decoder that owns the underlying reader `R`.
    rc: RangeDecoder<R>,
    // LZMA decoder state configured with the `lc`, `lp` and `pb` parameters.
    lzma: LzmaDecoder,
    // Set once an end marker was accepted or `remaining_size` dropped to zero; every later
    // read returns `Ok(0)`.
    end_reached: bool,
    // When false, reaching the end additionally requires the range decoder to report a
    // finished stream. The constructors in this file always set it to true.
    relaxed_end_cond: bool,
    // Uncompressed bytes still to be produced. Values above `u64::MAX / 2` are not counted
    // down, and only `u64::MAX` allows the stream to finish on an end marker.
    remaining_size: u64,
}
68
impl<R> LzmaReader<R> {
    /// Consumes the decompressor and returns the underlying reader held by its range decoder.
    pub fn into_inner(self) -> R {
        self.rc.into_inner()
    }

    /// Returns a shared reference to the underlying reader held by the range decoder.
    pub fn inner(&self) -> &R {
        self.rc.inner()
    }

    /// Returns a mutable reference to the underlying reader held by the range decoder.
    pub fn inner_mut(&mut self) -> &mut R {
        self.rc.inner_mut()
    }
}
85
86impl<R: Read> LzmaReader<R> {
87    fn construct1(
88        reader: R,
89        uncomp_size: u64,
90        mut props: u8,
91        dict_size: u32,
92        preset_dict: Option<&[u8]>,
93    ) -> crate::Result<Self> {
94        if props > (4 * 5 + 4) * 9 + 8 {
95            return Err(error_invalid_input("invalid props byte"));
96        }
97        let pb = props / (9 * 5);
98        props -= pb * 9 * 5;
99        let lp = props / 9;
100        let lc = props - lp * 9;
101        if dict_size > DICT_SIZE_MAX {
102            return Err(error_invalid_input("dict size too large"));
103        }
104        Self::construct2(
105            reader,
106            uncomp_size,
107            lc as _,
108            lp as _,
109            pb as _,
110            dict_size,
111            preset_dict,
112        )
113    }
114
115    fn construct2(
116        reader: R,
117        uncomp_size: u64,
118        lc: u32,
119        lp: u32,
120        pb: u32,
121        dict_size: u32,
122        preset_dict: Option<&[u8]>,
123    ) -> crate::Result<Self> {
124        if lc > 8 || lp > 4 || pb > 4 {
125            return Err(error_invalid_input("invalid lc or lp or pb"));
126        }
127        let mut dict_size = get_dict_size(dict_size)?;
128        if uncomp_size <= u64::MAX / 2 && dict_size as u64 > uncomp_size {
129            dict_size = get_dict_size(uncomp_size as u32)?;
130        }
131        let rc = RangeDecoder::new_stream(reader);
132        let rc = match rc {
133            Ok(r) => r,
134            Err(e) => {
135                return Err(e);
136            }
137        };
138        let lz = LzDecoder::new(get_dict_size(dict_size)? as _, preset_dict);
139        let lzma = LzmaDecoder::new(lc, lp, pb);
140        Ok(Self {
141            // reader,
142            lz,
143            rc,
144            lzma,
145            end_reached: false,
146            relaxed_end_cond: true,
147            remaining_size: uncomp_size,
148        })
149    }
150
151    /// Creates a new .lzma file format decompressor with an optional memory usage limit.
152    /// - `mem_limit_kb` - memory usage limit in kibibytes (KiB). `u32::MAX` means no limit.
153    /// - `preset_dict` - preset dictionary or None to use no preset dictionary.
154    pub fn new_mem_limit(
155        mut reader: R,
156        mem_limit_kb: u32,
157        preset_dict: Option<&[u8]>,
158    ) -> crate::Result<Self> {
159        let props = reader.read_u8()?;
160        let dict_size = reader.read_u32()?;
161
162        let uncomp_size = reader.read_u64()?;
163        let need_mem = get_memory_usage_by_props(dict_size, props)?;
164        if mem_limit_kb < need_mem {
165            return Err(error_out_of_memory(
166                "needed memory too big for mem_limit_kb",
167            ));
168        }
169        Self::construct1(reader, uncomp_size, props, dict_size, preset_dict)
170    }
171
    /// Creates a new input stream that decompresses raw LZMA data (no .lzma header) from
    /// `reader`, optionally with a preset dictionary.
    ///
    /// - `reader` - the reader to read compressed data from.
    /// - `uncomp_size` - the uncompressed size of the data to be decompressed. Pass
    ///   `u64::MAX` when the size is unknown and the stream is terminated by an end marker.
    /// - `props` - the LZMA properties byte, encoded as `(pb * 5 + lp) * 9 + lc`.
    /// - `dict_size` - the LZMA dictionary size.
    /// - `preset_dict` - preset dictionary or None to use no preset dictionary.
    ///
    /// # Errors
    ///
    /// Returns an invalid-input error when `props` is greater than 224 or `dict_size`
    /// exceeds `DICT_SIZE_MAX`, and propagates any error raised while setting up the decoder.
    pub fn new_with_props(
        reader: R,
        uncomp_size: u64,
        props: u8,
        dict_size: u32,
        preset_dict: Option<&[u8]>,
    ) -> crate::Result<Self> {
        Self::construct1(reader, uncomp_size, props, dict_size, preset_dict)
    }
187
    /// Creates a new input stream that decompresses raw LZMA data (no .lzma header) from
    /// `reader`, optionally with a preset dictionary.
    ///
    /// - `reader` - the input stream to read compressed data from.
    /// - `uncomp_size` - the uncompressed size of the data to be decompressed. Pass
    ///   `u64::MAX` when the size is unknown and the stream is terminated by an end marker.
    /// - `lc` - the number of literal context bits (at most 8).
    /// - `lp` - the number of literal position bits (at most 4).
    /// - `pb` - the number of position bits (at most 4).
    /// - `dict_size` - the LZMA dictionary size.
    /// - `preset_dict` - preset dictionary or None to use no preset dictionary.
    ///
    /// # Errors
    ///
    /// Returns an invalid-input error when `lc`, `lp` or `pb` is out of range or when
    /// `dict_size` is too large, and propagates any error raised while setting up the decoder.
    pub fn new(
        reader: R,
        uncomp_size: u64,
        lc: u32,
        lp: u32,
        pb: u32,
        dict_size: u32,
        preset_dict: Option<&[u8]>,
    ) -> crate::Result<Self> {
        Self::construct2(reader, uncomp_size, lc, lp, pb, dict_size, preset_dict)
    }
207
208    fn read_decode(&mut self, buf: &mut [u8]) -> crate::Result<usize> {
209        if buf.is_empty() {
210            return Ok(0);
211        }
212        if self.end_reached {
213            return Ok(0);
214        }
215        let mut size: u64 = 0;
216        let mut len = buf.len() as u64;
217        let mut off: u64 = 0;
218        while len > 0 {
219            let mut copy_size_max = len;
220            if self.remaining_size <= u64::MAX / 2 && self.remaining_size < len {
221                copy_size_max = self.remaining_size;
222            }
223            self.lz.set_limit(copy_size_max as usize);
224
225            match self.lzma.decode(&mut self.lz, &mut self.rc) {
226                Ok(_) => {}
227                Err(error) => {
228                    if self.remaining_size != u64::MAX || !self.lzma.end_marker_detected() {
229                        return Err(error);
230                    }
231                    self.end_reached = true;
232                    self.rc.normalize();
233                }
234            }
235
236            let copied_size = self.lz.flush(buf, off as _)? as u64;
237            off = off.saturating_add(copied_size);
238            len = len.saturating_sub(copied_size);
239            size = size.saturating_add(copied_size);
240            if self.remaining_size <= u64::MAX / 2 {
241                self.remaining_size = self.remaining_size.saturating_sub(copied_size);
242                if self.remaining_size == 0 {
243                    self.end_reached = true;
244                }
245            }
246
247            if self.end_reached {
248                if self.lz.has_pending()
249                    || (!self.relaxed_end_cond && !self.rc.is_stream_finished())
250                {
251                    return Err(error_invalid_data("end reached but not decoder finished"));
252                }
253                return Ok(size as _);
254            }
255        }
256        Ok(size as _)
257    }
258}
259
/// Exposes the decompressor through the crate's `Read` trait.
impl<R: Read> Read for LzmaReader<R> {
    /// Fills `buf` with decompressed bytes and returns the number written.
    ///
    /// Returns `Ok(0)` when `buf` is empty or the end of the stream has been reached.
    fn read(&mut self, buf: &mut [u8]) -> crate::Result<usize> {
        self.read_decode(buf)
    }
}
264}