lzma_rust2/
lzma_reader.rs

1use std::io::{Error, ErrorKind, Read};
2
3use byteorder::{LittleEndian, ReadBytesExt};
4
5use super::decoder::LZMADecoder;
6use super::lz::LZDecoder;
7use super::range_dec::RangeDecoder;
8use super::*;
9
10pub fn get_memory_usage_by_props(dict_size: u32, props_byte: u8) -> std::io::Result<u32> {
11    if dict_size > DICT_SIZE_MAX {
12        return Err(Error::new(ErrorKind::InvalidInput, "dict size too large"));
13    }
14    if props_byte > (4 * 5 + 4) * 9 + 8 {
15        return Err(Error::new(ErrorKind::InvalidInput, "Invalid props byte"));
16    }
17    let props = props_byte % (9 * 5);
18    let lp = props / 9;
19    let lc = props - lp * 9;
20    get_memory_usage(dict_size, lc as u32, lp as u32)
21}
22
23pub fn get_memory_usage(dict_size: u32, lc: u32, lp: u32) -> std::io::Result<u32> {
24    if lc > 8 || lp > 4 {
25        return Err(Error::new(ErrorKind::InvalidInput, "Invalid lc or lp"));
26    }
27    Ok(10 + get_dict_size(dict_size)? / 1024 + ((2 * 0x300) << (lc + lp)) / 1024)
28}
29
30fn get_dict_size(dict_size: u32) -> std::io::Result<u32> {
31    if dict_size > DICT_SIZE_MAX {
32        return Err(Error::new(ErrorKind::InvalidInput, "dict size too large"));
33    }
34    let dict_size = dict_size.max(4096);
35    Ok((dict_size + 15) & !15)
36}
37
38/// # Examples
39/// ```
40/// use std::io::Read;
41/// use lzma_rust2::LZMAReader;
42///
43/// let compressed: Vec<u8> = vec![93, 0, 0, 128, 0, 255, 255, 255, 255, 255, 255, 255, 255, 0, 36, 25, 73, 152, 111, 22, 2, 140, 232, 230, 91, 177, 71, 198, 206, 183, 99, 255, 255, 60, 172, 0, 0];
44/// let mut reader = LZMAReader::new_mem_limit(compressed.as_slice(), u32::MAX, None).unwrap();
45/// let mut buf = [0; 1024];
46/// let mut out = Vec::new();
47/// loop {
48///    let n = reader.read(&mut buf).unwrap();
49///   if n == 0 {
50///      break;
51///   }
52///   out.extend_from_slice(&buf[..n]);
53/// }
54/// assert_eq!(out, b"Hello, world!");
55/// ```
56pub struct LZMAReader<R> {
57    lz: LZDecoder,
58    rc: RangeDecoder<R>,
59    lzma: LZMADecoder,
60    end_reached: bool,
61    relaxed_end_cond: bool,
62    remaining_size: u64,
63}
64
65impl<R: Read> LZMAReader<R> {
66    fn construct1(
67        reader: R,
68        uncomp_size: u64,
69        mut props: u8,
70        dict_size: u32,
71        preset_dict: Option<&[u8]>,
72    ) -> std::io::Result<Self> {
73        if props > (4 * 5 + 4) * 9 + 8 {
74            return Err(Error::new(ErrorKind::InvalidInput, "Invalid props byte"));
75        }
76        let pb = props / (9 * 5);
77        props -= pb * 9 * 5;
78        let lp = props / 9;
79        let lc = props - lp * 9;
80        if dict_size > DICT_SIZE_MAX {
81            return Err(Error::new(ErrorKind::InvalidInput, "dict size too large"));
82        }
83        Self::construct2(
84            reader,
85            uncomp_size,
86            lc as _,
87            lp as _,
88            pb as _,
89            dict_size,
90            preset_dict,
91        )
92    }
93
94    fn construct2(
95        reader: R,
96        uncomp_size: u64,
97        lc: u32,
98        lp: u32,
99        pb: u32,
100        dict_size: u32,
101        preset_dict: Option<&[u8]>,
102    ) -> std::io::Result<Self> {
103        if lc > 8 || lp > 4 || pb > 4 {
104            return Err(Error::new(
105                ErrorKind::InvalidInput,
106                "Invalid lc or lp or pb",
107            ));
108        }
109        let mut dict_size = get_dict_size(dict_size)?;
110        if uncomp_size <= u64::MAX / 2 && dict_size as u64 > uncomp_size {
111            dict_size = get_dict_size(uncomp_size as u32)?;
112        }
113        let rc = RangeDecoder::new_stream(reader);
114        let rc = match rc {
115            Ok(r) => r,
116            Err(e) => {
117                return Err(e);
118            }
119        };
120        let lz = LZDecoder::new(get_dict_size(dict_size)? as _, preset_dict);
121        let lzma = LZMADecoder::new(lc, lp, pb);
122        Ok(Self {
123            // reader,
124            lz,
125            rc,
126            lzma,
127            end_reached: false,
128            relaxed_end_cond: true,
129            remaining_size: uncomp_size,
130        })
131    }
132
133    /// Creates a new .lzma file format decompressor with an optional memory usage limit.
134    /// - [mem_limit_kb] - memory usage limit in kibibytes (KiB). u32::MAX means no limit.
135    /// - [preset_dict] - preset dictionary or None to use no preset dictionary.
136    pub fn new_mem_limit(
137        mut reader: R,
138        mem_limit_kb: u32,
139        preset_dict: Option<&[u8]>,
140    ) -> std::io::Result<Self> {
141        let props = reader.read_u8()?;
142        let dict_size = reader.read_u32::<LittleEndian>()?;
143
144        let uncomp_size = reader.read_u64::<LittleEndian>()?;
145        let need_mem = get_memory_usage_by_props(dict_size, props)?;
146        if mem_limit_kb < need_mem {
147            return Err(Error::new(
148                ErrorKind::OutOfMemory,
149                format!(
150                    "{}kb memory needed,but limit was {}kb",
151                    need_mem, mem_limit_kb
152                ),
153            ));
154        }
155        Self::construct1(reader, uncomp_size, props, dict_size, preset_dict)
156    }
157
158    /// Creates a new input stream that decompresses raw LZMA data (no .lzma header) from `reader` optionally with a preset dictionary.
159    /// - [reader] - the reader to read compressed data from.
160    /// - [uncomp_size] - the uncompressed size of the data to be decompressed.
161    /// - [props] - the LZMA properties byte.
162    /// - [dict_size] - the LZMA dictionary size.
163    /// - [preset_dict] - preset dictionary or None to use no preset dictionary.
164    pub fn new_with_props(
165        reader: R,
166        uncomp_size: u64,
167        props: u8,
168        dict_size: u32,
169        preset_dict: Option<&[u8]>,
170    ) -> std::io::Result<Self> {
171        Self::construct1(reader, uncomp_size, props, dict_size, preset_dict)
172    }
173
174    /// Creates a new input stream that decompresses raw LZMA data (no .lzma header) from `reader` optionally with a preset dictionary.
175    /// - [reader] - the input stream to read compressed data from.
176    /// - [uncomp_size] - the uncompressed size of the data to be decompressed.
177    /// - [lc] - the number of literal context bits.
178    /// - [lp] - the number of literal position bits.
179    /// - [pb] - the number of position bits.
180    /// - [dict_size] - the LZMA dictionary size.
181    /// - [preset_dict] - preset dictionary or None to use no preset dictionary.
182    pub fn new(
183        reader: R,
184        uncomp_size: u64,
185        lc: u32,
186        lp: u32,
187        pb: u32,
188        dict_size: u32,
189        preset_dict: Option<&[u8]>,
190    ) -> std::io::Result<Self> {
191        Self::construct2(reader, uncomp_size, lc, lp, pb, dict_size, preset_dict)
192    }
193
194    fn read_decode(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
195        if buf.is_empty() {
196            return Ok(0);
197        }
198        if self.end_reached {
199            return Ok(0);
200        }
201        let mut size = 0;
202        let mut len = buf.len() as u32;
203        let mut off = 0u32;
204        while len > 0 {
205            let mut copy_size_max = len;
206            if self.remaining_size <= u64::MAX / 2 && (self.remaining_size as u32) < len {
207                copy_size_max = self.remaining_size as u32;
208            }
209            self.lz.set_limit(copy_size_max as usize);
210
211            match self.lzma.decode(&mut self.lz, &mut self.rc) {
212                Ok(_) => {}
213                Err(e) => {
214                    if self.remaining_size != u64::MAX || !self.lzma.end_marker_detected() {
215                        return Err(e);
216                    }
217                    self.end_reached = true;
218                    self.rc.normalize()?;
219                }
220            }
221
222            let copied_size = self.lz.flush(buf, off as _) as u32;
223            off += copied_size;
224            len -= copied_size;
225            size += copied_size;
226            if self.remaining_size <= u64::MAX / 2 {
227                self.remaining_size -= copied_size as u64;
228                if self.remaining_size == 0 {
229                    self.end_reached = true;
230                }
231            }
232
233            if self.end_reached {
234                if self.lz.has_pending()
235                    || (!self.relaxed_end_cond && !self.rc.is_stream_finished())
236                {
237                    return Err(Error::new(
238                        ErrorKind::InvalidData,
239                        "end reached but not decoder finished",
240                    ));
241                }
242                return Ok(size as _);
243            }
244        }
245        Ok(size as _)
246    }
247}
248
249impl<R: Read> Read for LZMAReader<R> {
250    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
251        self.read_decode(buf)
252    }
253}