// lzma_rust2/enc/lzma2_writer.rs

1use std::io::{ErrorKind, Write};
2
3use byteorder::WriteBytesExt;
4
5use super::counting::CountingWriter;
6
7use super::encoder::LZMAEncoderModes;
8use super::{
9    encoder::{EncodeMode, LZMAEncoder},
10    lz::MFType,
11    range_enc::{RangeEncoder, RangeEncoderBuffer},
12};
13
14#[derive(Debug, Clone)]
15pub struct LZMA2Options {
16    pub dict_size: u32,
17    pub lc: u32,
18    pub lp: u32,
19    pub pb: u32,
20    pub mode: EncodeMode,
21    pub nice_len: u32,
22    pub mf: MFType,
23    pub depth_limit: i32,
24    pub preset_dict: Option<Vec<u8>>,
25}
26
27impl Default for LZMA2Options {
28    fn default() -> Self {
29        Self::with_preset(6)
30    }
31}
32
33impl LZMA2Options {
34    pub const LC_DEFAULT: u32 = 3;
35    pub const LP_DEFAULT: u32 = 0;
36    pub const PB_DEFAULT: u32 = 2;
37    pub const NICE_LEN_MAX: u32 = 273;
38    pub const NICE_LEN_MIN: u32 = 8;
39    pub const DICT_SIZE_DEFAULT: u32 = 8 << 20;
40    const PRESET_TO_DICT_SIZE: &'static [u32] = &[
41        1 << 18,
42        1 << 20,
43        1 << 21,
44        1 << 22,
45        1 << 22,
46        1 << 23,
47        1 << 23,
48        1 << 24,
49        1 << 25,
50        1 << 26,
51    ];
52    const PRESET_TO_DEPTH_LIMIT: &'static [i32] = &[4, 8, 24, 48];
53
54    #[allow(clippy::too_many_arguments)]
55    pub fn new(
56        dict_size: u32,
57        lc: u32,
58        lp: u32,
59        pb: u32,
60        mode: EncodeMode,
61        nice_len: u32,
62        mf: MFType,
63        depth_limit: i32,
64    ) -> Self {
65        Self {
66            dict_size,
67            lc,
68            lp,
69            pb,
70            mode,
71            nice_len,
72            mf,
73            depth_limit,
74            preset_dict: None,
75        }
76    }
77
78    /// preset: [0..9]
79    #[inline]
80    pub fn with_preset(preset: u32) -> Self {
81        let mut opt = Self {
82            dict_size: Default::default(),
83            lc: Default::default(),
84            lp: Default::default(),
85            pb: Default::default(),
86            mode: EncodeMode::Normal,
87            nice_len: Default::default(),
88            mf: Default::default(),
89            depth_limit: Default::default(),
90            preset_dict: Default::default(),
91        };
92        opt.set_preset(preset);
93        opt
94    }
95
96    /// preset: [0..9]
97    pub fn set_preset(&mut self, preset: u32) {
98        if preset > 9 {
99            return;
100        }
101        self.lc = Self::LC_DEFAULT;
102        self.lp = Self::LP_DEFAULT;
103        self.pb = Self::PB_DEFAULT;
104        self.dict_size = Self::PRESET_TO_DICT_SIZE[preset as usize];
105        if preset <= 3 {
106            self.mode = EncodeMode::Fast;
107            self.mf = MFType::HC4;
108            self.nice_len = if preset <= 1 { 128 } else { Self::NICE_LEN_MAX };
109            self.depth_limit = Self::PRESET_TO_DEPTH_LIMIT[preset as usize];
110        } else {
111            self.mode = EncodeMode::Normal;
112            self.mf = MFType::BT4;
113            self.nice_len = if preset == 4 {
114                16
115            } else if preset == 5 {
116                32
117            } else {
118                64
119            };
120            self.depth_limit = 0;
121        }
122    }
123
124    pub fn get_memory_usage(&self) -> u32 {
125        let dict_size = self.dict_size;
126        let extra_size_before = get_extra_size_before(dict_size);
127        70 + LZMAEncoder::get_mem_usage(self.mode, dict_size, extra_size_before, self.mf)
128    }
129
130    #[inline(always)]
131    pub fn get_props(&self) -> u8 {
132        ((self.pb * 5 + self.lp) * 9 + self.lc) as u8
133    }
134}
135
/// Size of the range encoder's output buffer (64 KiB); also the largest
/// payload written per uncompressed chunk.
const COMPRESSED_SIZE_MAX: u32 = 64 << 10;

/// Returns how far `dict_size` falls short of [`COMPRESSED_SIZE_MAX`], or `0`
/// when the dictionary is at least that large.
pub fn get_extra_size_before(dict_size: u32) -> u32 {
    COMPRESSED_SIZE_MAX.saturating_sub(dict_size)
}
145
146/// LZMA2 format writer
147/// # Examples
148/// ```
149/// use std::io::Write;
150/// use lzma_rust2::{LZMA2Options, LZMA2Writer, CountingWriter};
151///
152/// let mut writer = LZMA2Writer::new(CountingWriter::new(Vec::new()), &LZMA2Options::default());
153///    writer.write_all(b"hello world").unwrap();
154///    let compressed = writer.finish().unwrap();
155///
156/// ```
157pub struct LZMA2Writer<W: Write> {
158    inner: CountingWriter<W>,
159    rc: RangeEncoder<RangeEncoderBuffer>,
160    lzma: LZMAEncoder,
161    mode: LZMAEncoderModes,
162    props: u8,
163    dict_reset_needed: bool,
164    state_reset_needed: bool,
165    props_needed: bool,
166    pending_size: u32,
167    finished: bool,
168}
169
170impl<W: Write> LZMA2Writer<W> {
171    pub fn new(inner: CountingWriter<W>, options: &LZMA2Options) -> Self {
172        let dict_size = options.dict_size;
173        let rc = RangeEncoder::new_buffer(COMPRESSED_SIZE_MAX as usize);
174        let (mut lzma, mode) = LZMAEncoder::new(
175            options.mode,
176            options.lc,
177            options.lp,
178            options.pb,
179            options.mf,
180            options.depth_limit,
181            options.dict_size,
182            options.nice_len as usize,
183        );
184
185        let props = options.get_props();
186        let mut dict_reset_needed = true;
187        if let Some(preset_dict) = &options.preset_dict {
188            lzma.lz.set_preset_dict(dict_size, preset_dict);
189            dict_reset_needed = false;
190        }
191        Self {
192            inner,
193            rc,
194            lzma,
195            mode,
196            props,
197            dict_reset_needed,
198            state_reset_needed: true,
199            props_needed: true,
200            pending_size: 0,
201            finished: false,
202        }
203    }
204
205    fn write_lzma(&mut self, uncompressed_size: u32, compressed_size: u32) -> std::io::Result<()> {
206        let mut control = if self.props_needed {
207            if self.dict_reset_needed {
208                0x80 + (3 << 5)
209            } else {
210                0x80 + (2 << 5)
211            }
212        } else if self.state_reset_needed {
213            0x80 + (1 << 5)
214        } else {
215            0x80
216        };
217        control |= (uncompressed_size - 1) >> 16;
218        let mut chunk_header = [0u8; 6];
219        chunk_header[0] = control as u8;
220        chunk_header[1] = ((uncompressed_size - 1) >> 8) as u8;
221        chunk_header[2] = (uncompressed_size - 1) as u8;
222        chunk_header[3] = ((compressed_size - 1) >> 8) as u8;
223        chunk_header[4] = (compressed_size - 1) as u8;
224        if self.props_needed {
225            chunk_header[5] = self.props;
226            self.inner.write_all(&chunk_header)?;
227        } else {
228            self.inner.write_all(&chunk_header[..5])?;
229        }
230
231        self.rc.write_to(&mut self.inner)?;
232        self.props_needed = false;
233        self.state_reset_needed = false;
234        self.dict_reset_needed = false;
235        Ok(())
236    }
237
238    fn write_uncompressed(&mut self, mut uncompressed_size: u32) -> std::io::Result<()> {
239        while uncompressed_size > 0 {
240            let chunk_size = uncompressed_size.min(COMPRESSED_SIZE_MAX);
241            let mut chunk_header = [0u8; 3];
242            chunk_header[0] = if self.dict_reset_needed { 0x01 } else { 0x02 };
243            chunk_header[1] = ((chunk_size - 1) >> 8) as u8;
244            chunk_header[2] = (chunk_size - 1) as u8;
245            self.inner.write_all(&chunk_header)?;
246            self.lzma.lz.copy_uncompressed(
247                &mut self.inner,
248                uncompressed_size as i32,
249                chunk_size as usize,
250            )?;
251            uncompressed_size -= chunk_size;
252            self.dict_reset_needed = false;
253        }
254        self.state_reset_needed = true;
255        Ok(())
256    }
257
258    fn write_chunk(&mut self) -> std::io::Result<()> {
259        let compressed_size = self.rc.finish_buffer()?.unwrap_or_default() as u32;
260        let mut uncompressed_size = self.lzma.data.uncompressed_size;
261        assert!(compressed_size > 0);
262        assert!(
263            uncompressed_size > 0,
264            "uncompressed_size is 0, read_pos={}",
265            self.lzma.lz.read_pos
266        );
267        if compressed_size + 2 < uncompressed_size {
268            self.write_lzma(uncompressed_size, compressed_size)?;
269        } else {
270            self.lzma.reset(&mut self.mode);
271            uncompressed_size = self.lzma.data.uncompressed_size;
272            assert!(uncompressed_size > 0);
273            self.write_uncompressed(uncompressed_size)?;
274        }
275        self.pending_size -= uncompressed_size;
276        self.lzma.reset_uncompressed_size();
277        self.rc.reset_buffer();
278        Ok(())
279    }
280
281    fn write_end_marker(&mut self) -> std::io::Result<()> {
282        assert!(!self.finished);
283
284        self.lzma.lz.set_finishing();
285
286        while self.pending_size > 0 {
287            self.lzma.encode_for_lzma2(&mut self.rc, &mut self.mode)?;
288            self.write_chunk()?;
289        }
290
291        self.inner.write_u8(0x00)?;
292        self.finished = true;
293
294        Ok(())
295    }
296
297    pub fn finish(&mut self) -> std::io::Result<()> {
298        if !self.finished {
299            self.write_end_marker()?;
300        }
301        Ok(())
302    }
303}
304
305impl<W: Write> Write for LZMA2Writer<W> {
306    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
307        let mut len = buf.len();
308        if len == 0 && !self.finished {
309            self.finish()?;
310            self.inner.write(buf)?;
311            return Ok(0);
312        }
313        if self.finished {
314            return Err(std::io::Error::new(ErrorKind::Other, "LZMA2 finished"));
315        }
316
317        let mut off = 0;
318        while len > 0 {
319            let used = self.lzma.lz.fill_window(&buf[off..(off + len)]);
320            off += used;
321            len -= used;
322            self.pending_size += used as u32;
323            if self.lzma.encode_for_lzma2(&mut self.rc, &mut self.mode)? {
324                self.write_chunk()?;
325            }
326        }
327        Ok(off)
328    }
329
330    fn flush(&mut self) -> std::io::Result<()> {
331        if self.finished {
332            return Err(std::io::Error::new(
333                ErrorKind::Other,
334                "LZMA2 flush finished",
335            ));
336        }
337        self.lzma.lz.set_flushing();
338        while self.pending_size > 0 {
339            self.lzma.encode_for_lzma2(&mut self.rc, &mut self.mode)?;
340            self.write_chunk()?;
341        }
342        self.inner.flush()
343    }
344}