lzma_rust2/enc/
lzma2_writer.rs

1use alloc::vec::Vec;
2use core::num::NonZeroU64;
3
4use super::{
5    encoder::{EncodeMode, LzmaEncoder, LzmaEncoderModes},
6    lz::MfType,
7    range_enc::{RangeEncoder, RangeEncoderBuffer},
8};
9use crate::{AutoFinish, AutoFinisher, ByteWriter, Write};
10
11/// Encoder settings when compressing with LZMA and LZMA2.
12#[derive(Debug, Clone)]
13pub struct LzmaOptions {
14    /// Dictionary size in bytes.
15    pub dict_size: u32,
16    /// Number of literal context bits (0-8).
17    pub lc: u32,
18    /// Number of literal position bits (0-4).
19    pub lp: u32,
20    /// Number of position bits (0-4).
21    pub pb: u32,
22    /// Compression mode.
23    pub mode: EncodeMode,
24    /// Match finder nice length.
25    pub nice_len: u32,
26    /// Match finder type.
27    pub mf: MfType,
28    /// Match finder depth limit.
29    pub depth_limit: i32,
30    /// Preset dictionary data.
31    pub preset_dict: Option<Vec<u8>>,
32}
33
34impl Default for LzmaOptions {
35    fn default() -> Self {
36        Self::with_preset(6)
37    }
38}
39
40impl LzmaOptions {
41    /// Default number of literal context bits.
42    pub const LC_DEFAULT: u32 = 3;
43
44    /// Default number of literal position bits.
45    pub const LP_DEFAULT: u32 = 0;
46
47    /// Default number of position bits.
48    pub const PB_DEFAULT: u32 = 2;
49
50    /// Maximum match finder nice length.
51    pub const NICE_LEN_MAX: u32 = 273;
52
53    /// Minimum match finder nice length.
54    pub const NICE_LEN_MIN: u32 = 8;
55
56    /// Default dictionary size (8MB).
57    pub const DICT_SIZE_DEFAULT: u32 = 8 << 20;
58
59    const PRESET_TO_DICT_SIZE: &'static [u32] = &[
60        1 << 18,
61        1 << 20,
62        1 << 21,
63        1 << 22,
64        1 << 22,
65        1 << 23,
66        1 << 23,
67        1 << 24,
68        1 << 25,
69        1 << 26,
70    ];
71
72    const PRESET_TO_DEPTH_LIMIT: &'static [i32] = &[4, 8, 24, 48];
73
74    /// Creates new LZMA encoding options with specified parameters.
75    #[allow(clippy::too_many_arguments)]
76    pub fn new(
77        dict_size: u32,
78        lc: u32,
79        lp: u32,
80        pb: u32,
81        mode: EncodeMode,
82        nice_len: u32,
83        mf: MfType,
84        depth_limit: i32,
85    ) -> Self {
86        Self {
87            dict_size,
88            lc,
89            lp,
90            pb,
91            mode,
92            nice_len,
93            mf,
94            depth_limit,
95            preset_dict: None,
96        }
97    }
98
99    /// preset: [0..9]
100    #[inline]
101    pub fn with_preset(preset: u32) -> Self {
102        let mut opt = Self {
103            dict_size: Default::default(),
104            lc: Default::default(),
105            lp: Default::default(),
106            pb: Default::default(),
107            mode: EncodeMode::Normal,
108            nice_len: Default::default(),
109            mf: Default::default(),
110            depth_limit: Default::default(),
111            preset_dict: Default::default(),
112        };
113        opt.set_preset(preset);
114        opt
115    }
116
117    /// preset: [0..9]
118    pub fn set_preset(&mut self, preset: u32) {
119        let preset = preset.min(9);
120
121        self.lc = Self::LC_DEFAULT;
122        self.lp = Self::LP_DEFAULT;
123        self.pb = Self::PB_DEFAULT;
124        self.dict_size = Self::PRESET_TO_DICT_SIZE[preset as usize];
125        if preset <= 3 {
126            self.mode = EncodeMode::Fast;
127            self.mf = MfType::Hc4;
128            self.nice_len = if preset <= 1 { 128 } else { Self::NICE_LEN_MAX };
129            self.depth_limit = Self::PRESET_TO_DEPTH_LIMIT[preset as usize];
130        } else {
131            self.mode = EncodeMode::Normal;
132            self.mf = MfType::Bt4;
133            self.nice_len = if preset == 4 {
134                16
135            } else if preset == 5 {
136                32
137            } else {
138                64
139            };
140            self.depth_limit = 0;
141        }
142    }
143
144    /// Returns the estimated memory usage in kilobytes for these options.
145    pub fn get_memory_usage(&self) -> u32 {
146        let dict_size = self.dict_size;
147        let extra_size_before = get_extra_size_before(dict_size);
148        70 + LzmaEncoder::get_mem_usage(self.mode, dict_size, extra_size_before, self.mf)
149    }
150
151    /// Returns the LZMA properties byte for these options.
152    #[inline(always)]
153    pub fn get_props(&self) -> u8 {
154        ((self.pb * 5 + self.lp) * 9 + self.lc) as u8
155    }
156}
157
158/// Options for LZMA2 compression.
159#[derive(Default, Debug, Clone)]
160pub struct Lzma2Options {
161    /// LZMA compression options.
162    pub lzma_options: LzmaOptions,
163    /// The size of each independent chunk in bytes.
164    /// If not set, the whole data will be written as one chunk.
165    /// Will get clamped to be at least the dict size to not waste memory.
166    pub chunk_size: Option<NonZeroU64>,
167}
168
169impl Lzma2Options {
170    /// Create options with specific preset.
171    pub fn with_preset(preset: u32) -> Self {
172        Self {
173            lzma_options: LzmaOptions::with_preset(preset),
174            chunk_size: None,
175        }
176    }
177
178    /// Set the chunk size (None means a single chunk, which is the default).
179    /// Chunk size will be clamped to be at least the dictionary size.
180    pub fn set_chunk_size(&mut self, chunk_size: Option<NonZeroU64>) {
181        self.chunk_size = chunk_size;
182    }
183}
184
/// Size of the buffer that collects one chunk's compressed bytes (64 KiB).
const COMPRESSED_SIZE_MAX: u32 = 64 * 1024;

/// Returns how many extra bytes LZMA2 encoding needs in front of the
/// dictionary: the shortfall of `dict_size` below [`COMPRESSED_SIZE_MAX`],
/// or zero when the dictionary is at least that large.
pub fn get_extra_size_before(dict_size: u32) -> u32 {
    COMPRESSED_SIZE_MAX - dict_size.min(COMPRESSED_SIZE_MAX)
}
191
192/// A single-threaded LZMA2 compressor.
193pub struct Lzma2Writer<W: Write> {
194    inner: W,
195    rc: RangeEncoder<RangeEncoderBuffer>,
196    lzma: LzmaEncoder,
197    mode: LzmaEncoderModes,
198    dict_reset_needed: bool,
199    state_reset_needed: bool,
200    props_needed: bool,
201    pending_size: u32,
202    chunk_size: Option<u64>,
203    uncompressed_size: u64,
204    force_independent_chunk: bool,
205    options: Lzma2Options,
206}
207
208impl<W: Write> Lzma2Writer<W> {
209    /// Creates a new LZMA2 writer that will write compressed data to the given writer.
210    pub fn new(inner: W, options: Lzma2Options) -> Self {
211        let lzma_options = &options.lzma_options;
212        let dict_size = lzma_options.dict_size;
213
214        let rc = RangeEncoder::new_buffer(COMPRESSED_SIZE_MAX as usize);
215        let (mut lzma, mode) = LzmaEncoder::new(
216            lzma_options.mode,
217            lzma_options.lc,
218            lzma_options.lp,
219            lzma_options.pb,
220            lzma_options.mf,
221            lzma_options.depth_limit,
222            lzma_options.dict_size,
223            lzma_options.nice_len as usize,
224        );
225
226        let mut dict_reset_needed = true;
227        if let Some(preset_dict) = &lzma_options.preset_dict {
228            lzma.lz.set_preset_dict(dict_size, preset_dict);
229            dict_reset_needed = false;
230        }
231
232        let chunk_size = options.chunk_size.map(|s| s.get().max(dict_size as u64));
233
234        Self {
235            inner,
236            rc,
237            lzma,
238            mode,
239
240            dict_reset_needed,
241            state_reset_needed: true,
242            props_needed: true,
243            pending_size: 0,
244            chunk_size,
245            uncompressed_size: 0,
246            force_independent_chunk: false,
247            options,
248        }
249    }
250
251    fn should_start_independent_chunk(&self) -> bool {
252        if let Some(chunk_size) = self.chunk_size {
253            self.uncompressed_size >= chunk_size
254        } else {
255            false
256        }
257    }
258
259    fn start_independent_chunk(&mut self) -> crate::Result<()> {
260        self.lzma.lz.set_flushing();
261
262        while self.pending_size > 0 {
263            self.lzma.encode_for_lzma2(&mut self.rc, &mut self.mode)?;
264            self.write_chunk()?;
265        }
266
267        self.force_independent_chunk = true;
268        self.dict_reset_needed = true;
269        self.state_reset_needed = true;
270        self.props_needed = true;
271        self.uncompressed_size = 0;
272
273        let lzma_options = &self.options.lzma_options;
274
275        let (new_lzma, new_mode) = LzmaEncoder::new(
276            lzma_options.mode,
277            lzma_options.lc,
278            lzma_options.lp,
279            lzma_options.pb,
280            lzma_options.mf,
281            lzma_options.depth_limit,
282            lzma_options.dict_size,
283            lzma_options.nice_len as usize,
284        );
285
286        self.lzma = new_lzma;
287        self.mode = new_mode;
288        self.rc = RangeEncoder::new_buffer(COMPRESSED_SIZE_MAX as usize);
289
290        Ok(())
291    }
292
293    fn write_lzma(&mut self, uncompressed_size: u32, compressed_size: u32) -> crate::Result<()> {
294        let mut control = if self.props_needed || self.force_independent_chunk {
295            if self.dict_reset_needed || self.force_independent_chunk {
296                0x80 + (3 << 5)
297            } else {
298                0x80 + (2 << 5)
299            }
300        } else if self.state_reset_needed {
301            0x80 + (1 << 5)
302        } else {
303            0x80
304        };
305        control |= (uncompressed_size - 1) >> 16;
306
307        let mut chunk_header = [0u8; 6];
308        chunk_header[0] = control as u8;
309        chunk_header[1] = ((uncompressed_size - 1) >> 8) as u8;
310        chunk_header[2] = (uncompressed_size - 1) as u8;
311        chunk_header[3] = ((compressed_size - 1) >> 8) as u8;
312        chunk_header[4] = (compressed_size - 1) as u8;
313        if self.props_needed {
314            chunk_header[5] = self.options.lzma_options.get_props();
315            self.inner.write_all(&chunk_header)?;
316        } else {
317            self.inner.write_all(&chunk_header[..5])?;
318        }
319
320        self.rc.write_to(&mut self.inner)?;
321        self.props_needed = false;
322        self.state_reset_needed = false;
323        self.dict_reset_needed = false;
324        self.force_independent_chunk = false;
325        Ok(())
326    }
327
328    fn write_uncompressed(&mut self, mut uncompressed_size: u32) -> crate::Result<()> {
329        while uncompressed_size > 0 {
330            let chunk_size = uncompressed_size.min(COMPRESSED_SIZE_MAX);
331            let mut chunk_header = [0u8; 3];
332            chunk_header[0] = if self.dict_reset_needed { 0x01 } else { 0x02 };
333            chunk_header[1] = ((chunk_size - 1) >> 8) as u8;
334            chunk_header[2] = (chunk_size - 1) as u8;
335            self.inner.write_all(&chunk_header)?;
336            self.lzma.lz.copy_uncompressed(
337                &mut self.inner,
338                uncompressed_size as i32,
339                chunk_size as usize,
340            )?;
341            uncompressed_size -= chunk_size;
342            self.dict_reset_needed = false;
343        }
344        self.state_reset_needed = true;
345        Ok(())
346    }
347
348    fn write_chunk(&mut self) -> crate::Result<()> {
349        let compressed_size = self.rc.finish_buffer()?.unwrap_or_default() as u32;
350        let mut uncompressed_size = self.lzma.data.uncompressed_size;
351        debug_assert!(compressed_size > 0);
352        debug_assert!(
353            uncompressed_size > 0,
354            "uncompressed_size is 0, read_pos={}",
355            self.lzma.lz.read_pos,
356        );
357        if compressed_size + 2 < uncompressed_size {
358            self.write_lzma(uncompressed_size, compressed_size)?;
359        } else {
360            self.lzma.reset(&mut self.mode);
361            uncompressed_size = self.lzma.data.uncompressed_size;
362            debug_assert!(uncompressed_size > 0);
363            self.write_uncompressed(uncompressed_size)?;
364        }
365        self.pending_size -= uncompressed_size;
366        self.uncompressed_size += uncompressed_size as u64;
367
368        self.lzma.reset_uncompressed_size();
369        self.rc.reset_buffer();
370        Ok(())
371    }
372
373    /// Returns a wrapper around `self` that will finish the stream on drop.
374    pub fn auto_finish(self) -> AutoFinisher<Self> {
375        AutoFinisher(Some(self))
376    }
377
378    /// Unwraps the writer, returning the underlying writer.
379    pub fn into_inner(self) -> W {
380        self.inner
381    }
382
383    /// Returns a reference to the inner writer.
384    pub fn inner(&self) -> &W {
385        &self.inner
386    }
387
388    /// Returns a mutable reference to the inner writer.
389    pub fn inner_mut(&mut self) -> &mut W {
390        &mut self.inner
391    }
392
393    /// Finishes the compression and returns the underlying writer.
394    pub fn finish(mut self) -> crate::Result<W> {
395        self.lzma.lz.set_finishing();
396
397        while self.pending_size > 0 {
398            self.lzma.encode_for_lzma2(&mut self.rc, &mut self.mode)?;
399            self.write_chunk()?;
400        }
401
402        self.inner.write_u8(0x00)?;
403
404        Ok(self.inner)
405    }
406}
407
408impl<W: Write> Write for Lzma2Writer<W> {
409    fn write(&mut self, buf: &[u8]) -> crate::Result<usize> {
410        let mut len = buf.len();
411
412        let mut off = 0;
413        while len > 0 {
414            if self.should_start_independent_chunk() {
415                self.start_independent_chunk()?;
416            }
417
418            let used = self.lzma.lz.fill_window(&buf[off..(off + len)]);
419            off += used;
420            len -= used;
421            self.pending_size += used as u32;
422            if self.lzma.encode_for_lzma2(&mut self.rc, &mut self.mode)? {
423                self.write_chunk()?;
424            }
425        }
426        Ok(off)
427    }
428
429    fn flush(&mut self) -> crate::Result<()> {
430        self.lzma.lz.set_flushing();
431
432        while self.pending_size > 0 {
433            self.lzma.encode_for_lzma2(&mut self.rc, &mut self.mode)?;
434            self.write_chunk()?;
435        }
436
437        self.inner.flush()
438    }
439}
440
441impl<W: Write> AutoFinish for Lzma2Writer<W> {
442    fn finish_ignore_error(self) {
443        let _ = self.finish();
444    }
445}