lzma_rust2/
lib.rs

1//! LZMA / LZMA2 / LZIP / XZ compression ported from [tukaani xz for java](https://tukaani.org/xz/java.html).
2//!
3//! This is a fork of the original, unmaintained lzma-rust crate to continue the development and
4//! maintenance.
5//!
6//! ## Safety
7//!
//! Only the `optimization` feature uses unsafe Rust features to implement optimizations that are
//! not possible in safe Rust. Those optimizations are properly guarded and are of course sound.
//! This includes creation of aligned memory, handwritten assembly code for hot functions and some
//! pointer logic. Those optimizations are well localized and generally considered safe to use,
//! even with untrusted input.
13//!
14//! Deactivating the `optimization` feature will result in 100% standard Rust code.
15//!
16//! ## Performance
17//!
18//! When compared against the `liblzma` crate, which uses the C library of the same name, this crate
19//! has improved decoding speed.
20//!
21//! Encoding is also well optimized and is surpassing `liblzma` for level 0 to 3 and matches it for
22//! level 4 to 9.
23//!
24//! ## no_std Support
25//!
26//! This crate supports `no_std` environments by disabling the default `std` feature.
27//!
28//! When used in `no_std` mode, the crate provides custom `Read`, `Write`, and `Error` types
29//! (defined in `no_std.rs`) that are compatible with `no_std` environments. These types offer
30//! similar functionality to their `std::io` counterparts but are implemented using only `core`
31//! and `alloc`.
32//!
33//! The custom types include:
34//!
35//! - [`Error`]: A custom error enum with variants for different error conditions.
36//! - [`Read`]: A trait similar to `std::io::Read` with `read()` and `read_exact()` methods.
37//! - [`Write`]: A trait similar to `std::io::Write` with `write()`, `write_all()`, and `flush()`
38//!   methods.
39//!
40//! Default implementations for `&[u8]` (Read) and `&mut [u8]` (Write) are provided.
41//!
42//! Note that multithreaded features are not available in `no_std` mode as they require
43//! standard library threading primitives.
44//!
45//! ## License
46//!
47//! Licensed under the [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0).
48
// TODO: There is a lot of code left that only the "encoder" feature uses.
50#![allow(dead_code)]
51#![warn(missing_docs)]
52#![cfg_attr(not(feature = "optimization"), forbid(unsafe_code))]
53#![cfg_attr(docsrs, feature(doc_cfg))]
54#![cfg_attr(not(feature = "std"), no_std)]
55
56extern crate alloc;
57
58mod decoder;
59mod lz;
60#[cfg(feature = "lzip")]
61mod lzip;
62mod lzma2_reader;
63mod lzma_reader;
64mod range_dec;
65mod state;
66#[cfg(feature = "std")]
67mod work_queue;
68#[cfg(feature = "xz")]
69mod xz;
70
71#[cfg(feature = "encoder")]
72mod enc;
73
74pub mod filter;
75
76#[cfg(feature = "std")]
77mod lzma2_reader_mt;
78#[cfg(not(feature = "std"))]
79mod no_std;
80#[cfg(feature = "std")]
81mod work_pool;
82
83#[cfg(feature = "std")]
84pub(crate) use std::io::Error;
85#[cfg(feature = "std")]
86pub(crate) use std::io::Read;
87#[cfg(feature = "std")]
88pub(crate) use std::io::Write;
89
90#[cfg(feature = "encoder")]
91pub use enc::*;
92pub use lz::MfType;
93#[cfg(feature = "lzip")]
94pub use lzip::LzipReader;
95#[cfg(all(feature = "lzip", feature = "std"))]
96pub use lzip::LzipReaderMt;
97#[cfg(all(feature = "lzip", feature = "encoder", feature = "std"))]
98pub use lzip::LzipWriterMt;
99#[cfg(all(feature = "lzip", feature = "encoder"))]
100pub use lzip::{LzipOptions, LzipWriter};
101pub use lzma2_reader::{get_memory_usage as lzma2_get_memory_usage, Lzma2Reader};
102#[cfg(feature = "std")]
103pub use lzma2_reader_mt::Lzma2ReaderMt;
104pub use lzma_reader::{
105    get_memory_usage as lzma_get_memory_usage,
106    get_memory_usage_by_props as lzma_get_memory_usage_by_props, LzmaReader,
107};
108#[cfg(not(feature = "std"))]
109pub use no_std::Error;
110#[cfg(not(feature = "std"))]
111pub use no_std::Read;
112#[cfg(not(feature = "std"))]
113pub use no_std::Write;
114use state::*;
115#[cfg(all(feature = "xz", feature = "std"))]
116pub use xz::XzReaderMt;
117#[cfg(all(feature = "xz", feature = "encoder", feature = "std"))]
118pub use xz::XzWriterMt;
119#[cfg(feature = "xz")]
120pub use xz::{CheckType, FilterConfig, FilterType, XzReader};
121#[cfg(all(feature = "xz", feature = "encoder"))]
122pub use xz::{XzOptions, XzWriter};
123
/// Result type of the crate.
///
/// The error type is whichever `Error` is in scope at the crate root, so one alias serves builds
/// with and without the `std` feature.
pub type Result<T> = core::result::Result<T, Error>;
131
/// The minimal size of a dictionary.
pub const DICT_SIZE_MIN: u32 = 4096;

/// The maximal size of a dictionary.
///
/// This is `u32::MAX` with the low four bits cleared (`0xFFFF_FFF0`).
pub const DICT_SIZE_MAX: u32 = u32::MAX & !15_u32;

/// Row length of `LengthCoder::low` (8).
const LOW_SYMBOLS: usize = 1 << 3;
/// Row length of `LengthCoder::mid` (8).
const MID_SYMBOLS: usize = 1 << 3;
/// Length of `LengthCoder::high` (256).
const HIGH_SYMBOLS: usize = 1 << 8;

/// Size of the per-position dimension of the probability tables (16).
const POS_STATES_MAX: usize = 1 << 4;
/// Offset subtracted from a length before it is mapped to a distance state.
const MATCH_LEN_MIN: usize = 2;
/// `2 + 8 + 8 + 256 - 1 = 273`.
const MATCH_LEN_MAX: usize = MATCH_LEN_MIN + LOW_SYMBOLS + MID_SYMBOLS + HIGH_SYMBOLS - 1;

/// Number of rows in `LzmaCoder::dist_slots`; also bounds the result of `get_dist_state`.
const DIST_STATES: usize = 4;
/// Row length of `LzmaCoder::dist_slots` (64).
const DIST_SLOTS: usize = 1 << 6;
// NOTE(review): the two bounds below are only used here to derive `FULL_DISTANCES`; their other
// consumers are outside this file.
const DIST_MODEL_START: usize = 4;
const DIST_MODEL_END: usize = 14;
/// `1 << 7 = 128`.
const FULL_DISTANCES: usize = 1 << (DIST_MODEL_END / 2);

/// Base-2 logarithm of `ALIGN_SIZE`.
const ALIGN_BITS: usize = 4;
/// Length of `LzmaCoder::dist_align` (16).
const ALIGN_SIZE: usize = 1 << ALIGN_BITS;
/// `ALIGN_SIZE - 1 = 15`.
const ALIGN_MASK: usize = ALIGN_SIZE - 1;

/// Length of `LzmaCoder::reps`.
const REPS: usize = 4;

// NOTE(review): the values from here to the end of the list look like range-coder / bit-model
// parameters; apart from `PROB_INIT` and the `DIST_SPECIAL_*` tables they are not used in this
// file, so confirm their meaning against the modules that consume them.
const SHIFT_BITS: u32 = 8;
const TOP_MASK: u32 = 0xFF000000;
const BIT_MODEL_TOTAL_BITS: u32 = 11;
/// `1 << 11 = 2048`.
const BIT_MODEL_TOTAL: u32 = 1 << BIT_MODEL_TOTAL_BITS;
/// Half of `BIT_MODEL_TOTAL` (1024); the value `init_probs` writes into every table entry.
const PROB_INIT: u16 = (BIT_MODEL_TOTAL / 2) as u16;
const MOVE_BITS: u32 = 5;
/// Start offsets (inclusive) into `LzmaCoder::dist_special`, indexed like `DIST_SPECIAL_END`.
const DIST_SPECIAL_INDEX: [usize; 10] = [0, 2, 4, 8, 12, 20, 28, 44, 60, 92];
/// End offsets (exclusive) into `LzmaCoder::dist_special`; see `LzmaCoder::get_dist_special`.
const DIST_SPECIAL_END: [usize; 10] = [2, 4, 8, 12, 20, 28, 44, 60, 92, 124];
const TOP_VALUE: u32 = 0x0100_0000;
/// `(1 << MOVE_BITS) - 1 - BIT_MODEL_TOTAL` in wrapping `u32` arithmetic, i.e. `0xFFFF_F81F`.
const RC_BIT_MODEL_OFFSET: u32 = (1u32 << MOVE_BITS)
    .wrapping_sub(1)
    .wrapping_sub(BIT_MODEL_TOTAL);
170
171/// Helper to set the shared error state and trigger shutdown.
172#[cfg(feature = "std")]
///
/// Only the first reported error is kept: if `error_store` already holds an error, `error` is
/// dropped. `shutdown_flag` is set to `true` with `Release` ordering on every call.
///
/// A poisoned `error_store` mutex is recovered rather than unwrapped, so a panic in another
/// thread that held the lock cannot prevent the shutdown flag from being raised.
fn set_error(
    error: Error,
    error_store: &std::sync::Arc<std::sync::Mutex<Option<Error>>>,
    shutdown_flag: &std::sync::Arc<std::sync::atomic::AtomicBool>,
) {
    // The guarded value is a plain `Option<Error>` that cannot be observed half-updated, so it
    // is fine to keep using it after the mutex was poisoned.
    let mut guard = error_store
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    if guard.is_none() {
        *guard = Some(error);
    }
    // The flag is raised while the lock is still held.
    shutdown_flag.store(true, std::sync::atomic::Ordering::Release);
}
184
/// Probability tables and coder state of the LZMA model.
///
/// Every `u16` table below is filled with `PROB_INIT` by `LzmaCoder::reset`.
pub(crate) struct LzmaCoder {
    /// `(1 << pb) - 1`, computed from the `pb` argument of `LzmaCoder::new`.
    pub(crate) pos_mask: u32,
    /// Zeroed by `reset`. NOTE(review): presumably the most recent match distances — confirm.
    pub(crate) reps: [i32; REPS],
    /// Current coder state; reset through `State::reset`.
    pub(crate) state: State,
    /// One probability per (state, position state) pair.
    pub(crate) is_match: [[u16; POS_STATES_MAX]; STATES],
    /// One probability per state.
    pub(crate) is_rep: [u16; STATES],
    /// One probability per state.
    pub(crate) is_rep0: [u16; STATES],
    /// One probability per state.
    pub(crate) is_rep1: [u16; STATES],
    /// One probability per state.
    pub(crate) is_rep2: [u16; STATES],
    /// One probability per (state, position state) pair.
    pub(crate) is_rep0_long: [[u16; POS_STATES_MAX]; STATES],
    /// `DIST_SLOTS` probabilities for each of the `DIST_STATES` distance states.
    pub(crate) dist_slots: [[u16; DIST_SLOTS]; DIST_STATES],
    /// Flat storage that `get_dist_special` slices using `DIST_SPECIAL_INDEX` /
    /// `DIST_SPECIAL_END`; 124 is the last entry of `DIST_SPECIAL_END`.
    dist_special: [u16; 124],
    /// `ALIGN_SIZE` probabilities.
    dist_align: [u16; ALIGN_SIZE],
}
199
200pub(crate) fn coder_get_dict_size(len: usize) -> usize {
201    if len < DIST_STATES + MATCH_LEN_MIN {
202        len - MATCH_LEN_MIN
203    } else {
204        DIST_STATES - 1
205    }
206}
207
208pub(crate) fn get_dist_state(len: u32) -> u32 {
209    (if (len as usize) < DIST_STATES + MATCH_LEN_MIN {
210        len as usize - MATCH_LEN_MIN
211    } else {
212        DIST_STATES - 1
213    }) as u32
214}
215
216impl LzmaCoder {
217    pub(crate) fn new(pb: usize) -> Self {
218        let mut c = Self {
219            pos_mask: (1 << pb) - 1,
220            reps: Default::default(),
221            state: Default::default(),
222            is_match: Default::default(),
223            is_rep: Default::default(),
224            is_rep0: Default::default(),
225            is_rep1: Default::default(),
226            is_rep2: Default::default(),
227            is_rep0_long: Default::default(),
228            dist_slots: [[Default::default(); DIST_SLOTS]; DIST_STATES],
229            dist_special: [Default::default(); 124],
230            dist_align: Default::default(),
231        };
232        c.reset();
233        c
234    }
235
236    pub(crate) fn reset(&mut self) {
237        self.reps = [0; REPS];
238        self.state.reset();
239        for ele in self.is_match.iter_mut() {
240            init_probs(ele);
241        }
242        init_probs(&mut self.is_rep);
243        init_probs(&mut self.is_rep0);
244        init_probs(&mut self.is_rep1);
245        init_probs(&mut self.is_rep2);
246
247        for ele in self.is_rep0_long.iter_mut() {
248            init_probs(ele);
249        }
250        for ele in self.dist_slots.iter_mut() {
251            init_probs(ele);
252        }
253        init_probs(&mut self.dist_special);
254        init_probs(&mut self.dist_align);
255    }
256
257    #[inline(always)]
258    pub(crate) fn get_dist_special(&mut self, i: usize) -> &mut [u16] {
259        &mut self.dist_special[DIST_SPECIAL_INDEX[i]..DIST_SPECIAL_END[i]]
260    }
261}
262
263#[inline(always)]
264pub(crate) fn init_probs(probs: &mut [u16]) {
265    probs.fill(PROB_INIT);
266}
267
/// Parameters used by `get_sub_coder_index` to select a literal sub coder.
pub(crate) struct LiteralCoder {
    /// The `lc` argument of `LiteralCoder::new`, stored unchanged and used as a shift amount.
    lc: u32,
    /// `(1 << lp) - 1`, computed from the `lp` argument of `LiteralCoder::new`.
    literal_pos_mask: u32,
}
272
273#[derive(Debug, Clone, Copy)]
274pub(crate) struct LiteralSubCoder {
275    probs: [u16; 0x300],
276}
277
278impl LiteralSubCoder {
279    pub fn new() -> Self {
280        let probs = [PROB_INIT; 0x300];
281        Self { probs }
282    }
283
284    pub fn reset(&mut self) {
285        self.probs = [PROB_INIT; 0x300];
286    }
287}
288
289impl LiteralCoder {
290    pub fn new(lc: u32, lp: u32) -> Self {
291        Self {
292            lc,
293            literal_pos_mask: (1 << lp) - 1,
294        }
295    }
296
297    pub(crate) fn get_sub_coder_index(&self, prev_byte: u32, pos: u32) -> u32 {
298        let low = prev_byte >> (8 - self.lc);
299        let high = (pos & self.literal_pos_mask) << self.lc;
300        low + high
301    }
302}
303
304pub(crate) struct LengthCoder {
305    choice: [u16; 2],
306    low: [[u16; LOW_SYMBOLS]; POS_STATES_MAX],
307    mid: [[u16; MID_SYMBOLS]; POS_STATES_MAX],
308    high: [u16; HIGH_SYMBOLS],
309}
310
311impl LengthCoder {
312    pub fn new() -> Self {
313        Self {
314            choice: Default::default(),
315            low: Default::default(),
316            mid: Default::default(),
317            high: [0; HIGH_SYMBOLS],
318        }
319    }
320
321    pub fn reset(&mut self) {
322        init_probs(&mut self.choice);
323        for ele in self.low.iter_mut() {
324            init_probs(ele);
325        }
326        for ele in self.mid.iter_mut() {
327            init_probs(ele);
328        }
329        init_probs(&mut self.high);
330    }
331}
332
/// Fixed-width integer reads on top of a byte source.
///
/// Implemented for every `Read` type by the blanket impl in this file: the unsuffixed methods
/// decode little-endian, the `_be` methods decode big-endian.
trait ByteReader {
    /// Reads a single byte.
    fn read_u8(&mut self) -> Result<u8>;

    /// Reads two bytes as a little-endian `u16`.
    fn read_u16(&mut self) -> Result<u16>;

    /// Reads two bytes as a big-endian `u16`.
    fn read_u16_be(&mut self) -> Result<u16>;

    /// Reads four bytes as a little-endian `u32`.
    fn read_u32(&mut self) -> Result<u32>;

    /// Reads four bytes as a big-endian `u32`.
    fn read_u32_be(&mut self) -> Result<u32>;

    /// Reads eight bytes as a little-endian `u64`.
    fn read_u64(&mut self) -> Result<u64>;
}
346
/// Fixed-width integer writes on top of a byte sink.
///
/// Implemented for every `Write` type by the blanket impl in this file, which encodes all
/// multi-byte values little-endian.
trait ByteWriter {
    /// Writes a single byte.
    fn write_u8(&mut self, value: u8) -> Result<()>;

    /// Writes `value` as two little-endian bytes.
    fn write_u16(&mut self, value: u16) -> Result<()>;

    /// Writes `value` as four little-endian bytes.
    fn write_u32(&mut self, value: u32) -> Result<()>;

    /// Writes `value` as eight little-endian bytes.
    fn write_u64(&mut self, value: u64) -> Result<()>;
}
356
357impl<T: Read> ByteReader for T {
358    #[inline(always)]
359    fn read_u8(&mut self) -> Result<u8> {
360        let mut buf = [0; 1];
361        self.read_exact(&mut buf)?;
362        Ok(buf[0])
363    }
364
365    #[inline(always)]
366    fn read_u16(&mut self) -> Result<u16> {
367        let mut buf = [0; 2];
368        self.read_exact(buf.as_mut())?;
369        Ok(u16::from_le_bytes(buf))
370    }
371
372    #[inline(always)]
373    fn read_u16_be(&mut self) -> Result<u16> {
374        let mut buf = [0; 2];
375        self.read_exact(buf.as_mut())?;
376        Ok(u16::from_be_bytes(buf))
377    }
378
379    #[inline(always)]
380    fn read_u32(&mut self) -> Result<u32> {
381        let mut buf = [0; 4];
382        self.read_exact(buf.as_mut())?;
383        Ok(u32::from_le_bytes(buf))
384    }
385
386    #[inline(always)]
387    fn read_u32_be(&mut self) -> Result<u32> {
388        let mut buf = [0; 4];
389        self.read_exact(buf.as_mut())?;
390        Ok(u32::from_be_bytes(buf))
391    }
392
393    #[inline(always)]
394    fn read_u64(&mut self) -> Result<u64> {
395        let mut buf = [0; 8];
396        self.read_exact(buf.as_mut())?;
397        Ok(u64::from_le_bytes(buf))
398    }
399}
400
401impl<T: Write> ByteWriter for T {
402    #[inline(always)]
403    fn write_u8(&mut self, value: u8) -> Result<()> {
404        self.write_all(&[value])
405    }
406
407    #[inline(always)]
408    fn write_u16(&mut self, value: u16) -> Result<()> {
409        self.write_all(&value.to_le_bytes())
410    }
411
412    #[inline(always)]
413    fn write_u32(&mut self, value: u32) -> Result<()> {
414        self.write_all(&value.to_le_bytes())
415    }
416
417    #[inline(always)]
418    fn write_u64(&mut self, value: u64) -> Result<()> {
419        self.write_all(&value.to_le_bytes())
420    }
421}
422
/// Builds an `UnexpectedEof` I/O error with the message `"unexpected EOF"`.
#[cfg(feature = "std")]
#[inline(always)]
fn error_eof() -> Error {
    let kind = std::io::ErrorKind::UnexpectedEof;
    Error::new(kind, "unexpected EOF")
}

/// Builds an I/O error of kind `Other` carrying `msg`.
#[cfg(feature = "std")]
#[inline(always)]
fn error_other(msg: &'static str) -> Error {
    let kind = std::io::ErrorKind::Other;
    Error::new(kind, msg)
}

/// Builds an I/O error of kind `InvalidInput` carrying `msg`.
#[cfg(feature = "std")]
#[inline(always)]
fn error_invalid_input(msg: &'static str) -> Error {
    let kind = std::io::ErrorKind::InvalidInput;
    Error::new(kind, msg)
}

/// Builds an I/O error of kind `InvalidData` carrying `msg`.
#[cfg(feature = "std")]
#[inline(always)]
fn error_invalid_data(msg: &'static str) -> Error {
    let kind = std::io::ErrorKind::InvalidData;
    Error::new(kind, msg)
}

/// Builds an I/O error of kind `OutOfMemory` carrying `msg`.
#[cfg(feature = "std")]
#[inline(always)]
fn error_out_of_memory(msg: &'static str) -> Error {
    let kind = std::io::ErrorKind::OutOfMemory;
    Error::new(kind, msg)
}

/// Builds an I/O error of kind `Unsupported` carrying `msg`.
#[cfg(feature = "std")]
#[inline(always)]
fn error_unsupported(msg: &'static str) -> Error {
    let kind = std::io::ErrorKind::Unsupported;
    Error::new(kind, msg)
}

/// Builds a new error with the same kind as `error` and its display text as the message.
///
/// `std::io::Error` is not `Clone`, so the copy is rebuilt from kind and message.
#[cfg(feature = "std")]
#[inline(always)]
fn copy_error(error: &Error) -> Error {
    let kind = error.kind();
    let message = error.to_string();
    Error::new(kind, message)
}
464
/// Returns `Error::Eof`; `no_std` counterpart of the `std` version of `error_eof`.
#[cfg(not(feature = "std"))]
#[inline(always)]
fn error_eof() -> Error {
    Error::Eof
}

/// Wraps `msg` in `Error::Other`.
#[cfg(not(feature = "std"))]
#[inline(always)]
fn error_other(msg: &'static str) -> Error {
    Error::Other(msg)
}

/// Wraps `msg` in `Error::InvalidInput`.
#[cfg(not(feature = "std"))]
#[inline(always)]
fn error_invalid_input(msg: &'static str) -> Error {
    Error::InvalidInput(msg)
}

/// Wraps `msg` in `Error::InvalidData`.
#[cfg(not(feature = "std"))]
#[inline(always)]
fn error_invalid_data(msg: &'static str) -> Error {
    Error::InvalidData(msg)
}

/// Wraps `msg` in `Error::OutOfMemory`.
#[cfg(not(feature = "std"))]
#[inline(always)]
fn error_out_of_memory(msg: &'static str) -> Error {
    Error::OutOfMemory(msg)
}

/// Wraps `msg` in `Error::Unsupported`.
#[cfg(not(feature = "std"))]
#[inline(always)]
fn error_unsupported(msg: &'static str) -> Error {
    Error::Unsupported(msg)
}

/// Returns a copy of `error`.
///
/// The plain dereference relies on the `no_std` `Error` type being `Copy`.
#[cfg(not(feature = "std"))]
#[inline(always)]
fn copy_error(error: &Error) -> Error {
    *error
}
506
/// A reader wrapper that keeps a running total of the bytes read through it.
struct CountingReader<R> {
    /// The wrapped reader.
    inner: R,
    /// Initial count plus the sizes returned by successful `read` calls on the wrapper.
    bytes_read: u64,
}

impl<R> CountingReader<R> {
    /// Wraps `inner` with the counter starting at zero.
    fn new(inner: R) -> Self {
        Self::with_count(inner, 0)
    }

    /// Wraps `inner` with the counter starting at `bytes_read`.
    fn with_count(inner: R, bytes_read: u64) -> Self {
        Self { inner, bytes_read }
    }

    /// Returns the current byte count.
    fn bytes_read(&self) -> u64 {
        self.bytes_read
    }

    /// Consumes the wrapper and returns the wrapped reader; the count is discarded.
    fn into_inner(self) -> R {
        let Self { inner, .. } = self;
        inner
    }

    /// Borrows the wrapped reader.
    fn inner(&self) -> &R {
        &self.inner
    }

    /// Mutably borrows the wrapped reader; reads made through it are not counted.
    fn inner_mut(&mut self) -> &mut R {
        &mut self.inner
    }
}

impl<R: Read> Read for CountingReader<R> {
    /// Forwards to the wrapped reader and adds the returned size to the count on success.
    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
        match self.inner.read(buf) {
            Ok(count) => {
                self.bytes_read += count as u64;
                Ok(count)
            }
            Err(err) => Err(err),
        }
    }
}
548
/// A writer wrapper that keeps a running total of the bytes written through it.
#[cfg(feature = "encoder")]
struct CountingWriter<W> {
    /// The wrapped writer.
    inner: W,
    /// Sum of the sizes returned by successful `write` calls on the wrapper.
    bytes_written: u64,
}

#[cfg(feature = "encoder")]
impl<W> CountingWriter<W> {
    /// Wraps `inner` with the counter starting at zero.
    fn new(inner: W) -> Self {
        let bytes_written = 0;
        Self {
            inner,
            bytes_written,
        }
    }

    /// Returns the current byte count.
    fn bytes_written(&self) -> u64 {
        self.bytes_written
    }

    /// Consumes the wrapper and returns the wrapped writer; the count is discarded.
    fn into_inner(self) -> W {
        let Self { inner, .. } = self;
        inner
    }

    /// Borrows the wrapped writer.
    fn inner(&self) -> &W {
        &self.inner
    }

    /// Mutably borrows the wrapped writer; writes made through it are not counted.
    fn inner_mut(&mut self) -> &mut W {
        &mut self.inner
    }
}

#[cfg(feature = "encoder")]
impl<W: Write> Write for CountingWriter<W> {
    /// Forwards to the wrapped writer and adds the returned size to the count on success.
    fn write(&mut self, buf: &[u8]) -> Result<usize> {
        match self.inner.write(buf) {
            Ok(count) => {
                self.bytes_written += count as u64;
                Ok(count)
            }
            Err(err) => Err(err),
        }
    }

    /// Flushes the wrapped writer.
    fn flush(&mut self) -> Result<()> {
        self.inner.flush()
    }
}
593
/// A trait for writers that finishes the stream on drop.
///
/// `AutoFinisher` calls `finish_ignore_error` from its `Drop` implementation.
trait AutoFinish {
    /// Finish writing the stream without error handling.
    ///
    /// Takes the writer by value and returns nothing, so a failure cannot be reported to the
    /// caller.
    fn finish_ignore_error(self);
}
599
600/// A wrapper around a writer that finishes the stream on drop.
601#[allow(private_bounds)]
602pub struct AutoFinisher<T: AutoFinish>(Option<T>);
603
604impl<T: AutoFinish> Drop for AutoFinisher<T> {
605    fn drop(&mut self) {
606        if let Some(writer) = self.0.take() {
607            writer.finish_ignore_error();
608        }
609    }
610}
611
612impl<T: AutoFinish> core::ops::Deref for AutoFinisher<T> {
613    type Target = T;
614
615    fn deref(&self) -> &Self::Target {
616        self.0.as_ref().unwrap()
617    }
618}
619
620impl<T: AutoFinish> core::ops::DerefMut for AutoFinisher<T> {
621    fn deref_mut(&mut self) -> &mut Self::Target {
622        self.0.as_mut().unwrap()
623    }
624}
625
626impl<T: AutoFinish + Write> Write for AutoFinisher<T> {
627    fn write(&mut self, buf: &[u8]) -> Result<usize> {
628        use core::ops::DerefMut;
629
630        self.deref_mut().write(buf)
631    }
632
633    fn flush(&mut self) -> Result<()> {
634        use core::ops::DerefMut;
635
636        self.deref_mut().flush()
637    }
638}