Skip to main content

ai_weezl/
lib.rs

1//! # LZW decoder and encoder
2//!
//! This crate provides an `Encoder` and a `Decoder` in their respective modules. The code words
//! are read from and written to byte slices (or streams) where it is possible to write either the
//! most or least significant bits first. The maximum possible code size is 12 bits, the smallest
//! available code size is 2 bits.
7//!
8//! ## Example
9//!
10//! These two code blocks show the compression and corresponding decompression. Note that you must
11//! use the same arguments to `Encoder` and `Decoder`, otherwise the decoding might fail or produce
12//! bad results.
13//!
14#![doc = "```"]
15//! use ai_weezl::{BitOrder, encode::Encoder};
16//!
17//! let data = b"Hello, world";
18//! let compressed = Encoder::new(BitOrder::Msb, 9)
19//!     .encode(data)
20//!     .unwrap();
21//! ```
22//!
23#![doc = "```"]
24//! use ai_weezl::{BitOrder, decode::Decoder};
25//! # let compressed = b"\x80\x04\x81\x94l\x1b\x06\xf0\xb0 \x1d\xc6\xf1\xc8l\x19 \x10".to_vec();
26//! # let data = b"Hello, world";
27//!
28//! let decompressed = Decoder::new(BitOrder::Msb, 9)
29//!     .decode(&compressed)
30//!     .unwrap();
31//! assert_eq!(decompressed, data);
32//! ```
33//!
34//! ## LZW Details
35//!
36//! The de- and encoder expect the LZW stream to start with a clear code and end with an
37//! end code which are defined as follows:
38//!
39//!  * `CLEAR_CODE == 1 << min_code_size`
40//!  * `END_CODE   == CLEAR_CODE + 1`
41//!
42//! For optimal performance, all buffers and input and output slices should be as large as possible
43//! and at least 2048 bytes long. This extends to input streams which should have similarly sized
44//! buffers. This library uses Rust's standard allocation interfaces (`Box` and `Vec` to be
45//! precise). Since there are no ways to handle allocation errors it is not recommended to operate
46//! it on 16-bit targets.
47//!
48//! ## Allocations and standard library
49//!
50//! The main algorithm can be used in `no_std` as well, although it requires an allocator. This
51//! restriction might be lifted at a later stage. For this you should deactivate the `std` feature.
//! The main interfaces stay intact but the `into_stream` combinator is not available.
53#![cfg_attr(all(not(test), not(feature = "std")), no_std)]
54#![forbid(unsafe_code)]
55#![forbid(missing_docs)]
56
57#[cfg(feature = "std")]
58extern crate std;
59
60extern crate alloc;
61
/// Largest code size, in bits, handled by this crate.
pub(crate) const MAX_CODESIZE: u8 = 12;
/// `2^MAX_CODESIZE`: the count of distinct values a code of maximum size can take.
pub(crate) const MAX_ENTRIES: usize = 1 << (MAX_CODESIZE as usize);

/// Integer type used to hold a single LZW code word.
pub(crate) type Code = u16;

/// Default size, in bytes, of an encoding/decoding buffer (16 MiB).
///
/// This is deliberately larger than the usual default buffer size (typically 4K): a single code
/// word can expand to multiple bytes, so expanding one full buffer of codes would yield several
/// buffers of output and force a costly break in the decoding loop each time. Also note that the
/// decoded size can be up to quadratic in the size of the code block.
pub(crate) const STREAM_BUF_SIZE: usize = 16 * 1024 * 1024;
74
/// The order of bits in bytes.
///
/// Determines which end of each byte the code words are packed into. The encoder and the decoder
/// must be configured with the same order, otherwise decoding might fail or produce bad results.
///
/// The type is a plain `Copy` value and can be compared for equality and used as a hash key.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum BitOrder {
    /// The most significant bit is processed first.
    Msb,
    /// The least significant bit is processed first.
    Lsb,
}
83
/// A byte buffer for stream operations that is either borrowed from elsewhere or owned.
pub(crate) enum StreamBuf<'d> {
    /// A mutable byte slice borrowed for the lifetime `'d`.
    Borrowed(&'d mut [u8]),
    /// A byte vector owned by this value.
    Owned(alloc::vec::Vec<u8>),
}
89
90#[cold]
91fn assert_decode_size(size: u8) {
92    assert!(
93        size <= MAX_CODESIZE,
94        "Maximum code size 12 required, got {}",
95        size
96    );
97}
98
99#[cold]
100fn assert_encode_size(size: u8) {
101    assert!(size >= 2, "Minimum code size 2 required, got {}", size);
102    assert!(
103        size <= MAX_CODESIZE,
104        "Maximum code size 12 required, got {}",
105        size
106    );
107}
108
109pub mod decode;
110pub mod encode;
111mod error;
112
113pub use self::error::{BufferResult, LzwError, LzwStatus, StreamResult};
114
#[cfg(test)]
mod tests {
    use crate::decode::{self, Decoder};
    use crate::encode::{self, Encoder};

    /// Compile-time guard: the codec types must stay `Send`.
    ///
    /// Nothing is checked at runtime; the test passes as long as the bounds below type-check.
    #[test]
    fn stable_send() {
        // Only compiles for types that are `Send` and contain no short-lived borrows.
        fn assert_send_static<T>()
        where
            T: Send + 'static,
        {
        }
        assert_send_static::<Decoder>();
        assert_send_static::<Encoder>();

        // Only compiles for types that are `Send` and outlive `'lt`.
        fn _assert_send_for<'lt, T>()
        where
            T: Send + 'lt,
        {
        }

        // Check that the inference `W: Send + 'd` => `IntoStream: Send + 'd` works.
        fn _writer_streams_are_send<'d, W>()
        where
            W: no_std_io::io::Write + Send + 'd,
        {
            _assert_send_for::<'d, decode::IntoStream<'d, W>>();
            _assert_send_for::<'d, encode::IntoStream<'d, W>>();
        }
    }
}