1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
//! # LZW decoder and encoder
//!
//! This crates provides an `Encoder` and a `Decoder` in their respective modules. The code words
//! are written from and to bit byte slices (or streams) where it is possible to write either the
//! most or least significant bits first. The maximum possible code size is 12 bits, the smallest
//! available code size is 2 bits.
//!
//! The de- and encoder expect the LZW stream to start with a clear code and end with an
//! end code which are defined as follows:
//!
//!  * `CLEAR_CODE == 1 << min_code_size`
//!  * `END_CODE   == CLEAR_CODE + 1`
//!
//! For optimal performance, all buffers and input and output slices should be as large as possible
//! and at least 2048 bytes long. This extends to input streams which should have similarly sized
//! buffers. This library uses Rust's standard allocation interfaces (`Box` and `Vec` to be
//! precise). Since there are no ways to handle allocation errors it is not recommended to operate
//! it on 16-bit targets.
//!
//! Exemplary use of the encoder:
//!
#![cfg_attr(feature = "std", doc = "```")]
#![cfg_attr(not(feature = "std"), doc = "```ignore")]
//! use weezl::{BitOrder, encode::Encoder};
//! let size = 8;
//! let data = b"TOBEORNOTTOBEORTOBEORNOT";
//! let mut compressed = vec![];
//!
//! let mut enc = Encoder::new(BitOrder::Msb, size);
//! let result = enc.into_stream(&mut compressed).encode(&data[..]);
//! result.status.unwrap();
//! ```
//!
//! The main algorithm can be used in `no_std` as well, although it requires an allocator. This
//! restriction might be lifted at a later stage. For this you should deactivate the `std` feature.
//! The main interfaces stay intact but the `into_stream` combinator is no available.
#![cfg_attr(not(feature = "std"), no_std)]
#![forbid(unsafe_code)]
#![forbid(missing_docs)]

#[cfg(all(feature = "alloc", not(feature = "std")))]
extern crate alloc;
#[cfg(all(feature = "alloc", feature = "std"))]
use std as alloc;

pub(crate) const MAX_CODESIZE: u8 = 12;
pub(crate) const MAX_ENTRIES: usize = 1 << MAX_CODESIZE as usize;

/// Alias for a LZW code point
pub(crate) type Code = u16;

/// A default buffer size for encoding/decoding buffer.
///
/// Note that this is larger than the default size for buffers (usually 4K) since each code word
/// can expand to multiple bytes. Expanding one buffer would yield multiple and require a costly
/// break in the decoding loop. Note that the decoded size can be up to quadratic in code block.
pub(crate) const STREAM_BUF_SIZE: usize = 1 << 24;

/// The order of bits in bytes.
#[derive(Clone, Copy, Debug)]
pub enum BitOrder {
    /// The most significant bit is processed first.
    Msb,
    /// The least significant bit is processed first.
    Lsb,
}

/// An owned or borrowed buffer for stream operations.
#[cfg(feature = "alloc")]
pub(crate) enum StreamBuf<'d> {
    Borrowed(&'d mut [u8]),
    Owned(crate::alloc::vec::Vec<u8>),
}

#[cold]
fn assert_code_size(size: u8) {
    assert!(size >= 2, "Minimum code size 2 required, got {}", size);
    assert!(
        size <= MAX_CODESIZE,
        "Maximum code size 12 required, got {}",
        size
    );
}

#[cfg(feature = "alloc")]
pub mod decode;
#[cfg(feature = "alloc")]
pub mod encode;
mod error;

#[cfg(feature = "std")]
pub use self::error::StreamResult;
pub use self::error::{BufferResult, LzwError, LzwStatus};

#[cfg(all(test, feature = "alloc"))]
mod tests {
    use crate::decode::Decoder;
    use crate::encode::Encoder;

    #[cfg(feature = "std")]
    use crate::{decode, encode};

    #[test]
    fn stable_send() {
        fn must_be_send<T: Send + 'static>() {}
        must_be_send::<Decoder>();
        must_be_send::<Encoder>();

        #[cfg(feature = "std")]
        fn _send_and_lt<'lt, T: Send + 'lt>() {}

        // Check that the inference `W: Send + 'd` => `IntoStream: Send + 'd` works.
        #[cfg(feature = "std")]
        fn _all_send_writer<'d, W: std::io::Write + Send + 'd>() {
            _send_and_lt::<'d, decode::IntoStream<'d, W>>();
            _send_and_lt::<'d, encode::IntoStream<'d, W>>();
        }
    }
}