libflate_lz77/
lib.rs

1//! The interface and implementations of LZ77 compression algorithm.
2//!
3//! LZ77 is a compression algorithm used in [DEFLATE](https://tools.ietf.org/html/rfc1951).
4#![warn(missing_docs)]
5#![cfg_attr(not(feature = "std"), no_std)]
6
7extern crate alloc;
8
9pub use self::default::{DefaultLz77Encoder, DefaultLz77EncoderBuilder};
10use alloc::vec::Vec;
11use core::cmp;
12use core2::io;
13use rle_decode_fast::rle_decode;
14
15mod default;
16
17/// Maximum length of sharable bytes in a pointer.
18pub const MAX_LENGTH: u16 = 258;
19
20/// Maximum backward distance of a pointer.
21pub const MAX_DISTANCE: u16 = 32_768;
22
23/// Maximum size of a sliding window.
24pub const MAX_WINDOW_SIZE: u16 = MAX_DISTANCE;
25
26/// A LZ77 encoded data.
27#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
28pub enum Code {
29    /// Literal byte.
30    Literal(u8),
31
32    /// Backward pointer to shared data.
33    Pointer {
34        /// Length of the shared data.
35        /// The values must be limited to [`MAX_LENGTH`].
36        length: u16,
37
38        /// Distance between current position and start position of the shared data.
39        /// The values must be limited to [`MAX_DISTANCE`].
40        backward_distance: u16,
41    },
42}
43
44/// Compression level.
45#[derive(Debug, Clone, PartialEq, Eq, Hash)]
46pub enum CompressionLevel {
47    /// No compression.
48    None,
49
50    /// Best speed.
51    Fast,
52
53    /// Balanced between speed and size.
54    Balance,
55
56    /// Best compression.
57    Best,
58}
59
60/// The [`Sink`] trait represents a consumer of LZ77 encoded data.
61pub trait Sink {
62    /// Consumes a LZ77 encoded `Code`.
63    fn consume(&mut self, code: Code);
64}
65impl<T> Sink for &mut T
66where
67    T: Sink,
68{
69    fn consume(&mut self, code: Code) {
70        (*self).consume(code);
71    }
72}
73impl<T> Sink for Vec<T>
74where
75    T: From<Code>,
76{
77    fn consume(&mut self, code: Code) {
78        self.push(T::from(code));
79    }
80}
81
82/// The [`Lz77Encode`] trait defines the interface of LZ77 encoding algorithm.
83pub trait Lz77Encode {
84    /// Encodes a buffer and writes result LZ77 codes to `sink`.
85    fn encode<S>(&mut self, buf: &[u8], sink: S)
86    where
87        S: Sink;
88
89    /// Flushes the encoder, ensuring that all intermediately buffered codes are consumed by `sink`.
90    fn flush<S>(&mut self, sink: S)
91    where
92        S: Sink;
93
94    /// Returns the compression level of the encoder.
95    ///
96    /// If the implementation is omitted, [`CompressionLevel::Balance`] will be returned.
97    fn compression_level(&self) -> CompressionLevel {
98        CompressionLevel::Balance
99    }
100
101    /// Returns the window size of the encoder.
102    ///
103    /// If the implementation is omitted, [`MAX_WINDOW_SIZE`] will be returned.
104    fn window_size(&self) -> u16 {
105        MAX_WINDOW_SIZE
106    }
107}
108
109/// A no compression implementation of [`Lz77Encode`] trait.
110#[derive(Debug, Default)]
111pub struct NoCompressionLz77Encoder;
112impl NoCompressionLz77Encoder {
113    /// Makes a new encoder instance.
114    ///
115    /// # Examples
116    /// ```
117    /// use libflate::deflate;
118    /// use libflate::lz77::{Lz77Encode, NoCompressionLz77Encoder, CompressionLevel};
119    ///
120    /// let lz77 = NoCompressionLz77Encoder::new();
121    /// assert_eq!(lz77.compression_level(), CompressionLevel::None);
122    ///
123    /// let options = deflate::EncodeOptions::with_lz77(lz77);
124    /// let _deflate = deflate::Encoder::with_options(Vec::new(), options);
125    /// ```
126    pub fn new() -> Self {
127        NoCompressionLz77Encoder
128    }
129}
130impl Lz77Encode for NoCompressionLz77Encoder {
131    fn encode<S>(&mut self, buf: &[u8], mut sink: S)
132    where
133        S: Sink,
134    {
135        for c in buf.iter().cloned().map(Code::Literal) {
136            sink.consume(c);
137        }
138    }
139    #[allow(unused_variables)]
140    fn flush<S>(&mut self, sink: S)
141    where
142        S: Sink,
143    {
144    }
145    fn compression_level(&self) -> CompressionLevel {
146        CompressionLevel::None
147    }
148}
149
150/// LZ77 decoder.
151#[derive(Debug, Default)]
152pub struct Lz77Decoder {
153    buffer: Vec<u8>,
154    offset: usize,
155}
156
157impl Lz77Decoder {
158    /// Makes a new [`Lz77Decoder`] instance.
159    pub fn new() -> Self {
160        Self::default()
161    }
162
163    /// Decodes a [`Code`].
164    ///
165    /// The decoded bytes are appended to the buffer of [`Lz77Decoder`].
166    #[inline]
167    pub fn decode(&mut self, code: Code) -> io::Result<()> {
168        match code {
169            Code::Literal(b) => {
170                self.buffer.push(b);
171            }
172            Code::Pointer {
173                length,
174                backward_distance,
175            } => {
176                if self.buffer.len() < backward_distance as usize {
177                    return Err(io::Error::new(
178                        io::ErrorKind::InvalidData,
179                        #[cfg(feature = "std")]
180                        format!(
181                            "Too long backword reference: buffer.len={}, distance={}",
182                            self.buffer.len(),
183                            backward_distance
184                        ),
185                        #[cfg(not(feature = "std"))]
186                        "Too long backword reference",
187                    ));
188                }
189                rle_decode(
190                    &mut self.buffer,
191                    usize::from(backward_distance),
192                    usize::from(length),
193                );
194            }
195        }
196        Ok(())
197    }
198
199    /// Appends the bytes read from `reader` to the buffer of [`Lz77Decoder`].
200    pub fn extend_from_reader<R: io::Read>(&mut self, mut reader: R) -> io::Result<usize> {
201        reader.read_to_end(&mut self.buffer)
202    }
203
204    /// Appends the given bytes to the buffer of [`Lz77Decoder`].
205    pub fn extend_from_slice(&mut self, buf: &[u8]) {
206        self.buffer.extend_from_slice(buf);
207        self.offset += buf.len();
208    }
209
210    /// Clears the buffer of [`Lz77Decoder`].
211    pub fn clear(&mut self) {
212        self.buffer.clear();
213        self.offset = 0;
214    }
215
216    /// Returns the buffer of [`Lz77Decoder`].
217    #[inline]
218    pub fn buffer(&self) -> &[u8] {
219        &self.buffer[self.offset..]
220    }
221
222    fn truncate_old_buffer(&mut self) {
223        if self.buffer().is_empty() && self.buffer.len() > MAX_DISTANCE as usize * 4 {
224            let old_len = self.buffer.len();
225            let new_len = MAX_DISTANCE as usize;
226            {
227                // isolation to please borrow checker
228                let (dst, src) = self.buffer.split_at_mut(old_len - new_len);
229                dst[..new_len].copy_from_slice(src);
230            }
231            self.buffer.truncate(new_len);
232            self.offset = new_len;
233        }
234    }
235}
236
237impl io::Read for Lz77Decoder {
238    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
239        let copy_size = cmp::min(buf.len(), self.buffer.len() - self.offset);
240        buf[..copy_size].copy_from_slice(&self.buffer[self.offset..][..copy_size]);
241        self.offset += copy_size;
242        self.truncate_old_buffer();
243        Ok(copy_size)
244    }
245}
246
247#[cfg(test)]
248mod tests {
249    use super::*;
250    use alloc::vec::Vec;
251    use core2::io::Read as _;
252
253    #[test]
254    fn encoder_and_decoder_works() {
255        let mut codes = Vec::new();
256        let mut encoder = DefaultLz77Encoder::new();
257        encoder.encode(b"hello world!", &mut codes);
258        encoder.flush(&mut codes);
259        assert!(!codes.is_empty());
260
261        let mut decoder = Lz77Decoder::new();
262        for code in codes {
263            decoder.decode(code).unwrap();
264        }
265        assert_eq!(decoder.buffer(), b"hello world!");
266
267        let mut decoded = Vec::new();
268        decoder.read_to_end(&mut decoded).unwrap();
269        assert_eq!(decoded, b"hello world!");
270        assert!(decoder.buffer().is_empty());
271    }
272}