lamezip77/
lz77.rs

1use crate::hashtables::HashBits;
2use crate::sliding_window::SlidingWindowBuf;
3
4#[cfg(feature = "alloc")]
5extern crate alloc as alloc_crate;
6#[cfg(feature = "alloc")]
7use alloc_crate::{alloc, boxed::Box};
8
9/// Settings for tuning compression
10#[derive(Clone, Copy, PartialEq, Eq, Debug)]
11pub struct LZSettings {
12    /// If a match has a length >= this value, use it immediately and stop searching.
13    ///
14    /// `nice_length` in the gzip implementation
15    pub good_enough_search_len: u64,
16    /// If a match has a length > this value, don't bother inserting all sliding substrings
17    /// into hash table (only insert the head, and then skip everything until the next input after the match)
18    ///
19    /// `max_lazy` in the gzip implementation, only for compression levels <= 3
20    pub max_len_to_insert_all_substr: u64,
21    /// Only follow the hash table at most this many times.
22    ///
23    /// `max_chain` in the gzip implementation
24    pub max_prev_chain_follows: u64,
25    /// Don't output matches immediately. Instead, try one position forward to see if it produces a longer match
26    ///
27    /// In the gzip implementation, done for compression levels >= 4
28    pub defer_output_match: bool,
29    /// Don't look for a potentially-longer match if there is already one this good.
30    /// Only used when [defer_output_match](Self::defer_output_match) is set.
31    ///
32    /// `max_lazy` in the gzip implementation, only for compression levels >= 4
33    pub good_enough_defer_len: u64,
34    /// If there is already a deferred match this long, follow hash table chain fewer times.
35    /// Only used when [defer_output_match](Self::defer_output_match) is set.
36    ///
37    /// `good_length` in the gzip implementation.
38    pub search_faster_defer_len: u64,
39    /// Minimum distance needed for a match (i.e. larger than 1)
40    ///
41    /// This is used for Nintendo LZ77 in VRAM mode.
42    pub min_disp: u64,
43    /// Hold this number of literals at the end of stream, and output it as a literal run.
44    ///
45    /// This is used to meet LZ4's end-of-stream conditions in a simple way.
46    pub eos_holdout_bytes: u64,
47}
48
49impl Default for LZSettings {
50    fn default() -> Self {
51        Self {
52            good_enough_search_len: u64::MAX,
53            max_len_to_insert_all_substr: u64::MAX,
54            max_prev_chain_follows: u64::MAX,
55            defer_output_match: false,
56            good_enough_defer_len: u64::MAX,
57            search_faster_defer_len: u64::MAX,
58            min_disp: 1,
59            eos_holdout_bytes: 0,
60        }
61    }
62}
63
64/// Something that the LZ77 engine can output.
65/// Either a literal or a backreference.
66#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
67pub enum LZOutput {
68    /// Literal byte
69    Lit(u8),
70    /// Backreference. `disp` is at least 1, where 1 is the last byte output.
71    Ref { disp: u64, len: u64 },
72}
73
74impl Default for LZOutput {
75    fn default() -> Self {
76        Self::Lit(0)
77    }
78}
79
80#[derive(Clone, Copy, PartialEq, Eq, Debug)]
81struct DeferredMatch {
82    first_byte: u8,
83    best_match_pos: u64,
84    best_match_len: u64,
85}
86
87/// Parameterized, streaming LZ77 compression engine
88///
89/// * `LOOKBACK_SZ` specifies the window size
90/// * `LOOKAHEAD_SZ` specifies the number of bytes that will be buffered before attempting to compress any output.
91/// * `TOT_BUF_SZ` must be the sum of `LOOKBACK_SZ` and `LOOKAHEAD_SZ` and is a const generics workaround
92/// * `MIN_MATCH` specifies the minimum length of matches
93/// * `MAX_MATCH` specifies the maximum length of matches. It may be usize::MAX to make this unlimited.
94///    This may exceed `LOOKAHEAD_SZ`, but longer matches are only possible if a large chunk of data is passed to the
95///    compression function in a single call.
96/// * `HASH_BITS` specifies the number of bits to use in hashes.
97/// * `HASH_SZ` must be `1 << HASH_BITS` and is a const generics workaround.
98/// * `DICT_BITS` specifies the number of bits to use in the hash chain table.
99/// * `DICT_SZ` must be `1 << DICT_BITS` and is a const generics workaround.
100pub struct LZEngine<
101    const LOOKBACK_SZ: usize,
102    const LOOKAHEAD_SZ: usize,
103    const TOT_BUF_SZ: usize,
104    const MIN_MATCH: usize,
105    const MAX_MATCH: usize,
106    const HASH_BITS: usize,
107    const HASH_SZ: usize,
108    const DICT_BITS: usize,
109    const DICT_SZ: usize,
110> {
111    pub(crate) sbuf: SlidingWindowBuf<LOOKBACK_SZ, LOOKAHEAD_SZ, TOT_BUF_SZ>,
112    pub(crate) h: HashBits<MIN_MATCH, HASH_BITS, HASH_SZ, DICT_BITS, DICT_SZ>,
113    redo_hash_at_cursor: bool,
114    redo_hash_behind_cursor_num_missing: u8,
115    deferred_match: Option<DeferredMatch>,
116}
117
118impl<
119        const LOOKBACK_SZ: usize,
120        const LOOKAHEAD_SZ: usize,
121        const TOT_BUF_SZ: usize,
122        const MIN_MATCH: usize,
123        const MAX_MATCH: usize,
124        const HASH_BITS: usize,
125        const HASH_SZ: usize,
126        const DICT_BITS: usize,
127        const DICT_SZ: usize,
128    >
129    LZEngine<
130        LOOKBACK_SZ,
131        LOOKAHEAD_SZ,
132        TOT_BUF_SZ,
133        MIN_MATCH,
134        MAX_MATCH,
135        HASH_BITS,
136        HASH_SZ,
137        DICT_BITS,
138        DICT_SZ,
139    >
140{
141    /// Construct a new object
142    pub fn new() -> Self {
143        assert_eq!(HASH_SZ, 1 << HASH_BITS);
144        assert_eq!(DICT_SZ, 1 << DICT_BITS);
145        assert!(MIN_MATCH >= 1);
146        assert!(MIN_MATCH <= u8::MAX as usize);
147        // this condition is required so that we can actually calculate hash
148        assert!(LOOKAHEAD_SZ >= MIN_MATCH);
149        assert!(HASH_BITS <= 32);
150
151        Self {
152            sbuf: SlidingWindowBuf::new(),
153            h: HashBits::new(),
154            redo_hash_at_cursor: true,
155            redo_hash_behind_cursor_num_missing: 0,
156            deferred_match: None,
157        }
158    }
159    /// Construct a new object in place into a heap-allocated box
160    ///
161    /// This is needed whenever the optimizer refuses to construct the object as desired
162    /// without causing a stack overflow, as this object is large.
163    #[cfg(feature = "alloc")]
164    pub fn new_boxed() -> Box<Self> {
165        unsafe {
166            let layout = core::alloc::Layout::new::<Self>();
167            let p = alloc::alloc(layout) as *mut Self;
168            Self::initialize_at(p);
169            Box::from_raw(p)
170        }
171    }
172    /// Initialize this object in place at a given pointer
173    ///
174    /// This is needed whenever the optimizer refuses to construct the object as desired
175    /// without causing a stack overflow, as this object is large.
176    pub unsafe fn initialize_at(p: *mut Self) {
177        assert_eq!(HASH_SZ, 1 << HASH_BITS);
178        assert_eq!(DICT_SZ, 1 << DICT_BITS);
179        assert!(MIN_MATCH >= 1);
180        assert!(MIN_MATCH <= u8::MAX as usize);
181        // this condition is required so that we can actually calculate hash
182        assert!(LOOKAHEAD_SZ >= MIN_MATCH);
183        assert!(HASH_BITS <= 32);
184
185        SlidingWindowBuf::initialize_at(core::ptr::addr_of_mut!((*p).sbuf));
186        HashBits::initialize_at(core::ptr::addr_of_mut!((*p).h));
187        (*p).redo_hash_at_cursor = true;
188        (*p).redo_hash_behind_cursor_num_missing = 0;
189        (*p).deferred_match = None;
190    }
191
192    /// Compress some data
193    ///
194    /// * `settings` specifies compression settings. Settings can change between calls, although this is not well-tested.
195    /// * `inp` contains data to compress
196    /// * `end_of_stream` indicates whether there is no more input data, which causes final blocks to get flushed.
197    /// * `outp` is a callback sink for compressed data.
198    pub fn compress<O, E>(
199        &mut self,
200        settings: &LZSettings,
201        inp: &[u8],
202        end_of_stream: bool,
203        mut outp: O,
204    ) -> Result<(), E>
205    where
206        O: FnMut(LZOutput) -> Result<(), E>,
207    {
208        assert!(settings.min_disp >= 1);
209        assert!(settings.eos_holdout_bytes <= LOOKAHEAD_SZ as u64);
210
211        let mut win = self.sbuf.add_inp(inp);
212
213        if !settings.defer_output_match {
214            if let Some(deferred_match) = self.deferred_match {
215                // XXX this is not tested
216                outp(LZOutput::Lit(deferred_match.first_byte))?;
217                self.deferred_match = None;
218            }
219        }
220
221        let required_min_bytes_left = if end_of_stream {
222            settings.eos_holdout_bytes as usize
223        } else {
224            LOOKAHEAD_SZ
225        };
226
227        while win.tot_ahead_sz() > required_min_bytes_left {
228            if self.redo_hash_behind_cursor_num_missing > 0 {
229                // we know there is >= 1 byte always, and >= LOOKAHEAD_SZ + 1 (aka >= MIN_MATCH + 1) bytes if not EOS
230                // FIXME change EOS to flush??
231
232                let mut hash = self.h.hash_of_head;
233
234                for i in (0..self.redo_hash_behind_cursor_num_missing).rev() {
235                    // when we are in this situation, there is always one more htab update than hash update
236                    // so we start with a hash update
237                    hash = self
238                        .h
239                        .calc_new_hash(hash, win.peek_byte(MIN_MATCH - 1 - i as usize));
240
241                    if i != 0 {
242                        let _old_hpos = self.h.put_raw_into_htab(hash, win.cursor_pos() - i as u64);
243                    }
244                }
245
246                self.redo_hash_behind_cursor_num_missing = 0;
247                self.h.hash_of_head = hash;
248            }
249
250            if self.redo_hash_at_cursor {
251                // we need to prime the hash table by recomputing the hash for cursor
252                // either at the start or after a span
253
254                // we either have at least LOOKAHEAD_SZ + 1 bytes available
255                // (which is at least MIN_MATCH),
256                // *or* we don't but are at EOS already (very short input)
257                let mut hash = 0;
258                for i in 0..MIN_MATCH {
259                    hash = self.h.calc_new_hash(hash, win.peek_byte(i));
260                }
261                self.h.hash_of_head = hash;
262                self.redo_hash_at_cursor = false;
263            }
264
265            let b: u8 = win.peek_byte(0);
266
267            let mut old_hpos = self.h.put_head_into_htab(&win);
268
269            let mut best_match_endpeek = b;
270            let mut best_match_len = 1;
271            let mut best_match_pos = u64::MAX;
272
273            // this is what we're matching against
274            let max_match = if end_of_stream && settings.eos_holdout_bytes > 0 {
275                win.tot_ahead_sz() - settings.eos_holdout_bytes as usize
276            } else {
277                // xxx this overflow prevention is broken
278                MAX_MATCH.try_into().unwrap_or(usize::MAX - MIN_MATCH - 1)
279            };
280            // extra few bytes in the hopes that we don't have to
281            // do the redo_hash_behind_cursor_num_missing calculation
282            let cursor_spans = win.get_next_spans(win.cursor_pos(), max_match + MIN_MATCH + 1);
283
284            let skip_search = if let Some(deferred_match) = self.deferred_match {
285                // should always be true because of the XXX check at the beginning
286                debug_assert!(settings.defer_output_match);
287                deferred_match.best_match_len >= settings.good_enough_defer_len
288            } else {
289                false
290            };
291
292            let mut prev_follow_limit = if let Some(deferred_match) = self.deferred_match {
293                if deferred_match.best_match_len >= settings.search_faster_defer_len {
294                    settings.max_prev_chain_follows / 4
295                } else {
296                    settings.max_prev_chain_follows
297                }
298            } else {
299                settings.max_prev_chain_follows
300            };
301
302            if !skip_search {
303                // a match within range
304                // (we can terminate immediately because the prev chain only ever goes
305                // further and further backwards)
306                while prev_follow_limit > 0
307                    && old_hpos != u64::MAX
308                    && old_hpos + (LOOKBACK_SZ as u64) >= win.cursor_pos()
309                {
310                    prev_follow_limit -= 1;
311                    let eval_hpos = old_hpos;
312                    old_hpos = self.h.prev[(old_hpos & ((1 << DICT_BITS) - 1)) as usize];
313
314                    if !(eval_hpos + settings.min_disp <= win.cursor_pos()) {
315                        // too close
316                        continue;
317                    }
318
319                    let lookback_spans = win.get_next_spans(eval_hpos, max_match);
320
321                    // optimization because the match can't get better if it
322                    // doesn't match the best one we've got up to the best length we've got
323                    if lookback_spans[best_match_len - 1] != best_match_endpeek {
324                        continue;
325                    }
326
327                    let match_len = lookback_spans.compare(&cursor_spans);
328                    debug_assert!(match_len <= MAX_MATCH);
329                    if match_len >= MIN_MATCH {
330                        if match_len > best_match_len {
331                            best_match_len = match_len;
332                            best_match_pos = eval_hpos;
333                            best_match_endpeek = lookback_spans[match_len - 1];
334                            if match_len as u64 >= settings.good_enough_search_len {
335                                break;
336                            }
337                        }
338                    }
339                }
340            }
341
342            if settings.defer_output_match {
343                if let Some(deferred_match) = self.deferred_match {
344                    if deferred_match.best_match_len >= best_match_len as u64 {
345                        // if there is a deferred match, and it's better than this one,
346                        // then output the deferred one now
347
348                        outp(LZOutput::Ref {
349                            // cursor has been advanced one position
350                            disp: win.cursor_pos() - 1 - deferred_match.best_match_pos,
351                            len: deferred_match.best_match_len,
352                        })?;
353
354                        let match_len_minus_1 = (deferred_match.best_match_len - 1) as usize;
355                        if deferred_match.best_match_len <= settings.max_len_to_insert_all_substr {
356                            // because of the advancing of one position, the hash starting at cursor_pos() is already inserted
357                            // put_span_into_htab starts at +1, so this actually starts at cursor_pos()+1
358                            let deferred_span =
359                                win.get_next_spans(win.cursor_pos(), match_len_minus_1 + MIN_MATCH);
360                            debug_assert!(deferred_span.len() >= match_len_minus_1);
361                            self.h.put_span_into_htab(
362                                &deferred_span,
363                                win.cursor_pos(),
364                                match_len_minus_1,
365                            );
366                            let avail_extra_bytes = deferred_span.len() - match_len_minus_1;
367                            if avail_extra_bytes < MIN_MATCH {
368                                self.redo_hash_behind_cursor_num_missing =
369                                    (MIN_MATCH - avail_extra_bytes) as u8;
370                            }
371                        } else {
372                            self.redo_hash_at_cursor = true;
373                        }
374                        win.roll_window(match_len_minus_1);
375                        self.deferred_match = None;
376                    } else {
377                        // there is a deferred match, but it's worse than this one
378                        // output the first char, then save current as deferred
379
380                        outp(LZOutput::Lit(deferred_match.first_byte))?;
381                        self.deferred_match = Some(DeferredMatch {
382                            first_byte: b,
383                            best_match_pos,
384                            best_match_len: best_match_len as u64,
385                        });
386                        win.roll_window(1);
387                    }
388                } else {
389                    // no deferred match
390                    if best_match_len < MIN_MATCH {
391                        // no match here either
392                        outp(LZOutput::Lit(b))?;
393                    } else {
394                        // a match here, let's defer it
395
396                        self.deferred_match = Some(DeferredMatch {
397                            first_byte: b,
398                            best_match_pos,
399                            best_match_len: best_match_len as u64,
400                        });
401                    }
402                    // advance one position in any case
403                    win.roll_window(1);
404                }
405            } else {
406                if best_match_len < MIN_MATCH {
407                    // output a literal
408                    outp(LZOutput::Lit(b))?;
409                    win.roll_window(1);
410                } else {
411                    // output a match
412                    outp(LZOutput::Ref {
413                        disp: win.cursor_pos() - best_match_pos,
414                        len: best_match_len as u64,
415                    })?;
416                    if best_match_len as u64 <= settings.max_len_to_insert_all_substr {
417                        self.h
418                            .put_span_into_htab(&cursor_spans, win.cursor_pos(), best_match_len);
419                        let avail_extra_bytes = cursor_spans.len() - best_match_len;
420                        if avail_extra_bytes < MIN_MATCH {
421                            self.redo_hash_behind_cursor_num_missing =
422                                (MIN_MATCH - avail_extra_bytes) as u8;
423                        }
424                    } else {
425                        self.redo_hash_at_cursor = true;
426                    }
427                    win.roll_window(best_match_len);
428                }
429            }
430        }
431
432        if win.tot_ahead_sz() > 0 {
433            debug_assert!(win.tot_ahead_sz() <= LOOKAHEAD_SZ);
434            // ensure everything left is pulled into the buffer before we let go of inp
435            win.roll_window(0);
436        }
437
438        if end_of_stream {
439            if let Some(deferred_match) = self.deferred_match {
440                // output last deferred match
441                // ignore window, EOS
442
443                outp(LZOutput::Ref {
444                    // cursor has been advanced one position
445                    disp: win.cursor_pos() - 1 - deferred_match.best_match_pos,
446                    len: deferred_match.best_match_len,
447                })?;
448            }
449
450            if settings.eos_holdout_bytes > 0 {
451                // dump these out as literals
452                debug_assert!(win.tot_ahead_sz() as u64 <= settings.eos_holdout_bytes);
453                while win.tot_ahead_sz() > 0 {
454                    outp(LZOutput::Lit(win.peek_byte(0)))?;
455                    win.roll_window(1);
456                }
457            }
458            assert!(win.tot_ahead_sz() == 0);
459        }
460
461        Ok(())
462    }
463}
464
465#[cfg(feature = "alloc")]
466#[cfg(test)]
467mod tests {
468    extern crate std;
469    use std::{
470        boxed::Box,
471        fs::File,
472        io::{BufWriter, Write},
473        println,
474        vec::Vec,
475    };
476
477    use super::*;
478
479    #[test]
480    fn lz_head_only() {
481        let mut lz: Box<LZEngine<256, 8, { 256 + 8 }, 3, 256, 15, { 1 << 15 }, 16, { 1 << 16 }>> =
482            LZEngine::new_boxed();
483        let mut compressed_out = Vec::new();
484        lz.compress::<_, ()>(&LZSettings::default(), &[0x12, 0x34, 0x56], true, |x| {
485            compressed_out.push(x);
486            Ok(())
487        })
488        .unwrap();
489
490        assert_eq!(compressed_out.len(), 3);
491        assert_eq!(compressed_out[0], LZOutput::Lit(0x12));
492        assert_eq!(compressed_out[1], LZOutput::Lit(0x34));
493        assert_eq!(compressed_out[2], LZOutput::Lit(0x56));
494    }
495
496    #[test]
497    fn lz_head_split() {
498        let mut lz: Box<LZEngine<256, 8, { 256 + 8 }, 3, 256, 15, { 1 << 15 }, 16, { 1 << 16 }>> =
499            LZEngine::new_boxed();
500        let mut compressed_out = Vec::new();
501        lz.compress::<_, ()>(&LZSettings::default(), &[0x12], false, |x| {
502            compressed_out.push(x);
503            Ok(())
504        })
505        .unwrap();
506        println!("compress 1");
507        lz.compress::<_, ()>(&LZSettings::default(), &[0x34, 0x56], true, |x| {
508            compressed_out.push(x);
509            Ok(())
510        })
511        .unwrap();
512        println!("compress 2");
513
514        assert_eq!(compressed_out.len(), 3);
515        assert_eq!(compressed_out[0], LZOutput::Lit(0x12));
516        assert_eq!(compressed_out[1], LZOutput::Lit(0x34));
517        assert_eq!(compressed_out[2], LZOutput::Lit(0x56));
518    }
519
520    #[test]
521    fn lz_not_compressible() {
522        let mut lz: Box<LZEngine<256, 8, { 256 + 8 }, 3, 256, 15, { 1 << 15 }, 16, { 1 << 16 }>> =
523            LZEngine::new_boxed();
524        let mut compressed_out = Vec::new();
525        lz.compress::<_, ()>(
526            &LZSettings::default(),
527            &[0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc],
528            true,
529            |x| {
530                compressed_out.push(x);
531                Ok(())
532            },
533        )
534        .unwrap();
535
536        assert_eq!(compressed_out.len(), 6);
537        assert_eq!(compressed_out[0], LZOutput::Lit(0x12));
538        assert_eq!(compressed_out[1], LZOutput::Lit(0x34));
539        assert_eq!(compressed_out[2], LZOutput::Lit(0x56));
540        assert_eq!(compressed_out[3], LZOutput::Lit(0x78));
541        assert_eq!(compressed_out[4], LZOutput::Lit(0x9a));
542        assert_eq!(compressed_out[5], LZOutput::Lit(0xbc));
543    }
544
545    #[test]
546    fn lz_simple_repeat() {
547        let mut lz: Box<LZEngine<256, 8, { 256 + 8 }, 3, 256, 15, { 1 << 15 }, 16, { 1 << 16 }>> =
548            LZEngine::new_boxed();
549        let mut compressed_out = Vec::new();
550        lz.compress::<_, ()>(
551            &LZSettings::default(),
552            &[0x12, 0x34, 0x56, 0x12, 0x34, 0x56],
553            true,
554            |x| {
555                compressed_out.push(x);
556                Ok(())
557            },
558        )
559        .unwrap();
560
561        assert_eq!(compressed_out.len(), 4);
562        assert_eq!(compressed_out[0], LZOutput::Lit(0x12));
563        assert_eq!(compressed_out[1], LZOutput::Lit(0x34));
564        assert_eq!(compressed_out[2], LZOutput::Lit(0x56));
565        assert_eq!(compressed_out[3], LZOutput::Ref { disp: 3, len: 3 });
566    }
567
568    #[test]
569    fn lz_longer_than_disp_repeat() {
570        let mut lz: Box<LZEngine<256, 8, { 256 + 8 }, 3, 256, 15, { 1 << 15 }, 16, { 1 << 16 }>> =
571            LZEngine::new_boxed();
572        let mut compressed_out = Vec::new();
573        lz.compress::<_, ()>(
574            &LZSettings::default(),
575            &[0x12, 0x34, 0x56, 0x12, 0x34, 0x56, 0x12, 0x34, 0x56],
576            true,
577            |x| {
578                compressed_out.push(x);
579                Ok(())
580            },
581        )
582        .unwrap();
583
584        assert_eq!(compressed_out.len(), 4);
585        assert_eq!(compressed_out[0], LZOutput::Lit(0x12));
586        assert_eq!(compressed_out[1], LZOutput::Lit(0x34));
587        assert_eq!(compressed_out[2], LZOutput::Lit(0x56));
588        assert_eq!(compressed_out[3], LZOutput::Ref { disp: 3, len: 6 });
589    }
590
591    #[test]
592    fn lz_longer_than_lookahead_repeat() {
593        let mut lz: Box<LZEngine<256, 8, { 256 + 8 }, 3, 256, 15, { 1 << 15 }, 16, { 1 << 16 }>> =
594            LZEngine::new_boxed();
595        let mut compressed_out = Vec::new();
596        lz.compress::<_, ()>(
597            &LZSettings::default(),
598            &[
599                0x12, 0x34, 0x56, 0x12, 0x34, 0x56, 0x12, 0x34, 0x56, 0x12, 0x34, 0x56, 0x12, 0x34,
600                0x56, 0x12, 0x34, 0x56,
601            ],
602            true,
603            |x| {
604                compressed_out.push(x);
605                Ok(())
606            },
607        )
608        .unwrap();
609
610        assert_eq!(compressed_out.len(), 4);
611        assert_eq!(compressed_out[0], LZOutput::Lit(0x12));
612        assert_eq!(compressed_out[1], LZOutput::Lit(0x34));
613        assert_eq!(compressed_out[2], LZOutput::Lit(0x56));
614        assert_eq!(compressed_out[3], LZOutput::Ref { disp: 3, len: 15 });
615    }
616
617    #[test]
618    fn lz_split_repeat() {
619        let mut lz: Box<LZEngine<256, 8, { 256 + 8 }, 3, 256, 15, { 1 << 15 }, 16, { 1 << 16 }>> =
620            LZEngine::new_boxed();
621        let mut compressed_out = Vec::new();
622        lz.compress::<_, ()>(
623            &LZSettings::default(),
624            &[
625                0x12, 0x34, 0x56, 0x12, 0x34, 0x56, 0x12, 0x34, 0x56, 0x12, 0x34, 0x56,
626            ],
627            false,
628            |x| {
629                compressed_out.push(x);
630                Ok(())
631            },
632        )
633        .unwrap();
634        lz.compress::<_, ()>(
635            &LZSettings::default(),
636            &[0x12, 0x34, 0x56, 0x12, 0x34, 0x56],
637            true,
638            |x| {
639                compressed_out.push(x);
640                Ok(())
641            },
642        )
643        .unwrap();
644
645        assert_eq!(compressed_out.len(), 5);
646        assert_eq!(compressed_out[0], LZOutput::Lit(0x12));
647        assert_eq!(compressed_out[1], LZOutput::Lit(0x34));
648        assert_eq!(compressed_out[2], LZOutput::Lit(0x56));
649        assert_eq!(compressed_out[3], LZOutput::Ref { disp: 3, len: 9 });
650        assert_eq!(compressed_out[4], LZOutput::Ref { disp: 3, len: 6 });
651    }
652
653    #[test]
654    fn lz_detailed_backref_hashing() {
655        let mut lz: Box<LZEngine<256, 8, { 256 + 8 }, 3, 256, 15, { 1 << 15 }, 16, { 1 << 16 }>> =
656            LZEngine::new_boxed();
657        let mut compressed_out = Vec::new();
658        lz.compress::<_, ()>(
659            &LZSettings::default(),
660            &[
661                0x12, 0x34, 0x56, 0x12, 0x34, 0x56, 0x12, 0x34, 0x56, 0x12, 0x34, 0x56, 0x12,
662            ],
663            false,
664            |x| {
665                compressed_out.push(x);
666                Ok(())
667            },
668        )
669        .unwrap();
670        lz.compress::<_, ()>(
671            &LZSettings::default(),
672            &[0x34, 0x56, 0x12, 0x34, 0x56],
673            true,
674            |x| {
675                compressed_out.push(x);
676                Ok(())
677            },
678        )
679        .unwrap();
680
681        assert_eq!(compressed_out.len(), 5);
682        assert_eq!(compressed_out[0], LZOutput::Lit(0x12));
683        assert_eq!(compressed_out[1], LZOutput::Lit(0x34));
684        assert_eq!(compressed_out[2], LZOutput::Lit(0x56));
685        assert_eq!(compressed_out[3], LZOutput::Ref { disp: 3, len: 10 });
686        assert_eq!(compressed_out[4], LZOutput::Ref { disp: 3, len: 5 });
687
688        assert_eq!(lz.h.htab[(0x12 << 10) ^ (0x34 << 5) ^ 0x56], 15);
689        assert_eq!(lz.h.prev[15], 12);
690        assert_eq!(lz.h.prev[12], 9);
691        assert_eq!(lz.h.prev[9], 6);
692        assert_eq!(lz.h.prev[6], 3);
693        assert_eq!(lz.h.prev[3], 0);
694        assert_eq!(lz.h.prev[0], u64::MAX);
695
696        assert_eq!(lz.h.htab[(0x14 << 10) ^ (0x56 << 5) ^ 0x12], 13);
697        assert_eq!(lz.h.prev[13], 10);
698        assert_eq!(lz.h.prev[10], 7);
699        assert_eq!(lz.h.prev[7], 4);
700        assert_eq!(lz.h.prev[4], 1);
701        assert_eq!(lz.h.prev[1], u64::MAX);
702
703        assert_eq!(lz.h.htab[(0x16 << 10) ^ (0x12 << 5) ^ 0x34], 14);
704        assert_eq!(lz.h.prev[14], 11);
705        assert_eq!(lz.h.prev[11], 8);
706        assert_eq!(lz.h.prev[8], 5);
707        assert_eq!(lz.h.prev[5], 2);
708        assert_eq!(lz.h.prev[2], u64::MAX);
709    }
710
711    #[test]
712    fn lz_peek_ahead_after_span() {
713        let mut lz: Box<LZEngine<256, 8, { 256 + 8 }, 3, 256, 15, { 1 << 15 }, 16, { 1 << 16 }>> =
714            LZEngine::new_boxed();
715        let mut compressed_out = Vec::new();
716        lz.compress::<_, ()>(
717            &LZSettings::default(),
718            &[
719                0x12, 0x34, 0x56, 0x12, 0x34, 0x56, 0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc,
720            ],
721            false,
722            |x| {
723                compressed_out.push(x);
724                Ok(())
725            },
726        )
727        .unwrap();
728        lz.compress::<_, ()>(
729            &LZSettings::default(),
730            &[0x34, 0x56, 0x12, 0x34, 0x56, 0xde],
731            true,
732            |x| {
733                compressed_out.push(x);
734                Ok(())
735            },
736        )
737        .unwrap();
738
739        assert_eq!(compressed_out.len(), 9);
740        assert_eq!(compressed_out[0], LZOutput::Lit(0x12));
741        assert_eq!(compressed_out[1], LZOutput::Lit(0x34));
742        assert_eq!(compressed_out[2], LZOutput::Lit(0x56));
743        assert_eq!(compressed_out[3], LZOutput::Ref { disp: 3, len: 6 });
744        assert_eq!(compressed_out[4], LZOutput::Lit(0x78));
745        assert_eq!(compressed_out[5], LZOutput::Lit(0x9a));
746        assert_eq!(compressed_out[6], LZOutput::Lit(0xbc));
747        assert_eq!(compressed_out[7], LZOutput::Ref { disp: 8, len: 5 });
748        assert_eq!(compressed_out[8], LZOutput::Lit(0xde));
749
750        assert_eq!(lz.h.htab[(0x12 << 10) ^ (0x34 << 5) ^ 0x56], 14);
751        assert_eq!(lz.h.prev[14], 6);
752        assert_eq!(lz.h.prev[6], 3);
753        assert_eq!(lz.h.prev[3], 0);
754        assert_eq!(lz.h.prev[0], u64::MAX);
755
756        assert_eq!(lz.h.htab[(0x14 << 10) ^ (0x56 << 5) ^ 0x12], 12);
757        assert_eq!(lz.h.prev[12], 4);
758        assert_eq!(lz.h.prev[4], 1);
759        assert_eq!(lz.h.prev[1], u64::MAX);
760
761        assert_eq!(lz.h.htab[(0x16 << 10) ^ (0x12 << 5) ^ 0x34], 13);
762        assert_eq!(lz.h.prev[13], 5);
763        assert_eq!(lz.h.prev[5], 2);
764        assert_eq!(lz.h.prev[2], u64::MAX);
765
766        assert_eq!(lz.h.htab[(0x14 << 10) ^ (0x56 << 5) ^ 0x78], 7);
767        assert_eq!(lz.h.prev[7], u64::MAX);
768
769        assert_eq!(lz.h.htab[(0x14 << 10) ^ (0x56 << 5) ^ 0xde], 15);
770        assert_eq!(lz.h.prev[15], u64::MAX);
771
772        assert_eq!(lz.h.htab[(0x16 << 10) ^ (0x78 << 5) ^ 0x9a], 8);
773        assert_eq!(lz.h.prev[8], u64::MAX);
774
775        assert_eq!(lz.h.htab[(0x18 << 10) ^ (0x9a << 5) ^ 0xbc], 9);
776        assert_eq!(lz.h.prev[9], u64::MAX);
777
778        assert_eq!(lz.h.htab[(0x1a << 10) ^ (0xbc << 5) ^ 0x34], 10);
779        assert_eq!(lz.h.prev[10], u64::MAX);
780
781        assert_eq!(lz.h.htab[(0x1c << 10) ^ (0x34 << 5) ^ 0x56], 11);
782        assert_eq!(lz.h.prev[11], u64::MAX);
783    }
784
785    #[test]
786    fn lz_big_file() {
787        let d = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
788
789        let inp_fn = d.join("src/lz77.rs");
790        let outp_fn = d.join("test.bin");
791
792        let inp = std::fs::read(inp_fn).unwrap();
793
794        let mut lz: Box<LZEngine<256, 8, { 256 + 8 }, 3, 256, 15, { 1 << 15 }, 16, { 1 << 16 }>> =
795            LZEngine::new_boxed();
796        let mut compressed_out = Vec::new();
797        lz.compress::<_, ()>(&LZSettings::default(), &inp, true, |x| {
798            compressed_out.push(x);
799            Ok(())
800        })
801        .unwrap();
802
803        let mut outp_f = BufWriter::new(File::create(outp_fn).unwrap());
804        for &lz_tok in &compressed_out {
805            match lz_tok {
806                LZOutput::Lit(lit) => {
807                    outp_f.write(&[0]).unwrap();
808                    outp_f.write(&[lit]).unwrap();
809                }
810                LZOutput::Ref { disp, len } => {
811                    outp_f.write(&[0xff]).unwrap();
812                    outp_f.write(&disp.to_le_bytes()).unwrap();
813                    outp_f.write(&len.to_le_bytes()).unwrap();
814                }
815            }
816        }
817
818        let mut decompress = Vec::new();
819        for &lz_tok in &compressed_out {
820            match lz_tok {
821                LZOutput::Lit(lit) => {
822                    decompress.push(lit);
823                }
824                LZOutput::Ref { disp, len } => {
825                    assert!(len > 0);
826                    assert!(len <= 256);
827                    assert!(disp >= 1);
828                    assert!(disp <= 256);
829                    for _ in 0..len {
830                        decompress.push(decompress[decompress.len() - disp as usize]);
831                    }
832                }
833            }
834        }
835
836        assert_eq!(inp, decompress);
837    }
838
839    #[test]
840    fn lz_big_file_2() {
841        let d = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
842
843        let inp_fn = d.join("src/lz77.rs");
844        let outp_fn = d.join("test2.bin");
845
846        let inp = std::fs::read(inp_fn).unwrap();
847
848        let mut lz: Box<
849            LZEngine<32768, 256, { 32768 + 256 }, 3, 256, 15, { 1 << 15 }, 16, { 1 << 16 }>,
850        > = LZEngine::new_boxed();
851        let mut compressed_out = Vec::new();
852        for i in 0..inp.len() {
853            lz.compress::<_, ()>(&LZSettings::default(), &[inp[i]], false, |x| {
854                compressed_out.push(x);
855                Ok(())
856            })
857            .unwrap();
858        }
859        lz.compress::<_, ()>(&LZSettings::default(), &[], true, |x| {
860            compressed_out.push(x);
861            Ok(())
862        })
863        .unwrap();
864
865        let mut outp_f = BufWriter::new(File::create(outp_fn).unwrap());
866        for &lz_tok in &compressed_out {
867            match lz_tok {
868                LZOutput::Lit(lit) => {
869                    outp_f.write(&[0]).unwrap();
870                    outp_f.write(&[lit]).unwrap();
871                }
872                LZOutput::Ref { disp, len } => {
873                    outp_f.write(&[0xff]).unwrap();
874                    outp_f.write(&disp.to_le_bytes()).unwrap();
875                    outp_f.write(&len.to_le_bytes()).unwrap();
876                }
877            }
878        }
879
880        let mut decompress = Vec::new();
881        for &lz_tok in &compressed_out {
882            match lz_tok {
883                LZOutput::Lit(lit) => {
884                    decompress.push(lit);
885                }
886                LZOutput::Ref { disp, len } => {
887                    assert!(len > 0);
888                    assert!(len <= 256);
889                    assert!(disp >= 1);
890                    assert!(disp <= 32768);
891                    for _ in 0..len {
892                        decompress.push(decompress[decompress.len() - disp as usize]);
893                    }
894                }
895            }
896        }
897
898        assert_eq!(inp, decompress);
899    }
900
901    #[test]
902    fn lz_max_insert_len() {
903        let mut settings = LZSettings::default();
904        settings.max_len_to_insert_all_substr = 4;
905
906        let mut lz: Box<LZEngine<256, 8, { 256 + 8 }, 3, 256, 15, { 1 << 15 }, 16, { 1 << 16 }>> =
907            LZEngine::new_boxed();
908        let mut compressed_out = Vec::new();
909        lz.compress::<_, ()>(
910            &settings,
911            &[
912                0x12, 0x34, 0x56, 0x12, 0x34, 0x56, 0x12, 0x34, 0x56, 0x78, 0x12, 0x34, 0x56,
913            ],
914            true,
915            |x| {
916                compressed_out.push(x);
917                Ok(())
918            },
919        )
920        .unwrap();
921
922        assert_eq!(compressed_out.len(), 6);
923        assert_eq!(compressed_out[0], LZOutput::Lit(0x12));
924        assert_eq!(compressed_out[1], LZOutput::Lit(0x34));
925        assert_eq!(compressed_out[2], LZOutput::Lit(0x56));
926        assert_eq!(compressed_out[3], LZOutput::Ref { disp: 3, len: 6 });
927        assert_eq!(compressed_out[4], LZOutput::Lit(0x78));
928        assert_eq!(compressed_out[5], LZOutput::Ref { disp: 7, len: 3 });
929    }
930
931    #[test]
932    fn lz_deferred_insert() {
933        let mut settings = LZSettings::default();
934        settings.defer_output_match = true;
935
936        let mut lz: Box<LZEngine<256, 8, { 256 + 8 }, 3, 256, 15, { 1 << 15 }, 16, { 1 << 16 }>> =
937            LZEngine::new_boxed();
938        let mut compressed_out = Vec::new();
939        lz.compress::<_, ()>(
940            &settings,
941            &[1, 2, 3, 2, 3, 4, 5, 1, 2, 3, 4, 5],
942            true,
943            |x| {
944                compressed_out.push(x);
945                Ok(())
946            },
947        )
948        .unwrap();
949
950        assert_eq!(compressed_out.len(), 9);
951        assert_eq!(compressed_out[0], LZOutput::Lit(1));
952        assert_eq!(compressed_out[1], LZOutput::Lit(2));
953        assert_eq!(compressed_out[2], LZOutput::Lit(3));
954        assert_eq!(compressed_out[3], LZOutput::Lit(2));
955        assert_eq!(compressed_out[4], LZOutput::Lit(3));
956        assert_eq!(compressed_out[5], LZOutput::Lit(4));
957        assert_eq!(compressed_out[6], LZOutput::Lit(5));
958        assert_eq!(compressed_out[7], LZOutput::Lit(1));
959        assert_eq!(compressed_out[8], LZOutput::Ref { disp: 5, len: 4 });
960    }
961
962    #[test]
963    fn lz_deferred_insert_sike() {
964        let mut settings = LZSettings::default();
965        settings.defer_output_match = true;
966        settings.good_enough_defer_len = 3;
967
968        let mut lz: Box<LZEngine<256, 8, { 256 + 8 }, 3, 256, 15, { 1 << 15 }, 16, { 1 << 16 }>> =
969            LZEngine::new_boxed();
970        let mut compressed_out = Vec::new();
971        lz.compress::<_, ()>(
972            &settings,
973            &[1, 2, 3, 2, 3, 4, 5, 1, 2, 3, 4, 5],
974            true,
975            |x| {
976                compressed_out.push(x);
977                Ok(())
978            },
979        )
980        .unwrap();
981
982        assert_eq!(compressed_out.len(), 10);
983        assert_eq!(compressed_out[0], LZOutput::Lit(1));
984        assert_eq!(compressed_out[1], LZOutput::Lit(2));
985        assert_eq!(compressed_out[2], LZOutput::Lit(3));
986        assert_eq!(compressed_out[3], LZOutput::Lit(2));
987        assert_eq!(compressed_out[4], LZOutput::Lit(3));
988        assert_eq!(compressed_out[5], LZOutput::Lit(4));
989        assert_eq!(compressed_out[6], LZOutput::Lit(5));
990        assert_eq!(compressed_out[7], LZOutput::Ref { disp: 7, len: 3 });
991        // the defer doesn't kick in because this match is good enough
992        assert_eq!(compressed_out[8], LZOutput::Lit(4));
993        assert_eq!(compressed_out[9], LZOutput::Lit(5));
994    }
995
996    #[test]
997    fn lz_big_file_defer() {
998        let d = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
999
1000        let inp_fn = d.join("src/lz77.rs");
1001        let outp_fn = d.join("test3.bin");
1002
1003        let inp = std::fs::read(inp_fn).unwrap();
1004
1005        let mut settings = LZSettings::default();
1006        settings.defer_output_match = true;
1007        let mut lz: Box<
1008            LZEngine<32768, 256, { 32768 + 256 }, 3, 256, 15, { 1 << 15 }, 16, { 1 << 16 }>,
1009        > = LZEngine::new_boxed();
1010        let mut compressed_out = Vec::new();
1011        for i in 0..inp.len() {
1012            lz.compress::<_, ()>(&settings, &[inp[i]], false, |x| {
1013                compressed_out.push(x);
1014                Ok(())
1015            })
1016            .unwrap();
1017        }
1018        lz.compress::<_, ()>(&settings, &[], true, |x| {
1019            compressed_out.push(x);
1020            Ok(())
1021        })
1022        .unwrap();
1023
1024        let mut outp_f = BufWriter::new(File::create(outp_fn).unwrap());
1025        for &lz_tok in &compressed_out {
1026            match lz_tok {
1027                LZOutput::Lit(lit) => {
1028                    outp_f.write(&[0]).unwrap();
1029                    outp_f.write(&[lit]).unwrap();
1030                }
1031                LZOutput::Ref { disp, len } => {
1032                    outp_f.write(&[0xff]).unwrap();
1033                    outp_f.write(&disp.to_le_bytes()).unwrap();
1034                    outp_f.write(&len.to_le_bytes()).unwrap();
1035                }
1036            }
1037        }
1038
1039        let mut decompress = Vec::new();
1040        for &lz_tok in &compressed_out {
1041            match lz_tok {
1042                LZOutput::Lit(lit) => {
1043                    decompress.push(lit);
1044                }
1045                LZOutput::Ref { disp, len } => {
1046                    assert!(len > 0);
1047                    assert!(len <= 256);
1048                    assert!(disp >= 1);
1049                    assert!(disp <= 32768);
1050                    for _ in 0..len {
1051                        decompress.push(decompress[decompress.len() - disp as usize]);
1052                    }
1053                }
1054            }
1055        }
1056
1057        assert_eq!(inp, decompress);
1058    }
1059}