1use crate::hashtables::HashBits;
2use crate::sliding_window::SlidingWindowBuf;
3
4#[cfg(feature = "alloc")]
5extern crate alloc as alloc_crate;
6#[cfg(feature = "alloc")]
7use alloc_crate::{alloc, boxed::Box};
8
9#[derive(Clone, Copy, PartialEq, Eq, Debug)]
11pub struct LZSettings {
12 pub good_enough_search_len: u64,
16 pub max_len_to_insert_all_substr: u64,
21 pub max_prev_chain_follows: u64,
25 pub defer_output_match: bool,
29 pub good_enough_defer_len: u64,
34 pub search_faster_defer_len: u64,
39 pub min_disp: u64,
43 pub eos_holdout_bytes: u64,
47}
48
49impl Default for LZSettings {
50 fn default() -> Self {
51 Self {
52 good_enough_search_len: u64::MAX,
53 max_len_to_insert_all_substr: u64::MAX,
54 max_prev_chain_follows: u64::MAX,
55 defer_output_match: false,
56 good_enough_defer_len: u64::MAX,
57 search_faster_defer_len: u64::MAX,
58 min_disp: 1,
59 eos_holdout_bytes: 0,
60 }
61 }
62}
63
64#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
67pub enum LZOutput {
68 Lit(u8),
70 Ref { disp: u64, len: u64 },
72}
73
74impl Default for LZOutput {
75 fn default() -> Self {
76 Self::Lit(0)
77 }
78}
79
80#[derive(Clone, Copy, PartialEq, Eq, Debug)]
81struct DeferredMatch {
82 first_byte: u8,
83 best_match_pos: u64,
84 best_match_len: u64,
85}
86
87pub struct LZEngine<
101 const LOOKBACK_SZ: usize,
102 const LOOKAHEAD_SZ: usize,
103 const TOT_BUF_SZ: usize,
104 const MIN_MATCH: usize,
105 const MAX_MATCH: usize,
106 const HASH_BITS: usize,
107 const HASH_SZ: usize,
108 const DICT_BITS: usize,
109 const DICT_SZ: usize,
110> {
111 pub(crate) sbuf: SlidingWindowBuf<LOOKBACK_SZ, LOOKAHEAD_SZ, TOT_BUF_SZ>,
112 pub(crate) h: HashBits<MIN_MATCH, HASH_BITS, HASH_SZ, DICT_BITS, DICT_SZ>,
113 redo_hash_at_cursor: bool,
114 redo_hash_behind_cursor_num_missing: u8,
115 deferred_match: Option<DeferredMatch>,
116}
117
118impl<
119 const LOOKBACK_SZ: usize,
120 const LOOKAHEAD_SZ: usize,
121 const TOT_BUF_SZ: usize,
122 const MIN_MATCH: usize,
123 const MAX_MATCH: usize,
124 const HASH_BITS: usize,
125 const HASH_SZ: usize,
126 const DICT_BITS: usize,
127 const DICT_SZ: usize,
128 >
129 LZEngine<
130 LOOKBACK_SZ,
131 LOOKAHEAD_SZ,
132 TOT_BUF_SZ,
133 MIN_MATCH,
134 MAX_MATCH,
135 HASH_BITS,
136 HASH_SZ,
137 DICT_BITS,
138 DICT_SZ,
139 >
140{
141 pub fn new() -> Self {
143 assert_eq!(HASH_SZ, 1 << HASH_BITS);
144 assert_eq!(DICT_SZ, 1 << DICT_BITS);
145 assert!(MIN_MATCH >= 1);
146 assert!(MIN_MATCH <= u8::MAX as usize);
147 assert!(LOOKAHEAD_SZ >= MIN_MATCH);
149 assert!(HASH_BITS <= 32);
150
151 Self {
152 sbuf: SlidingWindowBuf::new(),
153 h: HashBits::new(),
154 redo_hash_at_cursor: true,
155 redo_hash_behind_cursor_num_missing: 0,
156 deferred_match: None,
157 }
158 }
159 #[cfg(feature = "alloc")]
164 pub fn new_boxed() -> Box<Self> {
165 unsafe {
166 let layout = core::alloc::Layout::new::<Self>();
167 let p = alloc::alloc(layout) as *mut Self;
168 Self::initialize_at(p);
169 Box::from_raw(p)
170 }
171 }
172 pub unsafe fn initialize_at(p: *mut Self) {
177 assert_eq!(HASH_SZ, 1 << HASH_BITS);
178 assert_eq!(DICT_SZ, 1 << DICT_BITS);
179 assert!(MIN_MATCH >= 1);
180 assert!(MIN_MATCH <= u8::MAX as usize);
181 assert!(LOOKAHEAD_SZ >= MIN_MATCH);
183 assert!(HASH_BITS <= 32);
184
185 SlidingWindowBuf::initialize_at(core::ptr::addr_of_mut!((*p).sbuf));
186 HashBits::initialize_at(core::ptr::addr_of_mut!((*p).h));
187 (*p).redo_hash_at_cursor = true;
188 (*p).redo_hash_behind_cursor_num_missing = 0;
189 (*p).deferred_match = None;
190 }
191
192 pub fn compress<O, E>(
199 &mut self,
200 settings: &LZSettings,
201 inp: &[u8],
202 end_of_stream: bool,
203 mut outp: O,
204 ) -> Result<(), E>
205 where
206 O: FnMut(LZOutput) -> Result<(), E>,
207 {
208 assert!(settings.min_disp >= 1);
209 assert!(settings.eos_holdout_bytes <= LOOKAHEAD_SZ as u64);
210
211 let mut win = self.sbuf.add_inp(inp);
212
213 if !settings.defer_output_match {
214 if let Some(deferred_match) = self.deferred_match {
215 outp(LZOutput::Lit(deferred_match.first_byte))?;
217 self.deferred_match = None;
218 }
219 }
220
221 let required_min_bytes_left = if end_of_stream {
222 settings.eos_holdout_bytes as usize
223 } else {
224 LOOKAHEAD_SZ
225 };
226
227 while win.tot_ahead_sz() > required_min_bytes_left {
228 if self.redo_hash_behind_cursor_num_missing > 0 {
229 let mut hash = self.h.hash_of_head;
233
234 for i in (0..self.redo_hash_behind_cursor_num_missing).rev() {
235 hash = self
238 .h
239 .calc_new_hash(hash, win.peek_byte(MIN_MATCH - 1 - i as usize));
240
241 if i != 0 {
242 let _old_hpos = self.h.put_raw_into_htab(hash, win.cursor_pos() - i as u64);
243 }
244 }
245
246 self.redo_hash_behind_cursor_num_missing = 0;
247 self.h.hash_of_head = hash;
248 }
249
250 if self.redo_hash_at_cursor {
251 let mut hash = 0;
258 for i in 0..MIN_MATCH {
259 hash = self.h.calc_new_hash(hash, win.peek_byte(i));
260 }
261 self.h.hash_of_head = hash;
262 self.redo_hash_at_cursor = false;
263 }
264
265 let b: u8 = win.peek_byte(0);
266
267 let mut old_hpos = self.h.put_head_into_htab(&win);
268
269 let mut best_match_endpeek = b;
270 let mut best_match_len = 1;
271 let mut best_match_pos = u64::MAX;
272
273 let max_match = if end_of_stream && settings.eos_holdout_bytes > 0 {
275 win.tot_ahead_sz() - settings.eos_holdout_bytes as usize
276 } else {
277 MAX_MATCH.try_into().unwrap_or(usize::MAX - MIN_MATCH - 1)
279 };
280 let cursor_spans = win.get_next_spans(win.cursor_pos(), max_match + MIN_MATCH + 1);
283
284 let skip_search = if let Some(deferred_match) = self.deferred_match {
285 debug_assert!(settings.defer_output_match);
287 deferred_match.best_match_len >= settings.good_enough_defer_len
288 } else {
289 false
290 };
291
292 let mut prev_follow_limit = if let Some(deferred_match) = self.deferred_match {
293 if deferred_match.best_match_len >= settings.search_faster_defer_len {
294 settings.max_prev_chain_follows / 4
295 } else {
296 settings.max_prev_chain_follows
297 }
298 } else {
299 settings.max_prev_chain_follows
300 };
301
302 if !skip_search {
303 while prev_follow_limit > 0
307 && old_hpos != u64::MAX
308 && old_hpos + (LOOKBACK_SZ as u64) >= win.cursor_pos()
309 {
310 prev_follow_limit -= 1;
311 let eval_hpos = old_hpos;
312 old_hpos = self.h.prev[(old_hpos & ((1 << DICT_BITS) - 1)) as usize];
313
314 if !(eval_hpos + settings.min_disp <= win.cursor_pos()) {
315 continue;
317 }
318
319 let lookback_spans = win.get_next_spans(eval_hpos, max_match);
320
321 if lookback_spans[best_match_len - 1] != best_match_endpeek {
324 continue;
325 }
326
327 let match_len = lookback_spans.compare(&cursor_spans);
328 debug_assert!(match_len <= MAX_MATCH);
329 if match_len >= MIN_MATCH {
330 if match_len > best_match_len {
331 best_match_len = match_len;
332 best_match_pos = eval_hpos;
333 best_match_endpeek = lookback_spans[match_len - 1];
334 if match_len as u64 >= settings.good_enough_search_len {
335 break;
336 }
337 }
338 }
339 }
340 }
341
342 if settings.defer_output_match {
343 if let Some(deferred_match) = self.deferred_match {
344 if deferred_match.best_match_len >= best_match_len as u64 {
345 outp(LZOutput::Ref {
349 disp: win.cursor_pos() - 1 - deferred_match.best_match_pos,
351 len: deferred_match.best_match_len,
352 })?;
353
354 let match_len_minus_1 = (deferred_match.best_match_len - 1) as usize;
355 if deferred_match.best_match_len <= settings.max_len_to_insert_all_substr {
356 let deferred_span =
359 win.get_next_spans(win.cursor_pos(), match_len_minus_1 + MIN_MATCH);
360 debug_assert!(deferred_span.len() >= match_len_minus_1);
361 self.h.put_span_into_htab(
362 &deferred_span,
363 win.cursor_pos(),
364 match_len_minus_1,
365 );
366 let avail_extra_bytes = deferred_span.len() - match_len_minus_1;
367 if avail_extra_bytes < MIN_MATCH {
368 self.redo_hash_behind_cursor_num_missing =
369 (MIN_MATCH - avail_extra_bytes) as u8;
370 }
371 } else {
372 self.redo_hash_at_cursor = true;
373 }
374 win.roll_window(match_len_minus_1);
375 self.deferred_match = None;
376 } else {
377 outp(LZOutput::Lit(deferred_match.first_byte))?;
381 self.deferred_match = Some(DeferredMatch {
382 first_byte: b,
383 best_match_pos,
384 best_match_len: best_match_len as u64,
385 });
386 win.roll_window(1);
387 }
388 } else {
389 if best_match_len < MIN_MATCH {
391 outp(LZOutput::Lit(b))?;
393 } else {
394 self.deferred_match = Some(DeferredMatch {
397 first_byte: b,
398 best_match_pos,
399 best_match_len: best_match_len as u64,
400 });
401 }
402 win.roll_window(1);
404 }
405 } else {
406 if best_match_len < MIN_MATCH {
407 outp(LZOutput::Lit(b))?;
409 win.roll_window(1);
410 } else {
411 outp(LZOutput::Ref {
413 disp: win.cursor_pos() - best_match_pos,
414 len: best_match_len as u64,
415 })?;
416 if best_match_len as u64 <= settings.max_len_to_insert_all_substr {
417 self.h
418 .put_span_into_htab(&cursor_spans, win.cursor_pos(), best_match_len);
419 let avail_extra_bytes = cursor_spans.len() - best_match_len;
420 if avail_extra_bytes < MIN_MATCH {
421 self.redo_hash_behind_cursor_num_missing =
422 (MIN_MATCH - avail_extra_bytes) as u8;
423 }
424 } else {
425 self.redo_hash_at_cursor = true;
426 }
427 win.roll_window(best_match_len);
428 }
429 }
430 }
431
432 if win.tot_ahead_sz() > 0 {
433 debug_assert!(win.tot_ahead_sz() <= LOOKAHEAD_SZ);
434 win.roll_window(0);
436 }
437
438 if end_of_stream {
439 if let Some(deferred_match) = self.deferred_match {
440 outp(LZOutput::Ref {
444 disp: win.cursor_pos() - 1 - deferred_match.best_match_pos,
446 len: deferred_match.best_match_len,
447 })?;
448 }
449
450 if settings.eos_holdout_bytes > 0 {
451 debug_assert!(win.tot_ahead_sz() as u64 <= settings.eos_holdout_bytes);
453 while win.tot_ahead_sz() > 0 {
454 outp(LZOutput::Lit(win.peek_byte(0)))?;
455 win.roll_window(1);
456 }
457 }
458 assert!(win.tot_ahead_sz() == 0);
459 }
460
461 Ok(())
462 }
463}
464
465#[cfg(feature = "alloc")]
466#[cfg(test)]
467mod tests {
468 extern crate std;
469 use std::{
470 boxed::Box,
471 fs::File,
472 io::{BufWriter, Write},
473 println,
474 vec::Vec,
475 };
476
477 use super::*;
478
479 #[test]
480 fn lz_head_only() {
481 let mut lz: Box<LZEngine<256, 8, { 256 + 8 }, 3, 256, 15, { 1 << 15 }, 16, { 1 << 16 }>> =
482 LZEngine::new_boxed();
483 let mut compressed_out = Vec::new();
484 lz.compress::<_, ()>(&LZSettings::default(), &[0x12, 0x34, 0x56], true, |x| {
485 compressed_out.push(x);
486 Ok(())
487 })
488 .unwrap();
489
490 assert_eq!(compressed_out.len(), 3);
491 assert_eq!(compressed_out[0], LZOutput::Lit(0x12));
492 assert_eq!(compressed_out[1], LZOutput::Lit(0x34));
493 assert_eq!(compressed_out[2], LZOutput::Lit(0x56));
494 }
495
496 #[test]
497 fn lz_head_split() {
498 let mut lz: Box<LZEngine<256, 8, { 256 + 8 }, 3, 256, 15, { 1 << 15 }, 16, { 1 << 16 }>> =
499 LZEngine::new_boxed();
500 let mut compressed_out = Vec::new();
501 lz.compress::<_, ()>(&LZSettings::default(), &[0x12], false, |x| {
502 compressed_out.push(x);
503 Ok(())
504 })
505 .unwrap();
506 println!("compress 1");
507 lz.compress::<_, ()>(&LZSettings::default(), &[0x34, 0x56], true, |x| {
508 compressed_out.push(x);
509 Ok(())
510 })
511 .unwrap();
512 println!("compress 2");
513
514 assert_eq!(compressed_out.len(), 3);
515 assert_eq!(compressed_out[0], LZOutput::Lit(0x12));
516 assert_eq!(compressed_out[1], LZOutput::Lit(0x34));
517 assert_eq!(compressed_out[2], LZOutput::Lit(0x56));
518 }
519
520 #[test]
521 fn lz_not_compressible() {
522 let mut lz: Box<LZEngine<256, 8, { 256 + 8 }, 3, 256, 15, { 1 << 15 }, 16, { 1 << 16 }>> =
523 LZEngine::new_boxed();
524 let mut compressed_out = Vec::new();
525 lz.compress::<_, ()>(
526 &LZSettings::default(),
527 &[0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc],
528 true,
529 |x| {
530 compressed_out.push(x);
531 Ok(())
532 },
533 )
534 .unwrap();
535
536 assert_eq!(compressed_out.len(), 6);
537 assert_eq!(compressed_out[0], LZOutput::Lit(0x12));
538 assert_eq!(compressed_out[1], LZOutput::Lit(0x34));
539 assert_eq!(compressed_out[2], LZOutput::Lit(0x56));
540 assert_eq!(compressed_out[3], LZOutput::Lit(0x78));
541 assert_eq!(compressed_out[4], LZOutput::Lit(0x9a));
542 assert_eq!(compressed_out[5], LZOutput::Lit(0xbc));
543 }
544
545 #[test]
546 fn lz_simple_repeat() {
547 let mut lz: Box<LZEngine<256, 8, { 256 + 8 }, 3, 256, 15, { 1 << 15 }, 16, { 1 << 16 }>> =
548 LZEngine::new_boxed();
549 let mut compressed_out = Vec::new();
550 lz.compress::<_, ()>(
551 &LZSettings::default(),
552 &[0x12, 0x34, 0x56, 0x12, 0x34, 0x56],
553 true,
554 |x| {
555 compressed_out.push(x);
556 Ok(())
557 },
558 )
559 .unwrap();
560
561 assert_eq!(compressed_out.len(), 4);
562 assert_eq!(compressed_out[0], LZOutput::Lit(0x12));
563 assert_eq!(compressed_out[1], LZOutput::Lit(0x34));
564 assert_eq!(compressed_out[2], LZOutput::Lit(0x56));
565 assert_eq!(compressed_out[3], LZOutput::Ref { disp: 3, len: 3 });
566 }
567
568 #[test]
569 fn lz_longer_than_disp_repeat() {
570 let mut lz: Box<LZEngine<256, 8, { 256 + 8 }, 3, 256, 15, { 1 << 15 }, 16, { 1 << 16 }>> =
571 LZEngine::new_boxed();
572 let mut compressed_out = Vec::new();
573 lz.compress::<_, ()>(
574 &LZSettings::default(),
575 &[0x12, 0x34, 0x56, 0x12, 0x34, 0x56, 0x12, 0x34, 0x56],
576 true,
577 |x| {
578 compressed_out.push(x);
579 Ok(())
580 },
581 )
582 .unwrap();
583
584 assert_eq!(compressed_out.len(), 4);
585 assert_eq!(compressed_out[0], LZOutput::Lit(0x12));
586 assert_eq!(compressed_out[1], LZOutput::Lit(0x34));
587 assert_eq!(compressed_out[2], LZOutput::Lit(0x56));
588 assert_eq!(compressed_out[3], LZOutput::Ref { disp: 3, len: 6 });
589 }
590
591 #[test]
592 fn lz_longer_than_lookahead_repeat() {
593 let mut lz: Box<LZEngine<256, 8, { 256 + 8 }, 3, 256, 15, { 1 << 15 }, 16, { 1 << 16 }>> =
594 LZEngine::new_boxed();
595 let mut compressed_out = Vec::new();
596 lz.compress::<_, ()>(
597 &LZSettings::default(),
598 &[
599 0x12, 0x34, 0x56, 0x12, 0x34, 0x56, 0x12, 0x34, 0x56, 0x12, 0x34, 0x56, 0x12, 0x34,
600 0x56, 0x12, 0x34, 0x56,
601 ],
602 true,
603 |x| {
604 compressed_out.push(x);
605 Ok(())
606 },
607 )
608 .unwrap();
609
610 assert_eq!(compressed_out.len(), 4);
611 assert_eq!(compressed_out[0], LZOutput::Lit(0x12));
612 assert_eq!(compressed_out[1], LZOutput::Lit(0x34));
613 assert_eq!(compressed_out[2], LZOutput::Lit(0x56));
614 assert_eq!(compressed_out[3], LZOutput::Ref { disp: 3, len: 15 });
615 }
616
617 #[test]
618 fn lz_split_repeat() {
619 let mut lz: Box<LZEngine<256, 8, { 256 + 8 }, 3, 256, 15, { 1 << 15 }, 16, { 1 << 16 }>> =
620 LZEngine::new_boxed();
621 let mut compressed_out = Vec::new();
622 lz.compress::<_, ()>(
623 &LZSettings::default(),
624 &[
625 0x12, 0x34, 0x56, 0x12, 0x34, 0x56, 0x12, 0x34, 0x56, 0x12, 0x34, 0x56,
626 ],
627 false,
628 |x| {
629 compressed_out.push(x);
630 Ok(())
631 },
632 )
633 .unwrap();
634 lz.compress::<_, ()>(
635 &LZSettings::default(),
636 &[0x12, 0x34, 0x56, 0x12, 0x34, 0x56],
637 true,
638 |x| {
639 compressed_out.push(x);
640 Ok(())
641 },
642 )
643 .unwrap();
644
645 assert_eq!(compressed_out.len(), 5);
646 assert_eq!(compressed_out[0], LZOutput::Lit(0x12));
647 assert_eq!(compressed_out[1], LZOutput::Lit(0x34));
648 assert_eq!(compressed_out[2], LZOutput::Lit(0x56));
649 assert_eq!(compressed_out[3], LZOutput::Ref { disp: 3, len: 9 });
650 assert_eq!(compressed_out[4], LZOutput::Ref { disp: 3, len: 6 });
651 }
652
653 #[test]
654 fn lz_detailed_backref_hashing() {
655 let mut lz: Box<LZEngine<256, 8, { 256 + 8 }, 3, 256, 15, { 1 << 15 }, 16, { 1 << 16 }>> =
656 LZEngine::new_boxed();
657 let mut compressed_out = Vec::new();
658 lz.compress::<_, ()>(
659 &LZSettings::default(),
660 &[
661 0x12, 0x34, 0x56, 0x12, 0x34, 0x56, 0x12, 0x34, 0x56, 0x12, 0x34, 0x56, 0x12,
662 ],
663 false,
664 |x| {
665 compressed_out.push(x);
666 Ok(())
667 },
668 )
669 .unwrap();
670 lz.compress::<_, ()>(
671 &LZSettings::default(),
672 &[0x34, 0x56, 0x12, 0x34, 0x56],
673 true,
674 |x| {
675 compressed_out.push(x);
676 Ok(())
677 },
678 )
679 .unwrap();
680
681 assert_eq!(compressed_out.len(), 5);
682 assert_eq!(compressed_out[0], LZOutput::Lit(0x12));
683 assert_eq!(compressed_out[1], LZOutput::Lit(0x34));
684 assert_eq!(compressed_out[2], LZOutput::Lit(0x56));
685 assert_eq!(compressed_out[3], LZOutput::Ref { disp: 3, len: 10 });
686 assert_eq!(compressed_out[4], LZOutput::Ref { disp: 3, len: 5 });
687
688 assert_eq!(lz.h.htab[(0x12 << 10) ^ (0x34 << 5) ^ 0x56], 15);
689 assert_eq!(lz.h.prev[15], 12);
690 assert_eq!(lz.h.prev[12], 9);
691 assert_eq!(lz.h.prev[9], 6);
692 assert_eq!(lz.h.prev[6], 3);
693 assert_eq!(lz.h.prev[3], 0);
694 assert_eq!(lz.h.prev[0], u64::MAX);
695
696 assert_eq!(lz.h.htab[(0x14 << 10) ^ (0x56 << 5) ^ 0x12], 13);
697 assert_eq!(lz.h.prev[13], 10);
698 assert_eq!(lz.h.prev[10], 7);
699 assert_eq!(lz.h.prev[7], 4);
700 assert_eq!(lz.h.prev[4], 1);
701 assert_eq!(lz.h.prev[1], u64::MAX);
702
703 assert_eq!(lz.h.htab[(0x16 << 10) ^ (0x12 << 5) ^ 0x34], 14);
704 assert_eq!(lz.h.prev[14], 11);
705 assert_eq!(lz.h.prev[11], 8);
706 assert_eq!(lz.h.prev[8], 5);
707 assert_eq!(lz.h.prev[5], 2);
708 assert_eq!(lz.h.prev[2], u64::MAX);
709 }
710
711 #[test]
712 fn lz_peek_ahead_after_span() {
713 let mut lz: Box<LZEngine<256, 8, { 256 + 8 }, 3, 256, 15, { 1 << 15 }, 16, { 1 << 16 }>> =
714 LZEngine::new_boxed();
715 let mut compressed_out = Vec::new();
716 lz.compress::<_, ()>(
717 &LZSettings::default(),
718 &[
719 0x12, 0x34, 0x56, 0x12, 0x34, 0x56, 0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc,
720 ],
721 false,
722 |x| {
723 compressed_out.push(x);
724 Ok(())
725 },
726 )
727 .unwrap();
728 lz.compress::<_, ()>(
729 &LZSettings::default(),
730 &[0x34, 0x56, 0x12, 0x34, 0x56, 0xde],
731 true,
732 |x| {
733 compressed_out.push(x);
734 Ok(())
735 },
736 )
737 .unwrap();
738
739 assert_eq!(compressed_out.len(), 9);
740 assert_eq!(compressed_out[0], LZOutput::Lit(0x12));
741 assert_eq!(compressed_out[1], LZOutput::Lit(0x34));
742 assert_eq!(compressed_out[2], LZOutput::Lit(0x56));
743 assert_eq!(compressed_out[3], LZOutput::Ref { disp: 3, len: 6 });
744 assert_eq!(compressed_out[4], LZOutput::Lit(0x78));
745 assert_eq!(compressed_out[5], LZOutput::Lit(0x9a));
746 assert_eq!(compressed_out[6], LZOutput::Lit(0xbc));
747 assert_eq!(compressed_out[7], LZOutput::Ref { disp: 8, len: 5 });
748 assert_eq!(compressed_out[8], LZOutput::Lit(0xde));
749
750 assert_eq!(lz.h.htab[(0x12 << 10) ^ (0x34 << 5) ^ 0x56], 14);
751 assert_eq!(lz.h.prev[14], 6);
752 assert_eq!(lz.h.prev[6], 3);
753 assert_eq!(lz.h.prev[3], 0);
754 assert_eq!(lz.h.prev[0], u64::MAX);
755
756 assert_eq!(lz.h.htab[(0x14 << 10) ^ (0x56 << 5) ^ 0x12], 12);
757 assert_eq!(lz.h.prev[12], 4);
758 assert_eq!(lz.h.prev[4], 1);
759 assert_eq!(lz.h.prev[1], u64::MAX);
760
761 assert_eq!(lz.h.htab[(0x16 << 10) ^ (0x12 << 5) ^ 0x34], 13);
762 assert_eq!(lz.h.prev[13], 5);
763 assert_eq!(lz.h.prev[5], 2);
764 assert_eq!(lz.h.prev[2], u64::MAX);
765
766 assert_eq!(lz.h.htab[(0x14 << 10) ^ (0x56 << 5) ^ 0x78], 7);
767 assert_eq!(lz.h.prev[7], u64::MAX);
768
769 assert_eq!(lz.h.htab[(0x14 << 10) ^ (0x56 << 5) ^ 0xde], 15);
770 assert_eq!(lz.h.prev[15], u64::MAX);
771
772 assert_eq!(lz.h.htab[(0x16 << 10) ^ (0x78 << 5) ^ 0x9a], 8);
773 assert_eq!(lz.h.prev[8], u64::MAX);
774
775 assert_eq!(lz.h.htab[(0x18 << 10) ^ (0x9a << 5) ^ 0xbc], 9);
776 assert_eq!(lz.h.prev[9], u64::MAX);
777
778 assert_eq!(lz.h.htab[(0x1a << 10) ^ (0xbc << 5) ^ 0x34], 10);
779 assert_eq!(lz.h.prev[10], u64::MAX);
780
781 assert_eq!(lz.h.htab[(0x1c << 10) ^ (0x34 << 5) ^ 0x56], 11);
782 assert_eq!(lz.h.prev[11], u64::MAX);
783 }
784
785 #[test]
786 fn lz_big_file() {
787 let d = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
788
789 let inp_fn = d.join("src/lz77.rs");
790 let outp_fn = d.join("test.bin");
791
792 let inp = std::fs::read(inp_fn).unwrap();
793
794 let mut lz: Box<LZEngine<256, 8, { 256 + 8 }, 3, 256, 15, { 1 << 15 }, 16, { 1 << 16 }>> =
795 LZEngine::new_boxed();
796 let mut compressed_out = Vec::new();
797 lz.compress::<_, ()>(&LZSettings::default(), &inp, true, |x| {
798 compressed_out.push(x);
799 Ok(())
800 })
801 .unwrap();
802
803 let mut outp_f = BufWriter::new(File::create(outp_fn).unwrap());
804 for &lz_tok in &compressed_out {
805 match lz_tok {
806 LZOutput::Lit(lit) => {
807 outp_f.write(&[0]).unwrap();
808 outp_f.write(&[lit]).unwrap();
809 }
810 LZOutput::Ref { disp, len } => {
811 outp_f.write(&[0xff]).unwrap();
812 outp_f.write(&disp.to_le_bytes()).unwrap();
813 outp_f.write(&len.to_le_bytes()).unwrap();
814 }
815 }
816 }
817
818 let mut decompress = Vec::new();
819 for &lz_tok in &compressed_out {
820 match lz_tok {
821 LZOutput::Lit(lit) => {
822 decompress.push(lit);
823 }
824 LZOutput::Ref { disp, len } => {
825 assert!(len > 0);
826 assert!(len <= 256);
827 assert!(disp >= 1);
828 assert!(disp <= 256);
829 for _ in 0..len {
830 decompress.push(decompress[decompress.len() - disp as usize]);
831 }
832 }
833 }
834 }
835
836 assert_eq!(inp, decompress);
837 }
838
839 #[test]
840 fn lz_big_file_2() {
841 let d = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
842
843 let inp_fn = d.join("src/lz77.rs");
844 let outp_fn = d.join("test2.bin");
845
846 let inp = std::fs::read(inp_fn).unwrap();
847
848 let mut lz: Box<
849 LZEngine<32768, 256, { 32768 + 256 }, 3, 256, 15, { 1 << 15 }, 16, { 1 << 16 }>,
850 > = LZEngine::new_boxed();
851 let mut compressed_out = Vec::new();
852 for i in 0..inp.len() {
853 lz.compress::<_, ()>(&LZSettings::default(), &[inp[i]], false, |x| {
854 compressed_out.push(x);
855 Ok(())
856 })
857 .unwrap();
858 }
859 lz.compress::<_, ()>(&LZSettings::default(), &[], true, |x| {
860 compressed_out.push(x);
861 Ok(())
862 })
863 .unwrap();
864
865 let mut outp_f = BufWriter::new(File::create(outp_fn).unwrap());
866 for &lz_tok in &compressed_out {
867 match lz_tok {
868 LZOutput::Lit(lit) => {
869 outp_f.write(&[0]).unwrap();
870 outp_f.write(&[lit]).unwrap();
871 }
872 LZOutput::Ref { disp, len } => {
873 outp_f.write(&[0xff]).unwrap();
874 outp_f.write(&disp.to_le_bytes()).unwrap();
875 outp_f.write(&len.to_le_bytes()).unwrap();
876 }
877 }
878 }
879
880 let mut decompress = Vec::new();
881 for &lz_tok in &compressed_out {
882 match lz_tok {
883 LZOutput::Lit(lit) => {
884 decompress.push(lit);
885 }
886 LZOutput::Ref { disp, len } => {
887 assert!(len > 0);
888 assert!(len <= 256);
889 assert!(disp >= 1);
890 assert!(disp <= 32768);
891 for _ in 0..len {
892 decompress.push(decompress[decompress.len() - disp as usize]);
893 }
894 }
895 }
896 }
897
898 assert_eq!(inp, decompress);
899 }
900
901 #[test]
902 fn lz_max_insert_len() {
903 let mut settings = LZSettings::default();
904 settings.max_len_to_insert_all_substr = 4;
905
906 let mut lz: Box<LZEngine<256, 8, { 256 + 8 }, 3, 256, 15, { 1 << 15 }, 16, { 1 << 16 }>> =
907 LZEngine::new_boxed();
908 let mut compressed_out = Vec::new();
909 lz.compress::<_, ()>(
910 &settings,
911 &[
912 0x12, 0x34, 0x56, 0x12, 0x34, 0x56, 0x12, 0x34, 0x56, 0x78, 0x12, 0x34, 0x56,
913 ],
914 true,
915 |x| {
916 compressed_out.push(x);
917 Ok(())
918 },
919 )
920 .unwrap();
921
922 assert_eq!(compressed_out.len(), 6);
923 assert_eq!(compressed_out[0], LZOutput::Lit(0x12));
924 assert_eq!(compressed_out[1], LZOutput::Lit(0x34));
925 assert_eq!(compressed_out[2], LZOutput::Lit(0x56));
926 assert_eq!(compressed_out[3], LZOutput::Ref { disp: 3, len: 6 });
927 assert_eq!(compressed_out[4], LZOutput::Lit(0x78));
928 assert_eq!(compressed_out[5], LZOutput::Ref { disp: 7, len: 3 });
929 }
930
931 #[test]
932 fn lz_deferred_insert() {
933 let mut settings = LZSettings::default();
934 settings.defer_output_match = true;
935
936 let mut lz: Box<LZEngine<256, 8, { 256 + 8 }, 3, 256, 15, { 1 << 15 }, 16, { 1 << 16 }>> =
937 LZEngine::new_boxed();
938 let mut compressed_out = Vec::new();
939 lz.compress::<_, ()>(
940 &settings,
941 &[1, 2, 3, 2, 3, 4, 5, 1, 2, 3, 4, 5],
942 true,
943 |x| {
944 compressed_out.push(x);
945 Ok(())
946 },
947 )
948 .unwrap();
949
950 assert_eq!(compressed_out.len(), 9);
951 assert_eq!(compressed_out[0], LZOutput::Lit(1));
952 assert_eq!(compressed_out[1], LZOutput::Lit(2));
953 assert_eq!(compressed_out[2], LZOutput::Lit(3));
954 assert_eq!(compressed_out[3], LZOutput::Lit(2));
955 assert_eq!(compressed_out[4], LZOutput::Lit(3));
956 assert_eq!(compressed_out[5], LZOutput::Lit(4));
957 assert_eq!(compressed_out[6], LZOutput::Lit(5));
958 assert_eq!(compressed_out[7], LZOutput::Lit(1));
959 assert_eq!(compressed_out[8], LZOutput::Ref { disp: 5, len: 4 });
960 }
961
962 #[test]
963 fn lz_deferred_insert_sike() {
964 let mut settings = LZSettings::default();
965 settings.defer_output_match = true;
966 settings.good_enough_defer_len = 3;
967
968 let mut lz: Box<LZEngine<256, 8, { 256 + 8 }, 3, 256, 15, { 1 << 15 }, 16, { 1 << 16 }>> =
969 LZEngine::new_boxed();
970 let mut compressed_out = Vec::new();
971 lz.compress::<_, ()>(
972 &settings,
973 &[1, 2, 3, 2, 3, 4, 5, 1, 2, 3, 4, 5],
974 true,
975 |x| {
976 compressed_out.push(x);
977 Ok(())
978 },
979 )
980 .unwrap();
981
982 assert_eq!(compressed_out.len(), 10);
983 assert_eq!(compressed_out[0], LZOutput::Lit(1));
984 assert_eq!(compressed_out[1], LZOutput::Lit(2));
985 assert_eq!(compressed_out[2], LZOutput::Lit(3));
986 assert_eq!(compressed_out[3], LZOutput::Lit(2));
987 assert_eq!(compressed_out[4], LZOutput::Lit(3));
988 assert_eq!(compressed_out[5], LZOutput::Lit(4));
989 assert_eq!(compressed_out[6], LZOutput::Lit(5));
990 assert_eq!(compressed_out[7], LZOutput::Ref { disp: 7, len: 3 });
991 assert_eq!(compressed_out[8], LZOutput::Lit(4));
993 assert_eq!(compressed_out[9], LZOutput::Lit(5));
994 }
995
996 #[test]
997 fn lz_big_file_defer() {
998 let d = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
999
1000 let inp_fn = d.join("src/lz77.rs");
1001 let outp_fn = d.join("test3.bin");
1002
1003 let inp = std::fs::read(inp_fn).unwrap();
1004
1005 let mut settings = LZSettings::default();
1006 settings.defer_output_match = true;
1007 let mut lz: Box<
1008 LZEngine<32768, 256, { 32768 + 256 }, 3, 256, 15, { 1 << 15 }, 16, { 1 << 16 }>,
1009 > = LZEngine::new_boxed();
1010 let mut compressed_out = Vec::new();
1011 for i in 0..inp.len() {
1012 lz.compress::<_, ()>(&settings, &[inp[i]], false, |x| {
1013 compressed_out.push(x);
1014 Ok(())
1015 })
1016 .unwrap();
1017 }
1018 lz.compress::<_, ()>(&settings, &[], true, |x| {
1019 compressed_out.push(x);
1020 Ok(())
1021 })
1022 .unwrap();
1023
1024 let mut outp_f = BufWriter::new(File::create(outp_fn).unwrap());
1025 for &lz_tok in &compressed_out {
1026 match lz_tok {
1027 LZOutput::Lit(lit) => {
1028 outp_f.write(&[0]).unwrap();
1029 outp_f.write(&[lit]).unwrap();
1030 }
1031 LZOutput::Ref { disp, len } => {
1032 outp_f.write(&[0xff]).unwrap();
1033 outp_f.write(&disp.to_le_bytes()).unwrap();
1034 outp_f.write(&len.to_le_bytes()).unwrap();
1035 }
1036 }
1037 }
1038
1039 let mut decompress = Vec::new();
1040 for &lz_tok in &compressed_out {
1041 match lz_tok {
1042 LZOutput::Lit(lit) => {
1043 decompress.push(lit);
1044 }
1045 LZOutput::Ref { disp, len } => {
1046 assert!(len > 0);
1047 assert!(len <= 256);
1048 assert!(disp >= 1);
1049 assert!(disp <= 32768);
1050 for _ in 0..len {
1051 decompress.push(decompress[decompress.len() - disp as usize]);
1052 }
1053 }
1054 }
1055 }
1056
1057 assert_eq!(inp, decompress);
1058 }
1059}