1use core::fmt;
2use core::mem;
3
4use crate::util::*;
5
6#[cfg(feature = "alloc")]
7extern crate alloc;
8
9#[cfg(feature = "std")]
10extern crate std;
11
12const HTAB_LOG2: usize = 13;
13const HTAB_SZ: usize = 1 << HTAB_LOG2;
14
15#[derive(Debug, PartialEq, Eq)]
17#[non_exhaustive]
18pub enum CompressError {
19 OutputTooSmall,
23}
24impl fmt::Display for CompressError {
25 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
26 match self {
27 CompressError::OutputTooSmall => write!(f, "output buffer was insufficient"),
28 }
29 }
30}
31#[cfg(feature = "std")]
32impl std::error::Error for CompressError {}
33
34trait OutputHelper {
36 fn putc(&mut self, c: u8) -> Result<(), CompressError>;
37 fn put_buf(&mut self, buf: &[u8]) -> Result<(), CompressError>;
38 fn poke_l2(&mut self);
39}
40impl<'a> OutputHelper for BufOutput<'a> {
41 fn putc(&mut self, c: u8) -> Result<(), CompressError> {
42 if self.pos + 1 <= self.buf.len() {
43 self.buf[self.pos] = c;
44 self.pos += 1;
45 Ok(())
46 } else {
47 Err(CompressError::OutputTooSmall)
48 }
49 }
50 fn put_buf(&mut self, buf: &[u8]) -> Result<(), CompressError> {
51 let mut len = buf.len();
52 let mut did_overflow = false;
53 if self.pos + len > self.buf.len() {
54 did_overflow = true;
55 len = self.buf.len() - self.pos;
56 }
57
58 self.buf[self.pos..self.pos + len].copy_from_slice(&buf[..len]);
59 self.pos += len;
60
61 if did_overflow {
62 Err(CompressError::OutputTooSmall)
63 } else {
64 Ok(())
65 }
66 }
67
68 fn poke_l2(&mut self) {
69 self.buf[0] |= 0b001_00000;
70 }
71}
72
73#[cfg(feature = "alloc")]
74impl OutputHelper for VecOutput {
75 fn putc(&mut self, c: u8) -> Result<(), CompressError> {
76 self.vec.push(c);
77 Ok(())
78 }
79 fn put_buf(&mut self, buf: &[u8]) -> Result<(), CompressError> {
80 self.vec.extend_from_slice(buf);
81 Ok(())
82 }
83
84 fn poke_l2(&mut self) {
85 self.vec[0] |= 0b001_00000;
86 }
87}
88
89struct L1Output<O>(O);
91struct L2Output<O>(O);
93
94impl<O: OutputHelper> OutputSink<CompressError> for L1Output<O> {
95 fn put_lits(&mut self, mut lits: &[u8]) -> Result<(), CompressError> {
96 while lits.len() > 32 {
97 self.0.putc(31)?;
98 self.0.put_buf(&lits[..32])?;
99 lits = &lits[32..];
100 }
101
102 debug_assert!(lits.len() >= 1);
103 debug_assert!(lits.len() <= 32);
104
105 self.0.putc((lits.len() - 1) as u8)?;
107 self.0.put_buf(lits)?;
108
109 Ok(())
110 }
111
112 fn put_backref(&mut self, disp: usize, mut len: usize) -> Result<(), CompressError> {
113 debug_assert!(disp <= 8191);
114 debug_assert!(len >= 3);
115
116 while len > 0xff + 9 {
122 let b0 = 0b111_00000 | ((disp >> 8) as u8);
123 let b1 = 0xff - 2;
124 let b2 = disp as u8;
125 self.0.putc(b0)?;
126 self.0.putc(b1)?;
127 self.0.putc(b2)?;
128 len -= 0xff - 2 + 9;
129 }
130
131 if len <= 8 {
132 let b0 = (((len - 2) << 5) | (disp >> 8)) as u8;
134 let b1 = disp as u8;
135 self.0.putc(b0)?;
136 self.0.putc(b1)?;
137 } else {
138 let b0 = 0b111_00000 | ((disp >> 8) as u8);
140 let b1 = (len - 9) as u8;
141 let b2 = disp as u8;
142 self.0.putc(b0)?;
143 self.0.putc(b1)?;
144 self.0.putc(b2)?;
145 }
146
147 Ok(())
148 }
149}
150
151impl<O: OutputHelper> OutputSink<CompressError> for L2Output<O> {
152 fn put_lits(&mut self, mut lits: &[u8]) -> Result<(), CompressError> {
153 while lits.len() > 32 {
154 self.0.putc(31)?;
155 self.0.put_buf(&lits[..32])?;
156 lits = &lits[32..];
157 }
158
159 debug_assert!(lits.len() >= 1);
160 debug_assert!(lits.len() <= 32);
161
162 self.0.putc((lits.len() - 1) as u8)?;
164 self.0.put_buf(lits)?;
165
166 Ok(())
167 }
168
169 fn put_backref(&mut self, disp: usize, mut len: usize) -> Result<(), CompressError> {
170 debug_assert!(disp <= 8191 + 65535);
171 debug_assert!(len >= 3);
172
173 let earlydisp = usize::min(disp, 8191);
174 len -= 2;
175 let earlylen = usize::min(len, 7);
176
177 let b0 = ((earlylen << 5) | (earlydisp >> 8)) as u8;
178 self.0.putc(b0)?;
179
180 if earlylen == 7 {
181 len -= earlylen;
182 loop {
183 let blen = usize::min(len, 0xff) as u8;
184 self.0.putc(blen)?;
185 if blen != 0xff {
186 break;
187 }
188 len -= blen as usize;
189 }
190 }
191
192 self.0.putc(earlydisp as u8)?;
193 if earlydisp == 8191 {
194 let moredisp = disp - earlydisp;
195 self.0.putc((moredisp >> 8) as u8)?;
196 self.0.putc(moredisp as u8)?;
197 }
198
199 Ok(())
200 }
201}
202
203trait CompressSink {
205 const MAX_DISP: usize;
206 const IS_LEVEL2: bool;
207 fn poke_l2(&mut self);
208}
209impl<O: OutputHelper> CompressSink for L1Output<O> {
210 const MAX_DISP: usize = 8191;
211 const IS_LEVEL2: bool = false;
212 fn poke_l2(&mut self) {}
213}
214impl<O: OutputHelper> CompressSink for L2Output<O> {
215 const MAX_DISP: usize = 8191 + 65535;
216 const IS_LEVEL2: bool = true;
217 fn poke_l2(&mut self) {
218 self.0.poke_l2();
219 }
220}
221
222#[derive(Debug, Clone, Copy, PartialEq, Eq)]
224pub enum CompressionLevel {
225 Default,
227 Level1,
229 Level2,
231}
232impl Default for CompressionLevel {
233 fn default() -> Self {
234 Self::Default
235 }
236}
237
238fn fastlz_hash(v: u32) -> usize {
239 let h = v.wrapping_mul(2654435769);
240 let h = h >> (32 - HTAB_LOG2);
241 h as usize
242}
243
244trait InputHelper {
245 fn inc(&mut self, n: usize);
246 fn peek4(&mut self) -> Option<u32>;
247}
248impl InputHelper for &[u8] {
249 fn inc(&mut self, n: usize) {
250 *self = &self[n..];
251 }
252 fn peek4(&mut self) -> Option<u32> {
253 let ret = u32::from_le_bytes(*self.first_chunk::<4>()?);
254 Some(ret)
255 }
256}
257
258pub struct CompressState {
262 htab: [usize; HTAB_SZ],
263}
264impl CompressState {
265 pub fn new() -> Self {
267 Self { htab: [0; HTAB_SZ] }
268 }
269 #[cfg(feature = "alloc")]
270 pub fn new_boxed() -> alloc::boxed::Box<Self> {
274 unsafe {
276 let self_ = alloc::alloc::alloc_zeroed(core::alloc::Layout::new::<Self>()) as *mut Self;
277 alloc::boxed::Box::from_raw(self_)
278 }
279 }
280
281 fn compress_impl<L: OutputSink<CompressError> + CompressSink>(
282 &mut self,
283 mut inp: &[u8],
284 outp: &mut L,
285 ) -> Result<(), CompressError> {
286 if inp.len() == 0 {
287 return Ok(());
288 }
289
290 self.htab.fill(0);
291
292 let orig_inp = inp;
293 let mut lits_start_anchor_pos = 0;
294
295 inp.inc(1);
298
299 while let Some(hash_head) = inp.peek4() {
300 let hash = fastlz_hash(hash_head & 0xffffff);
301 let cur_pos = inp.as_ptr() as usize - orig_inp.as_ptr() as usize;
302 let ref_pos = mem::replace(&mut self.htab[hash], cur_pos);
303 let ref_ = &orig_inp[ref_pos..];
304 debug_assert!(cur_pos > ref_pos);
305 let disp = cur_pos - ref_pos - 1;
306
307 if disp <= L::MAX_DISP && inp[..3] == ref_[..3] {
308 if L::IS_LEVEL2 {
311 if disp >= 8191 {
312 if inp.len() < 5 {
314 break;
315 }
316 if inp[3..5] != ref_[3..5] {
317 inp.inc(1);
318 continue;
319 }
320 }
321 }
322
323 let mut len = 3 + inp[3..]
325 .iter()
326 .zip(ref_[3..].iter())
327 .map_while(|(a, b)| if a == b { Some(1) } else { None })
328 .fold(0, |a, x| a + x);
329
330 if L::IS_LEVEL2 {
331 if disp >= 8191 {
333 if len == inp.len() {
334 len -= 1;
335 }
336 }
337 }
338
339 let lits = &orig_inp[lits_start_anchor_pos..cur_pos];
341 if lits.len() > 0 {
342 outp.put_lits(lits)?;
343 }
344
345 outp.put_backref(disp, len)?;
347 lits_start_anchor_pos = cur_pos + len;
348
349 inp.inc(len - 2);
351 if let Some(hash_head) = inp.peek4() {
352 let hash = fastlz_hash(hash_head & 0xffffff);
353 let cur_pos = inp.as_ptr() as usize - orig_inp.as_ptr() as usize;
354 self.htab[hash] = cur_pos;
355
356 let hash = fastlz_hash((hash_head >> 8) & 0xffffff);
357 self.htab[hash] = cur_pos + 1;
358
359 inp.inc(2);
360 } else {
361 break;
362 }
363 } else {
364 inp.inc(1);
366 }
367 }
368
369 let lits = &orig_inp[lits_start_anchor_pos..];
371 if lits.len() > 0 {
372 outp.put_lits(lits)?;
373 }
374
375 outp.poke_l2();
376
377 Ok(())
378 }
379
380 pub fn compress_to_buf(
384 &mut self,
385 inp: &[u8],
386 outp: &mut [u8],
387 mut level: CompressionLevel,
388 ) -> Result<usize, CompressError> {
389 if level == CompressionLevel::Default {
390 if inp.len() < 65536 {
391 level = CompressionLevel::Level1;
392 } else {
393 level = CompressionLevel::Level2;
394 }
395 }
396
397 if level == CompressionLevel::Level1 {
398 let mut outp: L1Output<BufOutput> = L1Output(outp.into());
399 self.compress_impl(inp, &mut outp)?;
400 Ok(outp.0.pos)
401 } else {
402 let mut outp: L2Output<BufOutput> = L2Output(outp.into());
403 self.compress_impl(inp, &mut outp)?;
404 Ok(outp.0.pos)
405 }
406 }
407
408 #[cfg(feature = "alloc")]
409 pub fn compress_to_vec(
413 &mut self,
414 inp: &[u8],
415 mut level: CompressionLevel,
416 ) -> Result<alloc::vec::Vec<u8>, CompressError> {
417 let ret = alloc::vec::Vec::new();
418 if level == CompressionLevel::Default {
419 if inp.len() < 65536 {
420 level = CompressionLevel::Level1;
421 } else {
422 level = CompressionLevel::Level2;
423 }
424 }
425
426 if level == CompressionLevel::Level1 {
427 let mut ret: L1Output<VecOutput> = L1Output(ret.into());
428 self.compress_impl(inp, &mut ret)?;
429 Ok(ret.0.vec)
430 } else {
431 let mut ret: L2Output<VecOutput> = L2Output(ret.into());
432 self.compress_impl(inp, &mut ret)?;
433 Ok(ret.0.vec)
434 }
435 }
436}
437
438#[cfg(test)]
439mod tests {
440 use super::*;
441
442 #[test]
443 fn test_lv1_encoding_lit() {
444 {
445 let mut out = [0u8; 3];
446 let mut outbuf: L1Output<BufOutput> = L1Output((&mut out[..]).into());
447 outbuf.put_lits(&[1, 2]).unwrap();
448 assert_eq!(outbuf.0.buf, [0x01, 1, 2]);
449 }
450
451 {
452 let mut out = [0u8; 2];
453 let mut outbuf: L1Output<BufOutput> = L1Output((&mut out[..]).into());
454 outbuf.put_lits(&[1, 2]).expect_err("");
455 assert_eq!(outbuf.0.buf, [0x01, 1]);
456 }
457
458 {
459 let mut out = [0u8; 0];
460 let mut outbuf: L1Output<BufOutput> = L1Output((&mut out[..]).into());
461 outbuf.put_lits(&[0]).expect_err("");
462 }
463 }
464
465 #[test]
466 fn test_lv1_encoding_short() {
467 {
468 let mut out = [0u8; 2];
469 let mut outbuf: L1Output<BufOutput> = L1Output((&mut out[..]).into());
470 outbuf.put_backref(1, 5).unwrap();
471 assert_eq!(outbuf.0.buf, [0x60, 0x01]);
472 }
473
474 {
475 let mut out = [0u8; 1];
476 let mut outbuf: L1Output<BufOutput> = L1Output((&mut out[..]).into());
477 outbuf.put_backref(1, 5).expect_err("");
478 assert_eq!(outbuf.0.buf, [0x60]);
479 }
480 }
481
482 #[test]
483 fn test_lv1_encoding_long() {
484 {
485 let mut out = [0u8; 3];
486 let mut outbuf: L1Output<BufOutput> = L1Output((&mut out[..]).into());
487 outbuf.put_backref(1, 9).unwrap();
488 assert_eq!(outbuf.0.buf, [0xe0, 0x00, 0x01]);
489 }
490
491 {
492 let mut out = [0u8; 3];
493 let mut outbuf: L1Output<BufOutput> = L1Output((&mut out[..]).into());
494 outbuf.put_backref(1, 264).unwrap();
495 assert_eq!(outbuf.0.buf, [0xe0, 0xff, 0x01]);
496 }
497
498 {
499 let mut out = [0u8; 1];
500 let mut outbuf: L1Output<BufOutput> = L1Output((&mut out[..]).into());
501 outbuf.put_backref(1, 9).expect_err("");
502 assert_eq!(outbuf.0.buf, [0xe0]);
503 }
504 }
505
506 #[test]
507 fn test_lv1_encoding_verylong() {
508 {
509 let mut out = [0u8; 5];
511 let mut outbuf: L1Output<BufOutput> = L1Output((&mut out[..]).into());
512 outbuf.put_backref(1, 265).unwrap();
513 assert_eq!(outbuf.0.buf, [0xe0, 0xfd, 0x01, 0x20, 0x01]);
514 }
515
516 {
517 let mut out = [0u8; 6];
519 let mut outbuf: L1Output<BufOutput> = L1Output((&mut out[..]).into());
520 outbuf.put_backref(1, 526).unwrap();
521 assert_eq!(outbuf.0.buf, [0xe0, 0xfd, 0x01, 0xe0, 0xff, 0x01]);
522 }
523
524 {
525 let mut out = [0u8; 8];
527 let mut outbuf: L1Output<BufOutput> = L1Output((&mut out[..]).into());
528 outbuf.put_backref(1, 527).unwrap();
529 assert_eq!(
530 outbuf.0.buf,
531 [0xe0, 0xfd, 0x01, 0xe0, 0xfd, 0x01, 0x20, 0x01]
532 );
533 }
534 }
535
536 #[test]
537 fn test_lv2_encoding_lit() {
538 {
539 let mut out = [0u8; 3];
540 let mut outbuf: L2Output<BufOutput> = L2Output((&mut out[..]).into());
541 outbuf.put_lits(&[1, 2]).unwrap();
542 assert_eq!(outbuf.0.buf, [0x01, 1, 2]);
543 }
544
545 {
546 let mut out = [0u8; 2];
547 let mut outbuf: L2Output<BufOutput> = L2Output((&mut out[..]).into());
548 outbuf.put_lits(&[1, 2]).expect_err("");
549 assert_eq!(outbuf.0.buf, [0x01, 1]);
550 }
551
552 {
553 let mut out = [0u8; 0];
554 let mut outbuf: L2Output<BufOutput> = L2Output((&mut out[..]).into());
555 outbuf.put_lits(&[0]).expect_err("");
556 }
557 }
558
559 #[test]
560 fn test_lv2_encoding_short() {
561 {
562 let mut out = [0u8; 2];
563 let mut outbuf: L2Output<BufOutput> = L2Output((&mut out[..]).into());
564 outbuf.put_backref(1, 5).unwrap();
565 assert_eq!(outbuf.0.buf, [0x60, 0x01]);
566 }
567
568 {
569 let mut out = [0u8; 1];
570 let mut outbuf: L2Output<BufOutput> = L2Output((&mut out[..]).into());
571 outbuf.put_backref(1, 5).expect_err("");
572 assert_eq!(outbuf.0.buf, [0x60]);
573 }
574 }
575
576 #[test]
577 fn test_lv2_encoding_longlen() {
578 {
579 let mut out = [0u8; 3];
580 let mut outbuf: L2Output<BufOutput> = L2Output((&mut out[..]).into());
581 outbuf.put_backref(1, 9).unwrap();
582 assert_eq!(outbuf.0.buf, [0xe0, 0x00, 0x01]);
583 }
584
585 {
586 let mut out = [0u8; 4];
587 let mut outbuf: L2Output<BufOutput> = L2Output((&mut out[..]).into());
588 outbuf.put_backref(2, 9 + 0xff + 1).unwrap();
589 assert_eq!(outbuf.0.buf, [0xe0, 0xff, 0x01, 0x02]);
590 }
591 }
592
593 #[test]
594 fn test_lv2_encoding_longdisp() {
595 {
596 let mut out = [0u8; 4];
597 let mut outbuf: L2Output<BufOutput> = L2Output((&mut out[..]).into());
598 outbuf.put_backref(8191, 3).unwrap();
599 assert_eq!(outbuf.0.buf, [0x3f, 0xff, 0x00, 0x00]);
600 }
601
602 {
603 let mut out = [0u8; 4];
604 let mut outbuf: L2Output<BufOutput> = L2Output((&mut out[..]).into());
605 outbuf.put_backref(8192, 3).unwrap();
606 assert_eq!(outbuf.0.buf, [0x3f, 0xff, 0x00, 0x01]);
607 }
608 }
609
610 #[test]
611 fn test_lv2_encoding_longboth() {
612 {
613 let mut out = [0u8; 6];
614 let mut outbuf: L2Output<BufOutput> = L2Output((&mut out[..]).into());
615 outbuf.put_backref(8192, 9 + 0xff + 1).unwrap();
616 assert_eq!(outbuf.0.buf, [0xff, 0xff, 0x01, 0xff, 0x00, 0x01]);
617 }
618 }
619
620 #[test]
621 fn test_ref_hashes() {
622 assert_eq!(fastlz_hash(1), 5062);
623 assert_eq!(fastlz_hash(2), 1933);
624 assert_eq!(fastlz_hash(3), 6996);
625 assert_eq!(fastlz_hash(4), 3867);
626 assert_eq!(fastlz_hash(0xaa), 538);
627 assert_eq!(fastlz_hash(0xbb), 4688);
628 assert_eq!(fastlz_hash(0xff), 4904);
629 }
630
631 #[test]
632 fn test_short_and_uncompressible() {
633 {
634 let mut state = CompressState::new();
635 let mut out = [0u8; 3];
636 let len = state
637 .compress_to_buf(&[1, 2], &mut out, CompressionLevel::Level1)
638 .unwrap();
639 assert_eq!(len, out.len());
640 assert_eq!(out, [0x01, 1, 2]);
641 }
642
643 {
644 let mut state = CompressState::new();
645 let mut out = [0u8; 6];
646 let len = state
647 .compress_to_buf(&[1, 2, 3, 4, 5], &mut out, CompressionLevel::Level1)
648 .unwrap();
649 assert_eq!(len, out.len());
650 assert_eq!(out, [0x04, 1, 2, 3, 4, 5]);
651 }
652 }
653
654 #[test]
655 fn test_simple_backref() {
656 {
657 let mut state = CompressState::new();
658 let mut out = [0u8; 4];
659 let len = state
660 .compress_to_buf(&[1, 1, 1, 1, 1], &mut out, CompressionLevel::Level1)
661 .unwrap();
662 assert_eq!(len, out.len());
663 assert_eq!(out, [0x00, 1, 0x40, 0x00]);
664 }
665 {
666 let mut state = CompressState::new();
668 let mut out = [0u8; 6];
669 let len = state
670 .compress_to_buf(&[1, 1, 1, 1, 1, 2], &mut out, CompressionLevel::Level1)
671 .unwrap();
672 assert_eq!(len, out.len());
673 assert_eq!(out, [0x00, 1, 0x40, 0x00, 0x00, 2]);
674 }
675 {
676 let mut state = CompressState::new();
678 let mut out = [0u8; 6];
679 let len = state
680 .compress_to_buf(
681 &[1, 2, 3, 1, 2, 3, 1, 2, 3],
682 &mut out,
683 CompressionLevel::Level1,
684 )
685 .unwrap();
686 assert_eq!(len, out.len());
687 assert_eq!(out, [0x02, 1, 2, 3, 0x80, 0x02]);
688 }
689 {
690 let mut state = CompressState::new();
692 let mut out = [0u8; 8];
693 let len = state
694 .compress_to_buf(
695 &[1, 2, 3, 1, 2, 3, 1, 2, 3, 4],
696 &mut out,
697 CompressionLevel::Level1,
698 )
699 .unwrap();
700 assert_eq!(len, out.len());
701 assert_eq!(out, [0x02, 1, 2, 3, 0x80, 0x02, 0x00, 4]);
702 }
703 }
704
705 #[test]
706 fn test_rehash_at_boundary() {
707 {
708 let mut state = CompressState::new();
709 let mut out = [0u8; 8];
710 let len = state
711 .compress_to_buf(
712 &[1, 2, 3, 1, 2, 3, 1, 2, 3, 2, 3, 2, 3],
713 &mut out,
714 CompressionLevel::Level1,
715 )
716 .unwrap();
717 assert_eq!(len, out.len());
718 assert_eq!(out, [0x02, 1, 2, 3, 0x80, 0x02, 0x40, 0x01]);
719 }
720 {
721 let mut state = CompressState::new();
722 let mut out = [0u8; 8];
723 let len = state
724 .compress_to_buf(
725 &[1, 2, 3, 1, 2, 3, 1, 2, 3, 3, 3, 3, 3],
726 &mut out,
727 CompressionLevel::Level1,
728 )
729 .unwrap();
730 assert_eq!(len, out.len());
731 assert_eq!(out, [0x02, 1, 2, 3, 0x80, 0x02, 0x40, 0x00]);
732 }
733 }
734
735 #[cfg(feature = "std")]
736 #[test]
737 fn test_lv1_against_ref() {
738 extern crate std;
739
740 let d = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
741 let inp_fn = d.join("src/compress.rs");
742
743 let inp = std::fs::read(inp_fn).unwrap();
744 let mut comp_state = CompressState::new();
745 let out = comp_state
746 .compress_to_vec(&inp, CompressionLevel::Level1)
747 .unwrap();
748
749 let mut reference = crate::wasmtester::FastLZWasm::new();
750 let check = reference.fastlz_decompress(&out);
751
752 assert_eq!(inp, check);
753 }
754
755 #[cfg(feature = "std")]
756 #[test]
757 fn test_lv2_against_ref() {
758 extern crate std;
759
760 let d = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
761 let inp_fn = d.join("src/compress.rs");
762
763 let inp = std::fs::read(inp_fn).unwrap();
764 let mut comp_state = CompressState::new();
765 let out = comp_state
766 .compress_to_vec(&inp, CompressionLevel::Level2)
767 .unwrap();
768
769 let mut reference = crate::wasmtester::FastLZWasm::new();
770 let check = reference.fastlz_decompress(&out);
771
772 assert_eq!(inp, check);
773 }
774}