1use alloc::collections::VecDeque;
9use alloc::vec::Vec;
10use core::convert::TryInto;
11use core::num::NonZeroUsize;
12
13use super::CompressionLevel;
14use super::Matcher;
15use super::Sequence;
16use super::blocks::encode_offset_with_history;
17
18const MIN_MATCH_LEN: usize = 5;
19const FAST_HASH_FILL_STEP: usize = 3;
20const DFAST_MIN_MATCH_LEN: usize = 6;
21const DFAST_TARGET_LEN: usize = 48;
22const DFAST_HASH_BITS: usize = 20;
25const DFAST_SEARCH_DEPTH: usize = 4;
26const DFAST_DEFAULT_WINDOW_SIZE: usize = 1 << 22;
27const BETTER_DEFAULT_WINDOW_SIZE: usize = 1 << 23;
28const DFAST_EMPTY_SLOT: usize = usize::MAX;
29
30const HC_HASH_LOG: usize = 20;
31const HC_CHAIN_LOG: usize = 19;
32const HC_SEARCH_DEPTH: usize = 16;
33const HC_MIN_MATCH_LEN: usize = 5;
34const HC_TARGET_LEN: usize = 48;
35const HC_EMPTY: u32 = 0;
38
39const BEST_DEFAULT_WINDOW_SIZE: usize = 1 << 24;
40const MAX_HC_SEARCH_DEPTH: usize = 32;
43
44#[derive(Copy, Clone)]
47struct HcConfig {
48 hash_log: usize,
49 chain_log: usize,
50 search_depth: usize,
51 target_len: usize,
52}
53
54const HC_CONFIG: HcConfig = HcConfig {
55 hash_log: HC_HASH_LOG,
56 chain_log: HC_CHAIN_LOG,
57 search_depth: HC_SEARCH_DEPTH,
58 target_len: HC_TARGET_LEN,
59};
60
61const BEST_HC_CONFIG: HcConfig = HcConfig {
63 hash_log: 21,
64 chain_log: 20,
65 search_depth: 32,
66 target_len: 128,
67};
68
69#[derive(Copy, Clone, Debug, PartialEq, Eq)]
70enum MatcherBackend {
71 Simple,
72 Dfast,
73 HashChain,
74}
75
76pub struct MatchGeneratorDriver {
78 vec_pool: Vec<Vec<u8>>,
79 suffix_pool: Vec<SuffixStore>,
80 match_generator: MatchGenerator,
81 dfast_match_generator: Option<DfastMatchGenerator>,
82 hc_match_generator: Option<HcMatchGenerator>,
83 active_backend: MatcherBackend,
84 slice_size: usize,
85 base_slice_size: usize,
86 base_window_size: usize,
87 reported_window_size: usize,
90 dictionary_retained_budget: usize,
93}
94
95impl MatchGeneratorDriver {
96 pub(crate) fn new(slice_size: usize, max_slices_in_window: usize) -> Self {
99 let max_window_size = max_slices_in_window * slice_size;
100 Self {
101 vec_pool: Vec::new(),
102 suffix_pool: Vec::new(),
103 match_generator: MatchGenerator::new(max_window_size),
104 dfast_match_generator: None,
105 hc_match_generator: None,
106 active_backend: MatcherBackend::Simple,
107 slice_size,
108 base_slice_size: slice_size,
109 base_window_size: max_window_size,
110 reported_window_size: max_window_size,
111 dictionary_retained_budget: 0,
112 }
113 }
114
115 fn level_config(&self, level: CompressionLevel) -> (MatcherBackend, usize, usize, usize) {
116 match level {
117 CompressionLevel::Uncompressed => (
118 MatcherBackend::Simple,
119 self.base_slice_size,
120 self.base_window_size,
121 1,
122 ),
123 CompressionLevel::Fastest => (
124 MatcherBackend::Simple,
125 self.base_slice_size,
126 self.base_window_size,
127 FAST_HASH_FILL_STEP,
128 ),
129 CompressionLevel::Default => (
130 MatcherBackend::Dfast,
131 self.base_slice_size,
132 DFAST_DEFAULT_WINDOW_SIZE,
133 1,
134 ),
135 CompressionLevel::Better => (
136 MatcherBackend::HashChain,
137 self.base_slice_size,
138 BETTER_DEFAULT_WINDOW_SIZE,
139 1,
140 ),
141 CompressionLevel::Best => (
142 MatcherBackend::HashChain,
143 self.base_slice_size,
144 BEST_DEFAULT_WINDOW_SIZE,
145 1,
146 ),
147 }
148 }
149
150 fn dfast_matcher(&self) -> &DfastMatchGenerator {
151 self.dfast_match_generator
152 .as_ref()
153 .expect("dfast backend must be initialized by reset() before use")
154 }
155
156 fn dfast_matcher_mut(&mut self) -> &mut DfastMatchGenerator {
157 self.dfast_match_generator
158 .as_mut()
159 .expect("dfast backend must be initialized by reset() before use")
160 }
161
162 fn hc_matcher(&self) -> &HcMatchGenerator {
163 self.hc_match_generator
164 .as_ref()
165 .expect("hash chain backend must be initialized by reset() before use")
166 }
167
168 fn hc_matcher_mut(&mut self) -> &mut HcMatchGenerator {
169 self.hc_match_generator
170 .as_mut()
171 .expect("hash chain backend must be initialized by reset() before use")
172 }
173
174 fn retire_dictionary_budget(&mut self, evicted_bytes: usize) {
175 let reclaimed = evicted_bytes.min(self.dictionary_retained_budget);
176 if reclaimed == 0 {
177 return;
178 }
179 self.dictionary_retained_budget -= reclaimed;
180 match self.active_backend {
181 MatcherBackend::Simple => {
182 self.match_generator.max_window_size = self
183 .match_generator
184 .max_window_size
185 .saturating_sub(reclaimed);
186 }
187 MatcherBackend::Dfast => {
188 let matcher = self.dfast_matcher_mut();
189 matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
190 }
191 MatcherBackend::HashChain => {
192 let matcher = self.hc_matcher_mut();
193 matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
194 }
195 }
196 }
197
198 fn trim_after_budget_retire(&mut self) {
199 loop {
200 let mut evicted_bytes = 0usize;
201 match self.active_backend {
202 MatcherBackend::Simple => {
203 let vec_pool = &mut self.vec_pool;
204 let suffix_pool = &mut self.suffix_pool;
205 self.match_generator.reserve(0, |mut data, mut suffixes| {
206 evicted_bytes += data.len();
207 data.resize(data.capacity(), 0);
208 vec_pool.push(data);
209 suffixes.slots.clear();
210 suffixes.slots.resize(suffixes.slots.capacity(), None);
211 suffix_pool.push(suffixes);
212 });
213 }
214 MatcherBackend::Dfast => {
215 let mut retired = Vec::new();
216 self.dfast_matcher_mut().trim_to_window(|data| {
217 evicted_bytes += data.len();
218 retired.push(data);
219 });
220 for mut data in retired {
221 data.resize(data.capacity(), 0);
222 self.vec_pool.push(data);
223 }
224 }
225 MatcherBackend::HashChain => {
226 let mut retired = Vec::new();
227 self.hc_matcher_mut().trim_to_window(|data| {
228 evicted_bytes += data.len();
229 retired.push(data);
230 });
231 for mut data in retired {
232 data.resize(data.capacity(), 0);
233 self.vec_pool.push(data);
234 }
235 }
236 }
237 if evicted_bytes == 0 {
238 break;
239 }
240 self.retire_dictionary_budget(evicted_bytes);
241 }
242 }
243}
244
245impl Matcher for MatchGeneratorDriver {
246 fn supports_dictionary_priming(&self) -> bool {
247 true
248 }
249
250 fn reset(&mut self, level: CompressionLevel) {
251 let (backend, slice_size, max_window_size, hash_fill_step) = self.level_config(level);
252 self.dictionary_retained_budget = 0;
253 if self.active_backend != backend {
254 match self.active_backend {
255 MatcherBackend::Simple => {
256 let vec_pool = &mut self.vec_pool;
257 let suffix_pool = &mut self.suffix_pool;
258 self.match_generator.reset(|mut data, mut suffixes| {
259 data.resize(data.capacity(), 0);
260 vec_pool.push(data);
261 suffixes.slots.clear();
262 suffixes.slots.resize(suffixes.slots.capacity(), None);
263 suffix_pool.push(suffixes);
264 });
265 }
266 MatcherBackend::Dfast => {
267 if let Some(dfast) = self.dfast_match_generator.as_mut() {
268 let vec_pool = &mut self.vec_pool;
269 dfast.reset(|mut data| {
270 data.resize(data.capacity(), 0);
271 vec_pool.push(data);
272 });
273 }
274 }
275 MatcherBackend::HashChain => {
276 if let Some(hc) = self.hc_match_generator.as_mut() {
277 hc.hash_table = Vec::new();
280 hc.chain_table = Vec::new();
281 let vec_pool = &mut self.vec_pool;
282 hc.reset(|mut data| {
283 data.resize(data.capacity(), 0);
284 vec_pool.push(data);
285 });
286 }
287 }
288 }
289 }
290
291 self.active_backend = backend;
292 self.slice_size = slice_size;
293 self.reported_window_size = max_window_size;
294 match self.active_backend {
295 MatcherBackend::Simple => {
296 let vec_pool = &mut self.vec_pool;
297 let suffix_pool = &mut self.suffix_pool;
298 self.match_generator.max_window_size = max_window_size;
299 self.match_generator.hash_fill_step = hash_fill_step;
300 self.match_generator.reset(|mut data, mut suffixes| {
301 data.resize(data.capacity(), 0);
302 vec_pool.push(data);
303 suffixes.slots.clear();
304 suffixes.slots.resize(suffixes.slots.capacity(), None);
305 suffix_pool.push(suffixes);
306 });
307 }
308 MatcherBackend::Dfast => {
309 let dfast = self
310 .dfast_match_generator
311 .get_or_insert_with(|| DfastMatchGenerator::new(max_window_size));
312 dfast.max_window_size = max_window_size;
313 dfast.lazy_depth = 1;
314 let vec_pool = &mut self.vec_pool;
315 dfast.reset(|mut data| {
316 data.resize(data.capacity(), 0);
317 vec_pool.push(data);
318 });
319 }
320 MatcherBackend::HashChain => {
321 let hc = self
322 .hc_match_generator
323 .get_or_insert_with(|| HcMatchGenerator::new(max_window_size));
324 hc.max_window_size = max_window_size;
325 hc.lazy_depth = 2;
326 match level {
327 CompressionLevel::Best => hc.configure(BEST_HC_CONFIG),
328 _ => hc.configure(HC_CONFIG),
329 }
330 let vec_pool = &mut self.vec_pool;
331 hc.reset(|mut data| {
332 data.resize(data.capacity(), 0);
333 vec_pool.push(data);
334 });
335 }
336 }
337 }
338
339 fn prime_with_dictionary(&mut self, dict_content: &[u8], offset_hist: [u32; 3]) {
340 match self.active_backend {
341 MatcherBackend::Simple => self.match_generator.offset_hist = offset_hist,
342 MatcherBackend::Dfast => self.dfast_matcher_mut().offset_hist = offset_hist,
343 MatcherBackend::HashChain => self.hc_matcher_mut().offset_hist = offset_hist,
344 }
345
346 if dict_content.is_empty() {
347 return;
348 }
349
350 let retained_dict_budget = dict_content.len();
353 match self.active_backend {
354 MatcherBackend::Simple => {
355 self.match_generator.max_window_size = self
356 .match_generator
357 .max_window_size
358 .saturating_add(retained_dict_budget);
359 }
360 MatcherBackend::Dfast => {
361 let matcher = self.dfast_matcher_mut();
362 matcher.max_window_size =
363 matcher.max_window_size.saturating_add(retained_dict_budget);
364 }
365 MatcherBackend::HashChain => {
366 let matcher = self.hc_matcher_mut();
367 matcher.max_window_size =
368 matcher.max_window_size.saturating_add(retained_dict_budget);
369 }
370 }
371
372 let mut start = 0usize;
373 let mut committed_dict_budget = 0usize;
374 let min_primed_tail = match self.active_backend {
378 MatcherBackend::Simple => MIN_MATCH_LEN,
379 MatcherBackend::Dfast | MatcherBackend::HashChain => 4,
380 };
381 while start < dict_content.len() {
382 let end = (start + self.slice_size).min(dict_content.len());
383 if end - start < min_primed_tail {
384 break;
385 }
386 let mut space = self.get_next_space();
387 space.clear();
388 space.extend_from_slice(&dict_content[start..end]);
389 self.commit_space(space);
390 self.skip_matching();
391 committed_dict_budget += end - start;
392 start = end;
393 }
394
395 let uncommitted_tail_budget = retained_dict_budget.saturating_sub(committed_dict_budget);
396 if uncommitted_tail_budget > 0 {
397 match self.active_backend {
398 MatcherBackend::Simple => {
399 self.match_generator.max_window_size = self
400 .match_generator
401 .max_window_size
402 .saturating_sub(uncommitted_tail_budget);
403 }
404 MatcherBackend::Dfast => {
405 let matcher = self.dfast_matcher_mut();
406 matcher.max_window_size = matcher
407 .max_window_size
408 .saturating_sub(uncommitted_tail_budget);
409 }
410 MatcherBackend::HashChain => {
411 let matcher = self.hc_matcher_mut();
412 matcher.max_window_size = matcher
413 .max_window_size
414 .saturating_sub(uncommitted_tail_budget);
415 }
416 }
417 }
418 if committed_dict_budget > 0 {
419 self.dictionary_retained_budget = self
420 .dictionary_retained_budget
421 .saturating_add(committed_dict_budget);
422 }
423 }
424
425 fn window_size(&self) -> u64 {
426 self.reported_window_size as u64
427 }
428
429 fn get_next_space(&mut self) -> Vec<u8> {
430 self.vec_pool.pop().unwrap_or_else(|| {
431 let mut space = alloc::vec![0; self.slice_size];
432 space.resize(space.capacity(), 0);
433 space
434 })
435 }
436
437 fn get_last_space(&mut self) -> &[u8] {
438 match self.active_backend {
439 MatcherBackend::Simple => self.match_generator.window.last().unwrap().data.as_slice(),
440 MatcherBackend::Dfast => self.dfast_matcher().get_last_space(),
441 MatcherBackend::HashChain => self.hc_matcher().get_last_space(),
442 }
443 }
444
445 fn commit_space(&mut self, space: Vec<u8>) {
446 match self.active_backend {
447 MatcherBackend::Simple => {
448 let vec_pool = &mut self.vec_pool;
449 let mut evicted_bytes = 0usize;
450 let suffixes = self
451 .suffix_pool
452 .pop()
453 .unwrap_or_else(|| SuffixStore::with_capacity(space.len()));
454 let suffix_pool = &mut self.suffix_pool;
455 self.match_generator
456 .add_data(space, suffixes, |mut data, mut suffixes| {
457 evicted_bytes += data.len();
458 data.resize(data.capacity(), 0);
459 vec_pool.push(data);
460 suffixes.slots.clear();
461 suffixes.slots.resize(suffixes.slots.capacity(), None);
462 suffix_pool.push(suffixes);
463 });
464 self.retire_dictionary_budget(evicted_bytes);
465 self.trim_after_budget_retire();
466 }
467 MatcherBackend::Dfast => {
468 let vec_pool = &mut self.vec_pool;
469 let mut evicted_bytes = 0usize;
470 self.dfast_match_generator
471 .as_mut()
472 .expect("dfast backend must be initialized by reset() before use")
473 .add_data(space, |mut data| {
474 evicted_bytes += data.len();
475 data.resize(data.capacity(), 0);
476 vec_pool.push(data);
477 });
478 self.retire_dictionary_budget(evicted_bytes);
479 self.trim_after_budget_retire();
480 }
481 MatcherBackend::HashChain => {
482 let vec_pool = &mut self.vec_pool;
483 let mut evicted_bytes = 0usize;
484 self.hc_match_generator
485 .as_mut()
486 .expect("hash chain backend must be initialized by reset() before use")
487 .add_data(space, |mut data| {
488 evicted_bytes += data.len();
489 data.resize(data.capacity(), 0);
490 vec_pool.push(data);
491 });
492 self.retire_dictionary_budget(evicted_bytes);
493 self.trim_after_budget_retire();
494 }
495 }
496 }
497
498 fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
499 match self.active_backend {
500 MatcherBackend::Simple => {
501 while self.match_generator.next_sequence(&mut handle_sequence) {}
502 }
503 MatcherBackend::Dfast => self
504 .dfast_matcher_mut()
505 .start_matching(&mut handle_sequence),
506 MatcherBackend::HashChain => self.hc_matcher_mut().start_matching(&mut handle_sequence),
507 }
508 }
509 fn skip_matching(&mut self) {
510 match self.active_backend {
511 MatcherBackend::Simple => self.match_generator.skip_matching(),
512 MatcherBackend::Dfast => self.dfast_matcher_mut().skip_matching(),
513 MatcherBackend::HashChain => self.hc_matcher_mut().skip_matching(),
514 }
515 }
516}
517
518struct SuffixStore {
521 slots: Vec<Option<NonZeroUsize>>,
525 len_log: u32,
526}
527
528impl SuffixStore {
529 fn with_capacity(capacity: usize) -> Self {
530 Self {
531 slots: alloc::vec![None; capacity],
532 len_log: capacity.ilog2(),
533 }
534 }
535
536 #[inline(always)]
537 fn insert(&mut self, suffix: &[u8], idx: usize) {
538 let key = self.key(suffix);
539 self.slots[key] = Some(NonZeroUsize::new(idx + 1).unwrap());
540 }
541
542 #[inline(always)]
543 fn contains_key(&self, suffix: &[u8]) -> bool {
544 let key = self.key(suffix);
545 self.slots[key].is_some()
546 }
547
548 #[inline(always)]
549 fn get(&self, suffix: &[u8]) -> Option<usize> {
550 let key = self.key(suffix);
551 self.slots[key].map(|x| <NonZeroUsize as Into<usize>>::into(x) - 1)
552 }
553
554 #[inline(always)]
555 fn key(&self, suffix: &[u8]) -> usize {
556 if self.len_log == 0 {
558 return 0;
559 }
560
561 let s0 = suffix[0] as u64;
562 let s1 = suffix[1] as u64;
563 let s2 = suffix[2] as u64;
564 let s3 = suffix[3] as u64;
565 let s4 = suffix[4] as u64;
566
567 const POLY: u64 = 0xCF3BCCDCABu64;
568
569 let s0 = (s0 << 24).wrapping_mul(POLY);
570 let s1 = (s1 << 32).wrapping_mul(POLY);
571 let s2 = (s2 << 40).wrapping_mul(POLY);
572 let s3 = (s3 << 48).wrapping_mul(POLY);
573 let s4 = (s4 << 56).wrapping_mul(POLY);
574
575 let index = s0 ^ s1 ^ s2 ^ s3 ^ s4;
576 let index = index >> (64 - self.len_log);
577 index as usize % self.slots.len()
578 }
579}
580
581struct WindowEntry {
584 data: Vec<u8>,
585 suffixes: SuffixStore,
587 base_offset: usize,
589}
590
591pub(crate) struct MatchGenerator {
592 max_window_size: usize,
593 window: Vec<WindowEntry>,
596 window_size: usize,
597 #[cfg(debug_assertions)]
598 concat_window: Vec<u8>,
599 suffix_idx: usize,
601 last_idx_in_sequence: usize,
603 hash_fill_step: usize,
604 offset_hist: [u32; 3],
605}
606
607impl MatchGenerator {
608 #[inline(always)]
609 #[cfg(target_endian = "little")]
610 fn mismatch_byte_index(diff: usize) -> usize {
611 diff.trailing_zeros() as usize / 8
612 }
613
614 #[inline(always)]
615 #[cfg(target_endian = "big")]
616 fn mismatch_byte_index(diff: usize) -> usize {
617 diff.leading_zeros() as usize / 8
618 }
619
620 fn new(max_size: usize) -> Self {
622 Self {
623 max_window_size: max_size,
624 window: Vec::new(),
625 window_size: 0,
626 #[cfg(debug_assertions)]
627 concat_window: Vec::new(),
628 suffix_idx: 0,
629 last_idx_in_sequence: 0,
630 hash_fill_step: 1,
631 offset_hist: [1, 4, 8],
632 }
633 }
634
635 fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
636 self.window_size = 0;
637 #[cfg(debug_assertions)]
638 self.concat_window.clear();
639 self.suffix_idx = 0;
640 self.last_idx_in_sequence = 0;
641 self.offset_hist = [1, 4, 8];
642 self.window.drain(..).for_each(|entry| {
643 reuse_space(entry.data, entry.suffixes);
644 });
645 }
646
647 fn next_sequence(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) -> bool {
652 loop {
653 let last_entry = self.window.last().unwrap();
654 let data_slice = &last_entry.data;
655
656 if self.suffix_idx >= data_slice.len() {
658 if self.last_idx_in_sequence != self.suffix_idx {
659 let literals = &data_slice[self.last_idx_in_sequence..];
660 self.last_idx_in_sequence = self.suffix_idx;
661 handle_sequence(Sequence::Literals { literals });
662 return true;
663 } else {
664 return false;
665 }
666 }
667
668 let data_slice = &data_slice[self.suffix_idx..];
670 if data_slice.len() < MIN_MATCH_LEN {
671 let last_idx_in_sequence = self.last_idx_in_sequence;
672 self.last_idx_in_sequence = last_entry.data.len();
673 self.suffix_idx = last_entry.data.len();
674 handle_sequence(Sequence::Literals {
675 literals: &last_entry.data[last_idx_in_sequence..],
676 });
677 return true;
678 }
679
680 let key = &data_slice[..MIN_MATCH_LEN];
682 let literals_len = self.suffix_idx - self.last_idx_in_sequence;
683
684 let mut candidate = self.repcode_candidate(data_slice, literals_len);
686 for match_entry in self.window.iter() {
687 if let Some(match_index) = match_entry.suffixes.get(key) {
688 let match_slice = &match_entry.data[match_index..];
689
690 let match_len = Self::common_prefix_len(match_slice, data_slice);
692
693 if match_len >= MIN_MATCH_LEN {
695 let offset = match_entry.base_offset + self.suffix_idx - match_index;
696
697 #[cfg(debug_assertions)]
699 {
700 let unprocessed = last_entry.data.len() - self.suffix_idx;
701 let start = self.concat_window.len() - unprocessed - offset;
702 let end = start + match_len;
703 let check_slice = &self.concat_window[start..end];
704 debug_assert_eq!(check_slice, &match_slice[..match_len]);
705 }
706
707 if let Some((old_offset, old_match_len)) = candidate {
708 if match_len > old_match_len
709 || (match_len == old_match_len && offset < old_offset)
710 {
711 candidate = Some((offset, match_len));
712 }
713 } else {
714 candidate = Some((offset, match_len));
715 }
716 }
717 }
718 }
719
720 if let Some((offset, match_len)) = candidate {
721 self.add_suffixes_till(self.suffix_idx + match_len, self.hash_fill_step);
724
725 let last_entry = self.window.last().unwrap();
727 let literals = &last_entry.data[self.last_idx_in_sequence..self.suffix_idx];
728
729 self.suffix_idx += match_len;
731 self.last_idx_in_sequence = self.suffix_idx;
732 let _ = encode_offset_with_history(
733 offset as u32,
734 literals.len() as u32,
735 &mut self.offset_hist,
736 );
737 handle_sequence(Sequence::Triple {
738 literals,
739 offset,
740 match_len,
741 });
742
743 return true;
744 }
745
746 let last_entry = self.window.last_mut().unwrap();
747 let key = &last_entry.data[self.suffix_idx..self.suffix_idx + MIN_MATCH_LEN];
748 if !last_entry.suffixes.contains_key(key) {
749 last_entry.suffixes.insert(key, self.suffix_idx);
750 }
751 self.suffix_idx += 1;
752 }
753 }
754
755 #[inline(always)]
757 fn common_prefix_len(a: &[u8], b: &[u8]) -> usize {
758 let max = a.len().min(b.len());
759 let chunk = core::mem::size_of::<usize>();
760 let mut off = 0usize;
761 let lhs = a.as_ptr();
762 let rhs = b.as_ptr();
763
764 while off + chunk <= max {
765 let lhs_word = unsafe { core::ptr::read_unaligned(lhs.add(off) as *const usize) };
766 let rhs_word = unsafe { core::ptr::read_unaligned(rhs.add(off) as *const usize) };
767 let diff = lhs_word ^ rhs_word;
768 if diff != 0 {
769 return off + Self::mismatch_byte_index(diff);
770 }
771 off += chunk;
772 }
773
774 off + core::iter::zip(&a[off..max], &b[off..max])
775 .take_while(|(x, y)| x == y)
776 .count()
777 }
778
779 #[inline(always)]
781 fn add_suffixes_till(&mut self, idx: usize, fill_step: usize) {
782 let start = self.suffix_idx;
783 let last_entry = self.window.last_mut().unwrap();
784 if last_entry.data.len() < MIN_MATCH_LEN {
785 return;
786 }
787 let insert_limit = idx.saturating_sub(MIN_MATCH_LEN - 1);
788 if insert_limit > start {
789 let data = last_entry.data.as_slice();
790 let suffixes = &mut last_entry.suffixes;
791 if fill_step == FAST_HASH_FILL_STEP {
792 Self::add_suffixes_interleaved_fast(data, suffixes, start, insert_limit);
793 } else {
794 let mut pos = start;
795 while pos < insert_limit {
796 Self::insert_suffix_if_absent(data, suffixes, pos);
797 pos += fill_step;
798 }
799 }
800 }
801
802 if idx >= start + MIN_MATCH_LEN {
803 let tail_start = idx - MIN_MATCH_LEN;
804 let tail_key = &last_entry.data[tail_start..tail_start + MIN_MATCH_LEN];
805 if !last_entry.suffixes.contains_key(tail_key) {
806 last_entry.suffixes.insert(tail_key, tail_start);
807 }
808 }
809 }
810
811 #[inline(always)]
812 fn insert_suffix_if_absent(data: &[u8], suffixes: &mut SuffixStore, pos: usize) {
813 debug_assert!(
814 pos + MIN_MATCH_LEN <= data.len(),
815 "insert_suffix_if_absent: pos {} + MIN_MATCH_LEN {} exceeds data.len() {}",
816 pos,
817 MIN_MATCH_LEN,
818 data.len()
819 );
820 let key = &data[pos..pos + MIN_MATCH_LEN];
821 if !suffixes.contains_key(key) {
822 suffixes.insert(key, pos);
823 }
824 }
825
826 #[inline(always)]
827 fn add_suffixes_interleaved_fast(
828 data: &[u8],
829 suffixes: &mut SuffixStore,
830 start: usize,
831 insert_limit: usize,
832 ) {
833 let lane = FAST_HASH_FILL_STEP;
834 let mut pos = start;
835
836 while pos + lane * 3 < insert_limit {
839 let p0 = pos;
840 let p1 = pos + lane;
841 let p2 = pos + lane * 2;
842 let p3 = pos + lane * 3;
843
844 Self::insert_suffix_if_absent(data, suffixes, p0);
845 Self::insert_suffix_if_absent(data, suffixes, p1);
846 Self::insert_suffix_if_absent(data, suffixes, p2);
847 Self::insert_suffix_if_absent(data, suffixes, p3);
848
849 pos += lane * 4;
850 }
851
852 while pos < insert_limit {
853 Self::insert_suffix_if_absent(data, suffixes, pos);
854 pos += lane;
855 }
856 }
857
858 fn repcode_candidate(&self, data_slice: &[u8], literals_len: usize) -> Option<(usize, usize)> {
859 if literals_len != 0 {
860 return None;
861 }
862
863 let reps = [
864 Some(self.offset_hist[1] as usize),
865 Some(self.offset_hist[2] as usize),
866 (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
867 ];
868
869 let mut best: Option<(usize, usize)> = None;
870 let mut seen = [0usize; 3];
871 let mut seen_len = 0usize;
872 for offset in reps.into_iter().flatten() {
873 if offset == 0 {
874 continue;
875 }
876 if seen[..seen_len].contains(&offset) {
877 continue;
878 }
879 seen[seen_len] = offset;
880 seen_len += 1;
881
882 let Some(match_len) = self.offset_match_len(offset, data_slice) else {
883 continue;
884 };
885 if match_len < MIN_MATCH_LEN {
886 continue;
887 }
888
889 if best.is_none_or(|(old_offset, old_len)| {
890 match_len > old_len || (match_len == old_len && offset < old_offset)
891 }) {
892 best = Some((offset, match_len));
893 }
894 }
895 best
896 }
897
898 fn offset_match_len(&self, offset: usize, data_slice: &[u8]) -> Option<usize> {
899 if offset == 0 {
900 return None;
901 }
902
903 let last_idx = self.window.len().checked_sub(1)?;
904 let last_entry = &self.window[last_idx];
905 let searchable_prefix = self.window_size - (last_entry.data.len() - self.suffix_idx);
906 if offset > searchable_prefix {
907 return None;
908 }
909
910 let mut remaining = offset;
911 let (entry_idx, match_index) = if remaining <= self.suffix_idx {
912 (last_idx, self.suffix_idx - remaining)
913 } else {
914 remaining -= self.suffix_idx;
915 let mut found = None;
916 for entry_idx in (0..last_idx).rev() {
917 let len = self.window[entry_idx].data.len();
918 if remaining <= len {
919 found = Some((entry_idx, len - remaining));
920 break;
921 }
922 remaining -= len;
923 }
924 found?
925 };
926
927 let match_entry = &self.window[entry_idx];
928 let match_slice = &match_entry.data[match_index..];
929
930 Some(Self::common_prefix_len(match_slice, data_slice))
931 }
932
933 fn skip_matching(&mut self) {
935 let len = self.window.last().unwrap().data.len();
936 self.add_suffixes_till(len, 1);
937 self.suffix_idx = len;
938 self.last_idx_in_sequence = len;
939 }
940
941 fn add_data(
944 &mut self,
945 data: Vec<u8>,
946 suffixes: SuffixStore,
947 reuse_space: impl FnMut(Vec<u8>, SuffixStore),
948 ) {
949 assert!(
950 self.window.is_empty() || self.suffix_idx == self.window.last().unwrap().data.len()
951 );
952 self.reserve(data.len(), reuse_space);
953 #[cfg(debug_assertions)]
954 self.concat_window.extend_from_slice(&data);
955
956 if let Some(last_len) = self.window.last().map(|last| last.data.len()) {
957 for entry in self.window.iter_mut() {
958 entry.base_offset += last_len;
959 }
960 }
961
962 let len = data.len();
963 self.window.push(WindowEntry {
964 data,
965 suffixes,
966 base_offset: 0,
967 });
968 self.window_size += len;
969 self.suffix_idx = 0;
970 self.last_idx_in_sequence = 0;
971 }
972
973 fn reserve(&mut self, amount: usize, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
976 assert!(self.max_window_size >= amount);
977 while self.window_size + amount > self.max_window_size {
978 let removed = self.window.remove(0);
979 self.window_size -= removed.data.len();
980 #[cfg(debug_assertions)]
981 self.concat_window.drain(0..removed.data.len());
982
983 let WindowEntry {
984 suffixes,
985 data: leaked_vec,
986 base_offset: _,
987 } = removed;
988 reuse_space(leaked_vec, suffixes);
989 }
990 }
991}
992
993struct DfastMatchGenerator {
994 max_window_size: usize,
995 window: VecDeque<Vec<u8>>,
996 window_size: usize,
997 history: Vec<u8>,
1000 history_start: usize,
1001 history_abs_start: usize,
1002 offset_hist: [u32; 3],
1003 short_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
1004 long_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
1005 lazy_depth: u8,
1007}
1008
1009#[derive(Copy, Clone)]
1010struct MatchCandidate {
1011 start: usize,
1012 offset: usize,
1013 match_len: usize,
1014}
1015
1016impl DfastMatchGenerator {
1017 fn new(max_window_size: usize) -> Self {
1018 Self {
1019 max_window_size,
1020 window: VecDeque::new(),
1021 window_size: 0,
1022 history: Vec::new(),
1023 history_start: 0,
1024 history_abs_start: 0,
1025 offset_hist: [1, 4, 8],
1026 short_hash: Vec::new(),
1027 long_hash: Vec::new(),
1028 lazy_depth: 1,
1029 }
1030 }
1031
1032 fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
1033 self.window_size = 0;
1034 self.history.clear();
1035 self.history_start = 0;
1036 self.history_abs_start = 0;
1037 self.offset_hist = [1, 4, 8];
1038 if !self.short_hash.is_empty() {
1039 self.short_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
1040 self.long_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
1041 }
1042 for mut data in self.window.drain(..) {
1043 data.resize(data.capacity(), 0);
1044 reuse_space(data);
1045 }
1046 }
1047
1048 fn get_last_space(&self) -> &[u8] {
1049 self.window.back().unwrap().as_slice()
1050 }
1051
1052 fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
1053 assert!(data.len() <= self.max_window_size);
1054 while self.window_size + data.len() > self.max_window_size {
1055 let removed = self.window.pop_front().unwrap();
1056 self.window_size -= removed.len();
1057 self.history_start += removed.len();
1058 self.history_abs_start += removed.len();
1059 reuse_space(removed);
1060 }
1061 self.compact_history();
1062 self.history.extend_from_slice(&data);
1063 self.window_size += data.len();
1064 self.window.push_back(data);
1065 }
1066
1067 fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
1068 while self.window_size > self.max_window_size {
1069 let removed = self.window.pop_front().unwrap();
1070 self.window_size -= removed.len();
1071 self.history_start += removed.len();
1072 self.history_abs_start += removed.len();
1073 reuse_space(removed);
1074 }
1075 }
1076
1077 fn skip_matching(&mut self) {
1078 self.ensure_hash_tables();
1079 let current_len = self.window.back().unwrap().len();
1080 let current_abs_start = self.history_abs_start + self.window_size - current_len;
1081 self.insert_positions(current_abs_start, current_abs_start + current_len);
1082 }
1083
1084 fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
1085 self.ensure_hash_tables();
1086
1087 let current_len = self.window.back().unwrap().len();
1088 if current_len == 0 {
1089 return;
1090 }
1091
1092 let current_abs_start = self.history_abs_start + self.window_size - current_len;
1093
1094 let mut pos = 0usize;
1095 let mut literals_start = 0usize;
1096 while pos + DFAST_MIN_MATCH_LEN <= current_len {
1097 let abs_pos = current_abs_start + pos;
1098 let lit_len = pos - literals_start;
1099
1100 let best = self.best_match(abs_pos, lit_len);
1101 if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
1102 self.insert_positions(abs_pos, candidate.start + candidate.match_len);
1103 let current = self.window.back().unwrap().as_slice();
1104 let start = candidate.start - current_abs_start;
1105 let literals = ¤t[literals_start..start];
1106 handle_sequence(Sequence::Triple {
1107 literals,
1108 offset: candidate.offset,
1109 match_len: candidate.match_len,
1110 });
1111 let _ = encode_offset_with_history(
1114 candidate.offset as u32,
1115 literals.len() as u32,
1116 &mut self.offset_hist,
1117 );
1118 pos = start + candidate.match_len;
1119 literals_start = pos;
1120 } else {
1121 self.insert_position(abs_pos);
1122 pos += 1;
1123 }
1124 }
1125
1126 if literals_start < current_len {
1127 let current = self.window.back().unwrap().as_slice();
1134 handle_sequence(Sequence::Literals {
1135 literals: ¤t[literals_start..],
1136 });
1137 }
1138 }
1139
1140 fn ensure_hash_tables(&mut self) {
1141 if self.short_hash.is_empty() {
1142 self.short_hash =
1146 alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; 1 << DFAST_HASH_BITS];
1147 self.long_hash =
1148 alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; 1 << DFAST_HASH_BITS];
1149 }
1150 }
1151
1152 fn compact_history(&mut self) {
1153 if self.history_start == 0 {
1154 return;
1155 }
1156 if self.history_start >= self.max_window_size
1157 || self.history_start * 2 >= self.history.len()
1158 {
1159 self.history.drain(..self.history_start);
1160 self.history_start = 0;
1161 }
1162 }
1163
1164 fn live_history(&self) -> &[u8] {
1165 &self.history[self.history_start..]
1166 }
1167
1168 fn history_abs_end(&self) -> usize {
1169 self.history_abs_start + self.live_history().len()
1170 }
1171
1172 fn best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1173 let rep = self.repcode_candidate(abs_pos, lit_len);
1174 let hash = self.hash_candidate(abs_pos, lit_len);
1175 Self::better_candidate(rep, hash)
1176 }
1177
1178 fn pick_lazy_match(
1179 &self,
1180 abs_pos: usize,
1181 lit_len: usize,
1182 best: Option<MatchCandidate>,
1183 ) -> Option<MatchCandidate> {
1184 let best = best?;
1185 if best.match_len >= DFAST_TARGET_LEN
1186 || abs_pos + 1 + DFAST_MIN_MATCH_LEN > self.history_abs_end()
1187 {
1188 return Some(best);
1189 }
1190
1191 let next = self.best_match(abs_pos + 1, lit_len + 1);
1193 if let Some(next) = next
1194 && (next.match_len > best.match_len
1195 || (next.match_len == best.match_len && next.offset < best.offset))
1196 {
1197 return None;
1198 }
1199
1200 if self.lazy_depth >= 2 && abs_pos + 2 + DFAST_MIN_MATCH_LEN <= self.history_abs_end() {
1202 let next2 = self.best_match(abs_pos + 2, lit_len + 2);
1203 if let Some(next2) = next2
1204 && next2.match_len > best.match_len + 1
1205 {
1206 return None;
1207 }
1208 }
1209
1210 Some(best)
1211 }
1212
1213 fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1214 let reps = if lit_len == 0 {
1215 [
1216 Some(self.offset_hist[1] as usize),
1217 Some(self.offset_hist[2] as usize),
1218 (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
1219 ]
1220 } else {
1221 [
1222 Some(self.offset_hist[0] as usize),
1223 Some(self.offset_hist[1] as usize),
1224 Some(self.offset_hist[2] as usize),
1225 ]
1226 };
1227
1228 let mut best = None;
1229 for rep in reps.into_iter().flatten() {
1230 if rep == 0 || rep > abs_pos {
1231 continue;
1232 }
1233 let candidate_pos = abs_pos - rep;
1234 if candidate_pos < self.history_abs_start {
1235 continue;
1236 }
1237 let concat = self.live_history();
1238 let candidate_idx = candidate_pos - self.history_abs_start;
1239 let current_idx = abs_pos - self.history_abs_start;
1240 if current_idx + DFAST_MIN_MATCH_LEN > concat.len() {
1241 continue;
1242 }
1243 let match_len =
1244 MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
1245 if match_len >= DFAST_MIN_MATCH_LEN {
1246 let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
1247 best = Self::better_candidate(best, Some(candidate));
1248 }
1249 }
1250 best
1251 }
1252
1253 fn hash_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1254 let concat = self.live_history();
1255 let current_idx = abs_pos - self.history_abs_start;
1256 let mut best = None;
1257 for candidate_pos in self.long_candidates(abs_pos) {
1258 if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos {
1259 continue;
1260 }
1261 let candidate_idx = candidate_pos - self.history_abs_start;
1262 let match_len =
1263 MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
1264 if match_len >= DFAST_MIN_MATCH_LEN {
1265 let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
1266 best = Self::better_candidate(best, Some(candidate));
1267 if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) {
1268 return best;
1269 }
1270 }
1271 }
1272
1273 for candidate_pos in self.short_candidates(abs_pos) {
1274 if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos {
1275 continue;
1276 }
1277 let candidate_idx = candidate_pos - self.history_abs_start;
1278 let match_len =
1279 MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
1280 if match_len >= DFAST_MIN_MATCH_LEN {
1281 let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
1282 best = Self::better_candidate(best, Some(candidate));
1283 if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) {
1284 return best;
1285 }
1286 }
1287 }
1288 best
1289 }
1290
1291 fn extend_backwards(
1292 &self,
1293 mut candidate_pos: usize,
1294 mut abs_pos: usize,
1295 mut match_len: usize,
1296 lit_len: usize,
1297 ) -> MatchCandidate {
1298 let concat = self.live_history();
1299 let min_abs_pos = abs_pos - lit_len;
1300 while abs_pos > min_abs_pos
1301 && candidate_pos > self.history_abs_start
1302 && concat[candidate_pos - self.history_abs_start - 1]
1303 == concat[abs_pos - self.history_abs_start - 1]
1304 {
1305 candidate_pos -= 1;
1306 abs_pos -= 1;
1307 match_len += 1;
1308 }
1309 MatchCandidate {
1310 start: abs_pos,
1311 offset: abs_pos - candidate_pos,
1312 match_len,
1313 }
1314 }
1315
1316 fn better_candidate(
1317 lhs: Option<MatchCandidate>,
1318 rhs: Option<MatchCandidate>,
1319 ) -> Option<MatchCandidate> {
1320 match (lhs, rhs) {
1321 (None, other) | (other, None) => other,
1322 (Some(lhs), Some(rhs)) => {
1323 if rhs.match_len > lhs.match_len
1324 || (rhs.match_len == lhs.match_len && rhs.offset < lhs.offset)
1325 {
1326 Some(rhs)
1327 } else {
1328 Some(lhs)
1329 }
1330 }
1331 }
1332 }
1333
1334 fn insert_positions(&mut self, start: usize, end: usize) {
1335 for pos in start..end {
1336 self.insert_position(pos);
1337 }
1338 }
1339
1340 fn insert_position(&mut self, pos: usize) {
1341 let idx = pos - self.history_abs_start;
1342 let short = {
1343 let concat = self.live_history();
1344 (idx + 4 <= concat.len()).then(|| Self::hash4(&concat[idx..]))
1345 };
1346 if let Some(short) = short {
1347 let bucket = &mut self.short_hash[short];
1348 if bucket[0] != pos {
1349 bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
1350 bucket[0] = pos;
1351 }
1352 }
1353
1354 let long = {
1355 let concat = self.live_history();
1356 (idx + 8 <= concat.len()).then(|| Self::hash8(&concat[idx..]))
1357 };
1358 if let Some(long) = long {
1359 let bucket = &mut self.long_hash[long];
1360 if bucket[0] != pos {
1361 bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
1362 bucket[0] = pos;
1363 }
1364 }
1365 }
1366
1367 fn short_candidates(&self, pos: usize) -> impl Iterator<Item = usize> + '_ {
1368 let concat = self.live_history();
1369 let idx = pos - self.history_abs_start;
1370 (idx + 4 <= concat.len())
1371 .then(|| self.short_hash[Self::hash4(&concat[idx..])])
1372 .into_iter()
1373 .flatten()
1374 .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
1375 }
1376
1377 fn long_candidates(&self, pos: usize) -> impl Iterator<Item = usize> + '_ {
1378 let concat = self.live_history();
1379 let idx = pos - self.history_abs_start;
1380 (idx + 8 <= concat.len())
1381 .then(|| self.long_hash[Self::hash8(&concat[idx..])])
1382 .into_iter()
1383 .flatten()
1384 .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
1385 }
1386
1387 fn hash4(data: &[u8]) -> usize {
1388 let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64;
1389 Self::hash_bits(value)
1390 }
1391
1392 fn hash8(data: &[u8]) -> usize {
1393 let value = u64::from_le_bytes(data[..8].try_into().unwrap());
1394 Self::hash_bits(value)
1395 }
1396
1397 fn hash_bits(value: u64) -> usize {
1398 const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
1399 ((value.wrapping_mul(PRIME)) >> (64 - DFAST_HASH_BITS)) as usize
1400 }
1401}
1402
1403struct HcMatchGenerator {
1404 max_window_size: usize,
1405 window: VecDeque<Vec<u8>>,
1406 window_size: usize,
1407 history: Vec<u8>,
1408 history_start: usize,
1409 history_abs_start: usize,
1410 offset_hist: [u32; 3],
1411 hash_table: Vec<u32>,
1412 chain_table: Vec<u32>,
1413 lazy_depth: u8,
1414 hash_log: usize,
1415 chain_log: usize,
1416 search_depth: usize,
1417 target_len: usize,
1418}
1419
1420impl HcMatchGenerator {
1421 fn new(max_window_size: usize) -> Self {
1422 Self {
1423 max_window_size,
1424 window: VecDeque::new(),
1425 window_size: 0,
1426 history: Vec::new(),
1427 history_start: 0,
1428 history_abs_start: 0,
1429 offset_hist: [1, 4, 8],
1430 hash_table: Vec::new(),
1431 chain_table: Vec::new(),
1432 lazy_depth: 2,
1433 hash_log: HC_HASH_LOG,
1434 chain_log: HC_CHAIN_LOG,
1435 search_depth: HC_SEARCH_DEPTH,
1436 target_len: HC_TARGET_LEN,
1437 }
1438 }
1439
1440 fn configure(&mut self, config: HcConfig) {
1441 let resize = self.hash_log != config.hash_log || self.chain_log != config.chain_log;
1442 self.hash_log = config.hash_log;
1443 self.chain_log = config.chain_log;
1444 self.search_depth = config.search_depth.min(MAX_HC_SEARCH_DEPTH);
1445 self.target_len = config.target_len;
1446 if resize && !self.hash_table.is_empty() {
1447 self.hash_table.clear();
1449 self.chain_table.clear();
1450 }
1451 }
1452
1453 fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
1454 self.window_size = 0;
1455 self.history.clear();
1456 self.history_start = 0;
1457 self.history_abs_start = 0;
1458 self.offset_hist = [1, 4, 8];
1459 if !self.hash_table.is_empty() {
1460 self.hash_table.fill(HC_EMPTY);
1461 self.chain_table.fill(HC_EMPTY);
1462 }
1463 for mut data in self.window.drain(..) {
1464 data.resize(data.capacity(), 0);
1465 reuse_space(data);
1466 }
1467 }
1468
1469 fn get_last_space(&self) -> &[u8] {
1470 self.window.back().unwrap().as_slice()
1471 }
1472
1473 fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
1477 assert!(data.len() <= self.max_window_size);
1478 while self.window_size + data.len() > self.max_window_size {
1479 let removed = self.window.pop_front().unwrap();
1480 self.window_size -= removed.len();
1481 self.history_start += removed.len();
1482 self.history_abs_start += removed.len();
1483 reuse_space(removed);
1484 }
1485 self.compact_history();
1486 self.history.extend_from_slice(&data);
1487 self.window_size += data.len();
1488 self.window.push_back(data);
1489 }
1490
1491 fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
1492 while self.window_size > self.max_window_size {
1493 let removed = self.window.pop_front().unwrap();
1494 self.window_size -= removed.len();
1495 self.history_start += removed.len();
1496 self.history_abs_start += removed.len();
1497 reuse_space(removed);
1498 }
1499 }
1500
1501 fn backfill_boundary_positions(&mut self, current_abs_start: usize) {
1504 let backfill_start = current_abs_start
1505 .saturating_sub(3)
1506 .max(self.history_abs_start);
1507 if backfill_start < current_abs_start {
1508 self.insert_positions(backfill_start, current_abs_start);
1509 }
1510 }
1511
1512 fn skip_matching(&mut self) {
1513 self.ensure_tables();
1514 let current_len = self.window.back().unwrap().len();
1515 let current_abs_start = self.history_abs_start + self.window_size - current_len;
1516 self.backfill_boundary_positions(current_abs_start);
1517 self.insert_positions(current_abs_start, current_abs_start + current_len);
1518 }
1519
1520 fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
1521 self.ensure_tables();
1522
1523 let current_len = self.window.back().unwrap().len();
1524 if current_len == 0 {
1525 return;
1526 }
1527
1528 let current_abs_start = self.history_abs_start + self.window_size - current_len;
1529 self.backfill_boundary_positions(current_abs_start);
1530
1531 let mut pos = 0usize;
1532 let mut literals_start = 0usize;
1533 while pos + HC_MIN_MATCH_LEN <= current_len {
1534 let abs_pos = current_abs_start + pos;
1535 let lit_len = pos - literals_start;
1536
1537 let best = self.find_best_match(abs_pos, lit_len);
1538 if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
1539 self.insert_positions(abs_pos, candidate.start + candidate.match_len);
1540 let current = self.window.back().unwrap().as_slice();
1541 let start = candidate.start - current_abs_start;
1542 let literals = ¤t[literals_start..start];
1543 handle_sequence(Sequence::Triple {
1544 literals,
1545 offset: candidate.offset,
1546 match_len: candidate.match_len,
1547 });
1548 let _ = encode_offset_with_history(
1549 candidate.offset as u32,
1550 literals.len() as u32,
1551 &mut self.offset_hist,
1552 );
1553 pos = start + candidate.match_len;
1554 literals_start = pos;
1555 } else {
1556 self.insert_position(abs_pos);
1557 pos += 1;
1558 }
1559 }
1560
1561 while pos + 4 <= current_len {
1564 self.insert_position(current_abs_start + pos);
1565 pos += 1;
1566 }
1567
1568 if literals_start < current_len {
1569 let current = self.window.back().unwrap().as_slice();
1570 handle_sequence(Sequence::Literals {
1571 literals: ¤t[literals_start..],
1572 });
1573 }
1574 }
1575
1576 fn ensure_tables(&mut self) {
1577 if self.hash_table.is_empty() {
1578 self.hash_table = alloc::vec![HC_EMPTY; 1 << self.hash_log];
1579 self.chain_table = alloc::vec![HC_EMPTY; 1 << self.chain_log];
1580 }
1581 }
1582
1583 fn compact_history(&mut self) {
1584 if self.history_start == 0 {
1585 return;
1586 }
1587 if self.history_start >= self.max_window_size
1588 || self.history_start * 2 >= self.history.len()
1589 {
1590 self.history.drain(..self.history_start);
1591 self.history_start = 0;
1592 }
1593 }
1594
1595 fn live_history(&self) -> &[u8] {
1596 &self.history[self.history_start..]
1597 }
1598
1599 fn history_abs_end(&self) -> usize {
1600 self.history_abs_start + self.live_history().len()
1601 }
1602
1603 fn hash_position(&self, data: &[u8]) -> usize {
1604 let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64;
1605 const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
1606 ((value.wrapping_mul(PRIME)) >> (64 - self.hash_log)) as usize
1607 }
1608
1609 fn insert_position(&mut self, abs_pos: usize) {
1610 let idx = abs_pos - self.history_abs_start;
1611 let concat = self.live_history();
1612 if idx + 4 > concat.len() {
1613 return;
1614 }
1615 let hash = self.hash_position(&concat[idx..]);
1616 if abs_pos >= u32::MAX as usize {
1621 return;
1622 }
1623 let pos_u32 = abs_pos as u32;
1624 let stored = pos_u32 + 1;
1625 let chain_idx = pos_u32 as usize & ((1 << self.chain_log) - 1);
1626 let prev = self.hash_table[hash];
1627 self.chain_table[chain_idx] = prev;
1628 self.hash_table[hash] = stored;
1629 }
1630
1631 fn insert_positions(&mut self, start: usize, end: usize) {
1632 for pos in start..end {
1633 self.insert_position(pos);
1634 }
1635 }
1636
1637 fn chain_candidates(&self, abs_pos: usize) -> [usize; MAX_HC_SEARCH_DEPTH] {
1640 let mut buf = [usize::MAX; MAX_HC_SEARCH_DEPTH];
1641 let idx = abs_pos - self.history_abs_start;
1642 let concat = self.live_history();
1643 if idx + 4 > concat.len() {
1644 return buf;
1645 }
1646 let hash = self.hash_position(&concat[idx..]);
1647 let chain_mask = (1 << self.chain_log) - 1;
1648
1649 let mut cur = self.hash_table[hash];
1650 let mut filled = 0;
1651 let mut steps = 0;
1658 let max_chain_steps = self.search_depth * 4;
1659 while filled < self.search_depth && steps < max_chain_steps {
1660 if cur == HC_EMPTY {
1661 break;
1662 }
1663 let candidate_abs = cur.wrapping_sub(1) as usize;
1664 let next = self.chain_table[candidate_abs & chain_mask];
1665 steps += 1;
1666 if next == cur {
1667 if candidate_abs >= self.history_abs_start && candidate_abs < abs_pos {
1670 buf[filled] = candidate_abs;
1671 }
1672 break;
1673 }
1674 cur = next;
1675 if candidate_abs < self.history_abs_start || candidate_abs >= abs_pos {
1676 continue;
1677 }
1678 buf[filled] = candidate_abs;
1679 filled += 1;
1680 }
1681 buf
1682 }
1683
1684 fn find_best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1685 let rep = self.repcode_candidate(abs_pos, lit_len);
1686 let hash = self.hash_chain_candidate(abs_pos, lit_len);
1687 Self::better_candidate(rep, hash)
1688 }
1689
1690 fn hash_chain_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1691 let concat = self.live_history();
1692 let current_idx = abs_pos - self.history_abs_start;
1693 if current_idx + HC_MIN_MATCH_LEN > concat.len() {
1694 return None;
1695 }
1696
1697 let mut best: Option<MatchCandidate> = None;
1698 for candidate_abs in self.chain_candidates(abs_pos) {
1699 if candidate_abs == usize::MAX {
1700 break;
1701 }
1702 let candidate_idx = candidate_abs - self.history_abs_start;
1703 let match_len =
1704 MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
1705 if match_len >= HC_MIN_MATCH_LEN {
1706 let candidate = self.extend_backwards(candidate_abs, abs_pos, match_len, lit_len);
1707 best = Self::better_candidate(best, Some(candidate));
1708 if best.is_some_and(|b| b.match_len >= self.target_len) {
1709 return best;
1710 }
1711 }
1712 }
1713 best
1714 }
1715
1716 fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1717 let reps = if lit_len == 0 {
1718 [
1719 Some(self.offset_hist[1] as usize),
1720 Some(self.offset_hist[2] as usize),
1721 (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
1722 ]
1723 } else {
1724 [
1725 Some(self.offset_hist[0] as usize),
1726 Some(self.offset_hist[1] as usize),
1727 Some(self.offset_hist[2] as usize),
1728 ]
1729 };
1730
1731 let concat = self.live_history();
1732 let current_idx = abs_pos - self.history_abs_start;
1733 if current_idx + HC_MIN_MATCH_LEN > concat.len() {
1734 return None;
1735 }
1736
1737 let mut best = None;
1738 for rep in reps.into_iter().flatten() {
1739 if rep == 0 || rep > abs_pos {
1740 continue;
1741 }
1742 let candidate_pos = abs_pos - rep;
1743 if candidate_pos < self.history_abs_start {
1744 continue;
1745 }
1746 let candidate_idx = candidate_pos - self.history_abs_start;
1747 let match_len =
1748 MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
1749 if match_len >= HC_MIN_MATCH_LEN {
1750 let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
1751 best = Self::better_candidate(best, Some(candidate));
1752 }
1753 }
1754 best
1755 }
1756
1757 fn extend_backwards(
1758 &self,
1759 mut candidate_pos: usize,
1760 mut abs_pos: usize,
1761 mut match_len: usize,
1762 lit_len: usize,
1763 ) -> MatchCandidate {
1764 let concat = self.live_history();
1765 let min_abs_pos = abs_pos - lit_len;
1766 while abs_pos > min_abs_pos
1767 && candidate_pos > self.history_abs_start
1768 && concat[candidate_pos - self.history_abs_start - 1]
1769 == concat[abs_pos - self.history_abs_start - 1]
1770 {
1771 candidate_pos -= 1;
1772 abs_pos -= 1;
1773 match_len += 1;
1774 }
1775 MatchCandidate {
1776 start: abs_pos,
1777 offset: abs_pos - candidate_pos,
1778 match_len,
1779 }
1780 }
1781
1782 fn better_candidate(
1783 lhs: Option<MatchCandidate>,
1784 rhs: Option<MatchCandidate>,
1785 ) -> Option<MatchCandidate> {
1786 match (lhs, rhs) {
1787 (None, other) | (other, None) => other,
1788 (Some(lhs), Some(rhs)) => {
1789 let lhs_gain = Self::match_gain(lhs.match_len, lhs.offset);
1790 let rhs_gain = Self::match_gain(rhs.match_len, rhs.offset);
1791 if rhs_gain > lhs_gain {
1792 Some(rhs)
1793 } else {
1794 Some(lhs)
1795 }
1796 }
1797 }
1798 }
1799
1800 fn match_gain(match_len: usize, offset: usize) -> i32 {
1801 debug_assert!(
1802 offset > 0,
1803 "zstd offsets are 1-indexed, offset=0 is invalid"
1804 );
1805 let offset_bits = 32 - (offset as u32).leading_zeros() as i32;
1806 (match_len as i32) * 4 - offset_bits
1807 }
1808
1809 fn pick_lazy_match(
1813 &self,
1814 abs_pos: usize,
1815 lit_len: usize,
1816 best: Option<MatchCandidate>,
1817 ) -> Option<MatchCandidate> {
1818 let best = best?;
1819 if best.match_len >= self.target_len
1820 || abs_pos + 1 + HC_MIN_MATCH_LEN > self.history_abs_end()
1821 {
1822 return Some(best);
1823 }
1824
1825 let current_gain = Self::match_gain(best.match_len, best.offset) + 4;
1826
1827 let next = self.find_best_match(abs_pos + 1, lit_len + 1);
1829 if let Some(next) = next {
1830 let next_gain = Self::match_gain(next.match_len, next.offset);
1831 if next_gain > current_gain {
1832 return None;
1833 }
1834 }
1835
1836 if self.lazy_depth >= 2 && abs_pos + 2 + HC_MIN_MATCH_LEN <= self.history_abs_end() {
1838 let next2 = self.find_best_match(abs_pos + 2, lit_len + 2);
1839 if let Some(next2) = next2 {
1840 let next2_gain = Self::match_gain(next2.match_len, next2.offset);
1841 if next2_gain > current_gain + 4 {
1843 return None;
1844 }
1845 }
1846 }
1847
1848 Some(best)
1849 }
1850}
1851
1852#[test]
1853fn matches() {
1854 let mut matcher = MatchGenerator::new(1000);
1855 let mut original_data = Vec::new();
1856 let mut reconstructed = Vec::new();
1857
1858 let replay_sequence = |seq: Sequence<'_>, reconstructed: &mut Vec<u8>| match seq {
1859 Sequence::Literals { literals } => {
1860 assert!(!literals.is_empty());
1861 reconstructed.extend_from_slice(literals);
1862 }
1863 Sequence::Triple {
1864 literals,
1865 offset,
1866 match_len,
1867 } => {
1868 assert!(offset > 0);
1869 assert!(match_len >= MIN_MATCH_LEN);
1870 reconstructed.extend_from_slice(literals);
1871 assert!(offset <= reconstructed.len());
1872 let start = reconstructed.len() - offset;
1873 for i in 0..match_len {
1874 let byte = reconstructed[start + i];
1875 reconstructed.push(byte);
1876 }
1877 }
1878 };
1879
1880 matcher.add_data(
1881 alloc::vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
1882 SuffixStore::with_capacity(100),
1883 |_, _| {},
1884 );
1885 original_data.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
1886
1887 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
1888
1889 assert!(!matcher.next_sequence(|_| {}));
1890
1891 matcher.add_data(
1892 alloc::vec![
1893 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
1894 ],
1895 SuffixStore::with_capacity(100),
1896 |_, _| {},
1897 );
1898 original_data.extend_from_slice(&[
1899 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
1900 ]);
1901
1902 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
1903 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
1904 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
1905 assert!(!matcher.next_sequence(|_| {}));
1906
1907 matcher.add_data(
1908 alloc::vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0],
1909 SuffixStore::with_capacity(100),
1910 |_, _| {},
1911 );
1912 original_data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0]);
1913
1914 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
1915 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
1916 assert!(!matcher.next_sequence(|_| {}));
1917
1918 matcher.add_data(
1919 alloc::vec![0, 0, 0, 0, 0],
1920 SuffixStore::with_capacity(100),
1921 |_, _| {},
1922 );
1923 original_data.extend_from_slice(&[0, 0, 0, 0, 0]);
1924
1925 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
1926 assert!(!matcher.next_sequence(|_| {}));
1927
1928 matcher.add_data(
1929 alloc::vec![7, 8, 9, 10, 11],
1930 SuffixStore::with_capacity(100),
1931 |_, _| {},
1932 );
1933 original_data.extend_from_slice(&[7, 8, 9, 10, 11]);
1934
1935 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
1936 assert!(!matcher.next_sequence(|_| {}));
1937
1938 matcher.add_data(
1939 alloc::vec![1, 3, 5, 7, 9],
1940 SuffixStore::with_capacity(100),
1941 |_, _| {},
1942 );
1943 matcher.skip_matching();
1944 original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
1945 reconstructed.extend_from_slice(&[1, 3, 5, 7, 9]);
1946 assert!(!matcher.next_sequence(|_| {}));
1947
1948 matcher.add_data(
1949 alloc::vec![1, 3, 5, 7, 9],
1950 SuffixStore::with_capacity(100),
1951 |_, _| {},
1952 );
1953 original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
1954
1955 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
1956 assert!(!matcher.next_sequence(|_| {}));
1957
1958 matcher.add_data(
1959 alloc::vec![0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23],
1960 SuffixStore::with_capacity(100),
1961 |_, _| {},
1962 );
1963 original_data.extend_from_slice(&[0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23]);
1964
1965 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
1966 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
1967 assert!(!matcher.next_sequence(|_| {}));
1968
1969 assert_eq!(reconstructed, original_data);
1970}
1971
1972#[test]
1973fn dfast_matches_roundtrip_multi_block_pattern() {
1974 let pattern = [9, 21, 44, 184, 19, 96, 171, 109, 141, 251];
1975 let first_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
1976 let second_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
1977
1978 let mut matcher = DfastMatchGenerator::new(DFAST_DEFAULT_WINDOW_SIZE);
1979 let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
1980 Sequence::Literals { literals } => decoded.extend_from_slice(literals),
1981 Sequence::Triple {
1982 literals,
1983 offset,
1984 match_len,
1985 } => {
1986 decoded.extend_from_slice(literals);
1987 let start = decoded.len() - offset;
1988 for i in 0..match_len {
1989 let byte = decoded[start + i];
1990 decoded.push(byte);
1991 }
1992 }
1993 };
1994
1995 matcher.add_data(first_block.clone(), |_| {});
1996 let mut history = Vec::new();
1997 matcher.start_matching(|seq| replay_sequence(&mut history, seq));
1998 assert_eq!(history, first_block);
1999
2000 matcher.add_data(second_block.clone(), |_| {});
2001 let prefix_len = history.len();
2002 matcher.start_matching(|seq| replay_sequence(&mut history, seq));
2003
2004 assert_eq!(&history[prefix_len..], second_block.as_slice());
2005}
2006
2007#[test]
2008fn driver_switches_backends_and_initializes_dfast_via_reset() {
2009 let mut driver = MatchGeneratorDriver::new(32, 2);
2010
2011 driver.reset(CompressionLevel::Default);
2012 assert_eq!(driver.window_size(), DFAST_DEFAULT_WINDOW_SIZE as u64);
2013
2014 let mut first = driver.get_next_space();
2015 first[..12].copy_from_slice(b"abcabcabcabc");
2016 first.truncate(12);
2017 driver.commit_space(first);
2018 assert_eq!(driver.get_last_space(), b"abcabcabcabc");
2019 driver.skip_matching();
2020
2021 let mut second = driver.get_next_space();
2022 second[..12].copy_from_slice(b"abcabcabcabc");
2023 second.truncate(12);
2024 driver.commit_space(second);
2025
2026 let mut reconstructed = b"abcabcabcabc".to_vec();
2027 driver.start_matching(|seq| match seq {
2028 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
2029 Sequence::Triple {
2030 literals,
2031 offset,
2032 match_len,
2033 } => {
2034 reconstructed.extend_from_slice(literals);
2035 let start = reconstructed.len() - offset;
2036 for i in 0..match_len {
2037 let byte = reconstructed[start + i];
2038 reconstructed.push(byte);
2039 }
2040 }
2041 });
2042 assert_eq!(reconstructed, b"abcabcabcabcabcabcabcabc");
2043
2044 driver.reset(CompressionLevel::Fastest);
2045 assert_eq!(driver.window_size(), 64);
2046}
2047
2048#[test]
2049fn driver_best_to_fastest_releases_oversized_hc_tables() {
2050 let mut driver = MatchGeneratorDriver::new(32, 2);
2051
2052 driver.reset(CompressionLevel::Best);
2054 assert_eq!(driver.window_size(), BEST_DEFAULT_WINDOW_SIZE as u64);
2055
2056 let mut space = driver.get_next_space();
2058 space[..12].copy_from_slice(b"abcabcabcabc");
2059 space.truncate(12);
2060 driver.commit_space(space);
2061 driver.skip_matching();
2062
2063 driver.reset(CompressionLevel::Fastest);
2065 assert_eq!(driver.window_size(), 64);
2066
2067 let hc = driver.hc_match_generator.as_ref().unwrap();
2069 assert!(
2070 hc.hash_table.is_empty(),
2071 "HC hash_table should be released after switching away from Best"
2072 );
2073 assert!(
2074 hc.chain_table.is_empty(),
2075 "HC chain_table should be released after switching away from Best"
2076 );
2077}
2078
2079#[test]
2080fn driver_better_to_best_resizes_hc_tables() {
2081 let mut driver = MatchGeneratorDriver::new(32, 2);
2082
2083 driver.reset(CompressionLevel::Better);
2085 assert_eq!(driver.window_size(), BETTER_DEFAULT_WINDOW_SIZE as u64);
2086
2087 let mut space = driver.get_next_space();
2088 space[..12].copy_from_slice(b"abcabcabcabc");
2089 space.truncate(12);
2090 driver.commit_space(space);
2091 driver.skip_matching();
2092
2093 let hc = driver.hc_match_generator.as_ref().unwrap();
2094 let better_hash_len = hc.hash_table.len();
2095 let better_chain_len = hc.chain_table.len();
2096
2097 driver.reset(CompressionLevel::Best);
2099 assert_eq!(driver.window_size(), BEST_DEFAULT_WINDOW_SIZE as u64);
2100
2101 let mut space = driver.get_next_space();
2103 space[..12].copy_from_slice(b"xyzxyzxyzxyz");
2104 space.truncate(12);
2105 driver.commit_space(space);
2106 driver.skip_matching();
2107
2108 let hc = driver.hc_match_generator.as_ref().unwrap();
2109 assert!(
2110 hc.hash_table.len() > better_hash_len,
2111 "Best hash_table ({}) should be larger than Better ({})",
2112 hc.hash_table.len(),
2113 better_hash_len
2114 );
2115 assert!(
2116 hc.chain_table.len() > better_chain_len,
2117 "Best chain_table ({}) should be larger than Better ({})",
2118 hc.chain_table.len(),
2119 better_chain_len
2120 );
2121}
2122
2123#[test]
2124fn prime_with_dictionary_preserves_history_for_first_full_block() {
2125 let mut driver = MatchGeneratorDriver::new(8, 1);
2126 driver.reset(CompressionLevel::Fastest);
2127
2128 driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
2129
2130 let mut space = driver.get_next_space();
2131 space.clear();
2132 space.extend_from_slice(b"abcdefgh");
2133 driver.commit_space(space);
2134
2135 let mut saw_match = false;
2136 driver.start_matching(|seq| {
2137 if let Sequence::Triple {
2138 literals,
2139 offset,
2140 match_len,
2141 } = seq
2142 && literals.is_empty()
2143 && offset == 8
2144 && match_len >= MIN_MATCH_LEN
2145 {
2146 saw_match = true;
2147 }
2148 });
2149
2150 assert!(
2151 saw_match,
2152 "first full block should still match dictionary-primed history"
2153 );
2154}
2155
2156#[test]
2157fn prime_with_large_dictionary_preserves_early_history_until_first_block() {
2158 let mut driver = MatchGeneratorDriver::new(8, 1);
2159 driver.reset(CompressionLevel::Fastest);
2160
2161 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
2162
2163 let mut space = driver.get_next_space();
2164 space.clear();
2165 space.extend_from_slice(b"abcdefgh");
2166 driver.commit_space(space);
2167
2168 let mut saw_match = false;
2169 driver.start_matching(|seq| {
2170 if let Sequence::Triple {
2171 literals,
2172 offset,
2173 match_len,
2174 } = seq
2175 && literals.is_empty()
2176 && offset == 24
2177 && match_len >= MIN_MATCH_LEN
2178 {
2179 saw_match = true;
2180 }
2181 });
2182
2183 assert!(
2184 saw_match,
2185 "dictionary bytes should remain addressable until frame output exceeds the live window"
2186 );
2187}
2188
2189#[test]
2190fn prime_with_dictionary_applies_offset_history_even_when_content_is_empty() {
2191 let mut driver = MatchGeneratorDriver::new(8, 1);
2192 driver.reset(CompressionLevel::Fastest);
2193
2194 driver.prime_with_dictionary(&[], [11, 7, 3]);
2195
2196 assert_eq!(driver.match_generator.offset_hist, [11, 7, 3]);
2197}
2198
2199#[test]
2200fn dfast_prime_with_dictionary_preserves_history_for_first_full_block() {
2201 let mut driver = MatchGeneratorDriver::new(8, 1);
2202 driver.reset(CompressionLevel::Default);
2203
2204 driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
2205
2206 let mut space = driver.get_next_space();
2207 space.clear();
2208 space.extend_from_slice(b"abcdefgh");
2209 driver.commit_space(space);
2210
2211 let mut saw_match = false;
2212 driver.start_matching(|seq| {
2213 if let Sequence::Triple {
2214 literals,
2215 offset,
2216 match_len,
2217 } = seq
2218 && literals.is_empty()
2219 && offset == 8
2220 && match_len >= DFAST_MIN_MATCH_LEN
2221 {
2222 saw_match = true;
2223 }
2224 });
2225
2226 assert!(
2227 saw_match,
2228 "dfast backend should match dictionary-primed history in first full block"
2229 );
2230}
2231
2232#[test]
2233fn prime_with_dictionary_does_not_inflate_reported_window_size() {
2234 let mut driver = MatchGeneratorDriver::new(8, 1);
2235 driver.reset(CompressionLevel::Fastest);
2236
2237 let before = driver.window_size();
2238 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
2239 let after = driver.window_size();
2240
2241 assert_eq!(
2242 after, before,
2243 "dictionary retention budget must not change reported frame window size"
2244 );
2245}
2246
2247#[test]
2248fn prime_with_dictionary_does_not_reuse_tiny_suffix_store() {
2249 let mut driver = MatchGeneratorDriver::new(8, 2);
2250 driver.reset(CompressionLevel::Fastest);
2251
2252 driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);
2255
2256 assert!(
2257 driver
2258 .match_generator
2259 .window
2260 .iter()
2261 .all(|entry| entry.data.len() >= MIN_MATCH_LEN),
2262 "dictionary priming must not commit tails shorter than MIN_MATCH_LEN"
2263 );
2264}
2265
2266#[test]
2267fn prime_with_dictionary_counts_only_committed_tail_budget() {
2268 let mut driver = MatchGeneratorDriver::new(8, 1);
2269 driver.reset(CompressionLevel::Fastest);
2270
2271 let before = driver.match_generator.max_window_size;
2272 driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);
2274
2275 assert_eq!(
2276 driver.match_generator.max_window_size,
2277 before + 8,
2278 "retention budget must account only for dictionary bytes actually committed to history"
2279 );
2280}
2281
2282#[test]
2283fn dfast_prime_with_dictionary_counts_four_byte_tail_budget() {
2284 let mut driver = MatchGeneratorDriver::new(8, 1);
2285 driver.reset(CompressionLevel::Default);
2286
2287 let before = driver.dfast_matcher().max_window_size;
2288 driver.prime_with_dictionary(b"abcdefghijkl", [1, 4, 8]);
2291
2292 assert_eq!(
2293 driver.dfast_matcher().max_window_size,
2294 before + 12,
2295 "dfast retention budget should include 4-byte dictionary tails"
2296 );
2297}
2298
2299#[test]
2300fn prime_with_dictionary_budget_shrinks_after_simple_eviction() {
2301 let mut driver = MatchGeneratorDriver::new(8, 1);
2302 driver.reset(CompressionLevel::Fastest);
2303
2304 let base_window = driver.match_generator.max_window_size;
2305 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
2306 assert_eq!(driver.match_generator.max_window_size, base_window + 24);
2307
2308 for block in [b"AAAAAAAA", b"BBBBBBBB"] {
2309 let mut space = driver.get_next_space();
2310 space.clear();
2311 space.extend_from_slice(block);
2312 driver.commit_space(space);
2313 driver.skip_matching();
2314 }
2315
2316 assert_eq!(
2317 driver.dictionary_retained_budget, 0,
2318 "dictionary budget should be fully retired once primed dict slices are evicted"
2319 );
2320 assert_eq!(
2321 driver.match_generator.max_window_size, base_window,
2322 "retired dictionary budget must not remain reusable for live history"
2323 );
2324}
2325
2326#[test]
2327fn prime_with_dictionary_budget_shrinks_after_dfast_eviction() {
2328 let mut driver = MatchGeneratorDriver::new(8, 1);
2329 driver.reset(CompressionLevel::Default);
2330 driver.dfast_matcher_mut().max_window_size = 8;
2333 driver.reported_window_size = 8;
2334
2335 let base_window = driver.dfast_matcher().max_window_size;
2336 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
2337 assert_eq!(driver.dfast_matcher().max_window_size, base_window + 24);
2338
2339 for block in [b"AAAAAAAA", b"BBBBBBBB"] {
2340 let mut space = driver.get_next_space();
2341 space.clear();
2342 space.extend_from_slice(block);
2343 driver.commit_space(space);
2344 driver.skip_matching();
2345 }
2346
2347 assert_eq!(
2348 driver.dictionary_retained_budget, 0,
2349 "dictionary budget should be fully retired once primed dict slices are evicted"
2350 );
2351 assert_eq!(
2352 driver.dfast_matcher().max_window_size,
2353 base_window,
2354 "retired dictionary budget must not remain reusable for live history"
2355 );
2356}
2357
2358#[test]
2359fn hc_prime_with_dictionary_preserves_history_for_first_full_block() {
2360 let mut driver = MatchGeneratorDriver::new(8, 1);
2361 driver.reset(CompressionLevel::Better);
2362
2363 driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
2364
2365 let mut space = driver.get_next_space();
2366 space.clear();
2367 space.extend_from_slice(b"abcdefgh");
2370 driver.commit_space(space);
2371
2372 let mut saw_match = false;
2373 driver.start_matching(|seq| {
2374 if let Sequence::Triple {
2375 literals,
2376 offset,
2377 match_len,
2378 } = seq
2379 && literals.is_empty()
2380 && offset == 8
2381 && match_len >= HC_MIN_MATCH_LEN
2382 {
2383 saw_match = true;
2384 }
2385 });
2386
2387 assert!(
2388 saw_match,
2389 "hash-chain backend should match dictionary-primed history in first full block"
2390 );
2391}
2392
2393#[test]
2394fn prime_with_dictionary_budget_shrinks_after_hc_eviction() {
2395 let mut driver = MatchGeneratorDriver::new(8, 1);
2396 driver.reset(CompressionLevel::Better);
2397 driver.hc_matcher_mut().max_window_size = 8;
2399 driver.reported_window_size = 8;
2400
2401 let base_window = driver.hc_matcher().max_window_size;
2402 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
2403 assert_eq!(driver.hc_matcher().max_window_size, base_window + 24);
2404
2405 for block in [b"AAAAAAAA", b"BBBBBBBB"] {
2406 let mut space = driver.get_next_space();
2407 space.clear();
2408 space.extend_from_slice(block);
2409 driver.commit_space(space);
2410 driver.skip_matching();
2411 }
2412
2413 assert_eq!(
2414 driver.dictionary_retained_budget, 0,
2415 "dictionary budget should be fully retired once primed dict slices are evicted"
2416 );
2417 assert_eq!(
2418 driver.hc_matcher().max_window_size,
2419 base_window,
2420 "retired dictionary budget must not remain reusable for live history"
2421 );
2422}
2423
2424#[test]
2425fn suffix_store_with_single_slot_does_not_panic_on_keying() {
2426 let mut suffixes = SuffixStore::with_capacity(1);
2427 suffixes.insert(b"abcde", 0);
2428 assert!(suffixes.contains_key(b"abcde"));
2429 assert_eq!(suffixes.get(b"abcde"), Some(0));
2430}
2431
2432#[test]
2433fn fastest_reset_uses_interleaved_hash_fill_step() {
2434 let mut driver = MatchGeneratorDriver::new(32, 2);
2435
2436 driver.reset(CompressionLevel::Uncompressed);
2437 assert_eq!(driver.match_generator.hash_fill_step, 1);
2438
2439 driver.reset(CompressionLevel::Fastest);
2440 assert_eq!(driver.match_generator.hash_fill_step, FAST_HASH_FILL_STEP);
2441
2442 driver.reset(CompressionLevel::Better);
2445 assert_eq!(driver.active_backend, MatcherBackend::HashChain);
2446 assert_eq!(driver.window_size(), BETTER_DEFAULT_WINDOW_SIZE as u64);
2447 assert_eq!(driver.hc_matcher().lazy_depth, 2);
2448}
2449
2450#[test]
2451fn simple_matcher_updates_offset_history_after_emitting_match() {
2452 let mut matcher = MatchGenerator::new(64);
2453 matcher.add_data(
2454 b"abcdeabcdeabcde".to_vec(),
2455 SuffixStore::with_capacity(64),
2456 |_, _| {},
2457 );
2458
2459 assert!(matcher.next_sequence(|seq| {
2460 assert_eq!(
2461 seq,
2462 Sequence::Triple {
2463 literals: b"abcde",
2464 offset: 5,
2465 match_len: 10,
2466 }
2467 );
2468 }));
2469 assert_eq!(matcher.offset_hist, [5, 1, 4]);
2470}
2471
2472#[test]
2473fn simple_matcher_zero_literal_repcode_checks_rep1_before_hash_lookup() {
2474 let mut matcher = MatchGenerator::new(64);
2475 matcher.add_data(
2476 b"abcdefghijabcdefghij".to_vec(),
2477 SuffixStore::with_capacity(64),
2478 |_, _| {},
2479 );
2480
2481 matcher.suffix_idx = 10;
2482 matcher.last_idx_in_sequence = 10;
2483 matcher.offset_hist = [99, 10, 4];
2484
2485 let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
2486 assert_eq!(candidate, Some((10, 10)));
2487}
2488
2489#[test]
2490fn simple_matcher_repcode_can_target_previous_window_entry() {
2491 let mut matcher = MatchGenerator::new(64);
2492 matcher.add_data(
2493 b"abcdefghij".to_vec(),
2494 SuffixStore::with_capacity(64),
2495 |_, _| {},
2496 );
2497 matcher.skip_matching();
2498 matcher.add_data(
2499 b"abcdefghij".to_vec(),
2500 SuffixStore::with_capacity(64),
2501 |_, _| {},
2502 );
2503
2504 matcher.offset_hist = [99, 10, 4];
2505
2506 let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data, 0);
2507 assert_eq!(candidate, Some((10, 10)));
2508}
2509
2510#[test]
2511fn simple_matcher_zero_literal_repcode_checks_rep2() {
2512 let mut matcher = MatchGenerator::new(64);
2513 matcher.add_data(
2514 b"abcdefghijabcdefghij".to_vec(),
2515 SuffixStore::with_capacity(64),
2516 |_, _| {},
2517 );
2518 matcher.suffix_idx = 10;
2519 matcher.last_idx_in_sequence = 10;
2520 matcher.offset_hist = [99, 4, 10];
2522
2523 let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
2524 assert_eq!(candidate, Some((10, 10)));
2525}
2526
2527#[test]
2528fn simple_matcher_zero_literal_repcode_checks_rep0_minus1() {
2529 let mut matcher = MatchGenerator::new(64);
2530 matcher.add_data(
2531 b"abcdefghijabcdefghij".to_vec(),
2532 SuffixStore::with_capacity(64),
2533 |_, _| {},
2534 );
2535 matcher.suffix_idx = 10;
2536 matcher.last_idx_in_sequence = 10;
2537 matcher.offset_hist = [11, 4, 99];
2539
2540 let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
2541 assert_eq!(candidate, Some((10, 10)));
2542}
2543
2544#[test]
2545fn simple_matcher_repcode_rejects_offsets_beyond_searchable_prefix() {
2546 let mut matcher = MatchGenerator::new(64);
2547 matcher.add_data(
2548 b"abcdefghij".to_vec(),
2549 SuffixStore::with_capacity(64),
2550 |_, _| {},
2551 );
2552 matcher.skip_matching();
2553 matcher.add_data(
2554 b"klmnopqrst".to_vec(),
2555 SuffixStore::with_capacity(64),
2556 |_, _| {},
2557 );
2558 matcher.suffix_idx = 3;
2559
2560 let candidate = matcher.offset_match_len(14, &matcher.window.last().unwrap().data[3..]);
2561 assert_eq!(candidate, None);
2562}
2563
2564#[test]
2565fn simple_matcher_skip_matching_seeds_every_position_even_with_fast_step() {
2566 let mut matcher = MatchGenerator::new(64);
2567 matcher.hash_fill_step = FAST_HASH_FILL_STEP;
2568 matcher.add_data(
2569 b"abcdefghijklmnop".to_vec(),
2570 SuffixStore::with_capacity(64),
2571 |_, _| {},
2572 );
2573 matcher.skip_matching();
2574 matcher.add_data(b"bcdef".to_vec(), SuffixStore::with_capacity(64), |_, _| {});
2575
2576 assert!(matcher.next_sequence(|seq| {
2577 assert_eq!(
2578 seq,
2579 Sequence::Triple {
2580 literals: b"",
2581 offset: 15,
2582 match_len: 5,
2583 }
2584 );
2585 }));
2586 assert!(!matcher.next_sequence(|_| {}));
2587}
2588
2589#[test]
2590fn simple_matcher_add_suffixes_till_backfills_last_searchable_anchor() {
2591 let mut matcher = MatchGenerator::new(64);
2592 matcher.hash_fill_step = FAST_HASH_FILL_STEP;
2593 matcher.add_data(
2594 b"01234abcde".to_vec(),
2595 SuffixStore::with_capacity(64),
2596 |_, _| {},
2597 );
2598 matcher.add_suffixes_till(10, FAST_HASH_FILL_STEP);
2599
2600 let last = matcher.window.last().unwrap();
2601 let tail = &last.data[5..10];
2602 assert_eq!(last.suffixes.get(tail), Some(5));
2603}
2604
2605#[test]
2606fn simple_matcher_add_suffixes_till_skips_when_idx_below_min_match_len() {
2607 let mut matcher = MatchGenerator::new(128);
2608 matcher.hash_fill_step = FAST_HASH_FILL_STEP;
2609 matcher.add_data(
2610 b"abcdefghijklmnopqrstuvwxyz".to_vec(),
2611 SuffixStore::with_capacity(1 << 16),
2612 |_, _| {},
2613 );
2614
2615 matcher.add_suffixes_till(MIN_MATCH_LEN - 1, FAST_HASH_FILL_STEP);
2616
2617 let last = matcher.window.last().unwrap();
2618 let first_key = &last.data[..MIN_MATCH_LEN];
2619 assert_eq!(last.suffixes.get(first_key), None);
2620}
2621
2622#[test]
2623fn simple_matcher_add_suffixes_till_fast_step_registers_interleaved_positions() {
2624 let mut matcher = MatchGenerator::new(128);
2625 matcher.hash_fill_step = FAST_HASH_FILL_STEP;
2626 matcher.add_data(
2627 b"abcdefghijklmnopqrstuvwxyz".to_vec(),
2628 SuffixStore::with_capacity(1 << 16),
2629 |_, _| {},
2630 );
2631
2632 matcher.add_suffixes_till(17, FAST_HASH_FILL_STEP);
2633
2634 let last = matcher.window.last().unwrap();
2635 for pos in [0usize, 3, 6, 9, 12] {
2636 let key = &last.data[pos..pos + MIN_MATCH_LEN];
2637 assert_eq!(
2638 last.suffixes.get(key),
2639 Some(pos),
2640 "expected interleaved suffix registration at pos {pos}"
2641 );
2642 }
2643}
2644
2645#[test]
2646fn dfast_skip_matching_handles_window_eviction() {
2647 let mut matcher = DfastMatchGenerator::new(16);
2648
2649 matcher.add_data(alloc::vec![1, 2, 3, 4, 5, 6], |_| {});
2650 matcher.skip_matching();
2651 matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});
2652 matcher.skip_matching();
2653 matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});
2654
2655 let mut reconstructed = alloc::vec![7, 8, 9, 10, 11, 12];
2656 matcher.start_matching(|seq| match seq {
2657 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
2658 Sequence::Triple {
2659 literals,
2660 offset,
2661 match_len,
2662 } => {
2663 reconstructed.extend_from_slice(literals);
2664 let start = reconstructed.len() - offset;
2665 for i in 0..match_len {
2666 let byte = reconstructed[start + i];
2667 reconstructed.push(byte);
2668 }
2669 }
2670 });
2671
2672 assert_eq!(reconstructed, [7, 8, 9, 10, 11, 12, 7, 8, 9, 10, 11, 12]);
2673}
2674
2675#[test]
2676fn dfast_add_data_callback_reports_evicted_len_not_capacity() {
2677 let mut matcher = DfastMatchGenerator::new(8);
2678
2679 let mut first = Vec::with_capacity(64);
2680 first.extend_from_slice(b"abcdefgh");
2681 matcher.add_data(first, |_| {});
2682
2683 let mut second = Vec::with_capacity(64);
2684 second.extend_from_slice(b"ijklmnop");
2685
2686 let mut observed_evicted_len = None;
2687 matcher.add_data(second, |data| {
2688 observed_evicted_len = Some(data.len());
2689 });
2690
2691 assert_eq!(
2692 observed_evicted_len,
2693 Some(8),
2694 "eviction callback must report evicted byte length, not backing capacity"
2695 );
2696}
2697
2698#[test]
2699fn dfast_trim_to_window_callback_reports_evicted_len_not_capacity() {
2700 let mut matcher = DfastMatchGenerator::new(16);
2701
2702 let mut first = Vec::with_capacity(64);
2703 first.extend_from_slice(b"abcdefgh");
2704 matcher.add_data(first, |_| {});
2705
2706 let mut second = Vec::with_capacity(64);
2707 second.extend_from_slice(b"ijklmnop");
2708 matcher.add_data(second, |_| {});
2709
2710 matcher.max_window_size = 8;
2711
2712 let mut observed_evicted_len = None;
2713 matcher.trim_to_window(|data| {
2714 observed_evicted_len = Some(data.len());
2715 });
2716
2717 assert_eq!(
2718 observed_evicted_len,
2719 Some(8),
2720 "trim callback must report evicted byte length, not backing capacity"
2721 );
2722}
2723
2724#[test]
2725fn dfast_inserts_tail_positions_for_next_block_matching() {
2726 let mut matcher = DfastMatchGenerator::new(DFAST_DEFAULT_WINDOW_SIZE);
2727
2728 matcher.add_data(b"012345bcdea".to_vec(), |_| {});
2729 let mut history = Vec::new();
2730 matcher.start_matching(|seq| match seq {
2731 Sequence::Literals { literals } => history.extend_from_slice(literals),
2732 Sequence::Triple { .. } => unreachable!("first block should not match history"),
2733 });
2734 assert_eq!(history, b"012345bcdea");
2735
2736 matcher.add_data(b"bcdeabcdeab".to_vec(), |_| {});
2737 let mut saw_first_sequence = false;
2738 matcher.start_matching(|seq| {
2739 assert!(!saw_first_sequence, "expected a single cross-block match");
2740 saw_first_sequence = true;
2741 match seq {
2742 Sequence::Literals { .. } => {
2743 panic!("expected tail-anchored cross-block match before any literals")
2744 }
2745 Sequence::Triple {
2746 literals,
2747 offset,
2748 match_len,
2749 } => {
2750 assert_eq!(literals, b"");
2751 assert_eq!(offset, 5);
2752 assert_eq!(match_len, 11);
2753 let start = history.len() - offset;
2754 for i in 0..match_len {
2755 let byte = history[start + i];
2756 history.push(byte);
2757 }
2758 }
2759 }
2760 });
2761
2762 assert!(
2763 saw_first_sequence,
2764 "expected tail-anchored cross-block match"
2765 );
2766 assert_eq!(history, b"012345bcdeabcdeabcdeab");
2767}