1use alloc::collections::VecDeque;
9use alloc::vec::Vec;
10use core::convert::TryInto;
11use core::num::NonZeroUsize;
12
13use super::CompressionLevel;
14use super::Matcher;
15use super::Sequence;
16use super::blocks::encode_offset_with_history;
17
18const MIN_MATCH_LEN: usize = 5;
19const FAST_HASH_FILL_STEP: usize = 3;
20const DFAST_MIN_MATCH_LEN: usize = 6;
21const DFAST_TARGET_LEN: usize = 48;
22const DFAST_HASH_BITS: usize = 20;
25const DFAST_SEARCH_DEPTH: usize = 4;
26const DFAST_DEFAULT_WINDOW_SIZE: usize = 1 << 22;
27const DFAST_EMPTY_SLOT: usize = usize::MAX;
28
29#[derive(Copy, Clone, PartialEq, Eq)]
30enum MatcherBackend {
31 Simple,
32 Dfast,
33}
34
35pub struct MatchGeneratorDriver {
37 vec_pool: Vec<Vec<u8>>,
38 suffix_pool: Vec<SuffixStore>,
39 match_generator: MatchGenerator,
40 dfast_match_generator: Option<DfastMatchGenerator>,
41 active_backend: MatcherBackend,
42 slice_size: usize,
43 base_slice_size: usize,
44 base_window_size: usize,
45 reported_window_size: usize,
48 dictionary_retained_budget: usize,
51}
52
53impl MatchGeneratorDriver {
54 pub(crate) fn new(slice_size: usize, max_slices_in_window: usize) -> Self {
57 let max_window_size = max_slices_in_window * slice_size;
58 Self {
59 vec_pool: Vec::new(),
60 suffix_pool: Vec::new(),
61 match_generator: MatchGenerator::new(max_window_size),
62 dfast_match_generator: None,
63 active_backend: MatcherBackend::Simple,
64 slice_size,
65 base_slice_size: slice_size,
66 base_window_size: max_window_size,
67 reported_window_size: max_window_size,
68 dictionary_retained_budget: 0,
69 }
70 }
71
72 fn level_config(&self, level: CompressionLevel) -> (MatcherBackend, usize, usize, usize) {
73 match level {
74 CompressionLevel::Uncompressed => (
75 MatcherBackend::Simple,
76 self.base_slice_size,
77 self.base_window_size,
78 1,
79 ),
80 CompressionLevel::Fastest => (
81 MatcherBackend::Simple,
82 self.base_slice_size,
83 self.base_window_size,
84 FAST_HASH_FILL_STEP,
85 ),
86 CompressionLevel::Default => (
87 MatcherBackend::Dfast,
88 self.base_slice_size,
89 DFAST_DEFAULT_WINDOW_SIZE,
90 1,
91 ),
92 CompressionLevel::Better => (
93 MatcherBackend::Simple,
94 self.base_slice_size,
95 self.base_window_size,
96 1,
97 ),
98 CompressionLevel::Best => (
99 MatcherBackend::Simple,
100 self.base_slice_size,
101 self.base_window_size,
102 1,
103 ),
104 }
105 }
106
107 fn dfast_matcher(&self) -> &DfastMatchGenerator {
108 self.dfast_match_generator
109 .as_ref()
110 .expect("dfast backend must be initialized by reset() before use")
111 }
112
113 fn dfast_matcher_mut(&mut self) -> &mut DfastMatchGenerator {
114 self.dfast_match_generator
115 .as_mut()
116 .expect("dfast backend must be initialized by reset() before use")
117 }
118
119 fn retire_dictionary_budget(&mut self, evicted_bytes: usize) {
120 let reclaimed = evicted_bytes.min(self.dictionary_retained_budget);
121 if reclaimed == 0 {
122 return;
123 }
124 self.dictionary_retained_budget -= reclaimed;
125 match self.active_backend {
126 MatcherBackend::Simple => {
127 self.match_generator.max_window_size = self
128 .match_generator
129 .max_window_size
130 .saturating_sub(reclaimed);
131 }
132 MatcherBackend::Dfast => {
133 let matcher = self.dfast_matcher_mut();
134 matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
135 }
136 }
137 }
138
139 fn trim_after_budget_retire(&mut self) {
140 loop {
141 let mut evicted_bytes = 0usize;
142 match self.active_backend {
143 MatcherBackend::Simple => {
144 let vec_pool = &mut self.vec_pool;
145 let suffix_pool = &mut self.suffix_pool;
146 self.match_generator.reserve(0, |mut data, mut suffixes| {
147 evicted_bytes += data.len();
148 data.resize(data.capacity(), 0);
149 vec_pool.push(data);
150 suffixes.slots.clear();
151 suffixes.slots.resize(suffixes.slots.capacity(), None);
152 suffix_pool.push(suffixes);
153 });
154 }
155 MatcherBackend::Dfast => {
156 let mut retired = Vec::new();
157 self.dfast_matcher_mut().trim_to_window(|data| {
158 evicted_bytes += data.len();
159 retired.push(data);
160 });
161 for mut data in retired {
162 data.resize(data.capacity(), 0);
163 self.vec_pool.push(data);
164 }
165 }
166 }
167 if evicted_bytes == 0 {
168 break;
169 }
170 self.retire_dictionary_budget(evicted_bytes);
171 }
172 }
173}
174
175impl Matcher for MatchGeneratorDriver {
176 fn supports_dictionary_priming(&self) -> bool {
177 true
178 }
179
180 fn reset(&mut self, level: CompressionLevel) {
181 let (backend, slice_size, max_window_size, hash_fill_step) = self.level_config(level);
182 self.dictionary_retained_budget = 0;
183 if self.active_backend != backend {
184 match self.active_backend {
185 MatcherBackend::Simple => {
186 let vec_pool = &mut self.vec_pool;
187 let suffix_pool = &mut self.suffix_pool;
188 self.match_generator.reset(|mut data, mut suffixes| {
189 data.resize(data.capacity(), 0);
190 vec_pool.push(data);
191 suffixes.slots.clear();
192 suffixes.slots.resize(suffixes.slots.capacity(), None);
193 suffix_pool.push(suffixes);
194 });
195 }
196 MatcherBackend::Dfast => {
197 if let Some(dfast) = self.dfast_match_generator.as_mut() {
198 let vec_pool = &mut self.vec_pool;
199 dfast.reset(|mut data| {
200 data.resize(data.capacity(), 0);
201 vec_pool.push(data);
202 });
203 }
204 }
205 }
206 }
207
208 self.active_backend = backend;
209 self.slice_size = slice_size;
210 self.reported_window_size = max_window_size;
211 match self.active_backend {
212 MatcherBackend::Simple => {
213 let vec_pool = &mut self.vec_pool;
214 let suffix_pool = &mut self.suffix_pool;
215 self.match_generator.max_window_size = max_window_size;
216 self.match_generator.hash_fill_step = hash_fill_step;
217 self.match_generator.reset(|mut data, mut suffixes| {
218 data.resize(data.capacity(), 0);
219 vec_pool.push(data);
220 suffixes.slots.clear();
221 suffixes.slots.resize(suffixes.slots.capacity(), None);
222 suffix_pool.push(suffixes);
223 });
224 }
225 MatcherBackend::Dfast => {
226 let dfast = self
227 .dfast_match_generator
228 .get_or_insert_with(|| DfastMatchGenerator::new(max_window_size));
229 dfast.max_window_size = max_window_size;
230 let vec_pool = &mut self.vec_pool;
231 dfast.reset(|mut data| {
232 data.resize(data.capacity(), 0);
233 vec_pool.push(data);
234 });
235 }
236 }
237 }
238
239 fn prime_with_dictionary(&mut self, dict_content: &[u8], offset_hist: [u32; 3]) {
240 match self.active_backend {
241 MatcherBackend::Simple => self.match_generator.offset_hist = offset_hist,
242 MatcherBackend::Dfast => self.dfast_matcher_mut().offset_hist = offset_hist,
243 }
244
245 if dict_content.is_empty() {
246 return;
247 }
248
249 let retained_dict_budget = dict_content.len();
252 match self.active_backend {
253 MatcherBackend::Simple => {
254 self.match_generator.max_window_size = self
255 .match_generator
256 .max_window_size
257 .saturating_add(retained_dict_budget);
258 }
259 MatcherBackend::Dfast => {
260 let matcher = self.dfast_matcher_mut();
261 matcher.max_window_size =
262 matcher.max_window_size.saturating_add(retained_dict_budget);
263 }
264 }
265
266 let mut start = 0usize;
267 let mut committed_dict_budget = 0usize;
268 let min_primed_tail = match self.active_backend {
269 MatcherBackend::Simple => MIN_MATCH_LEN,
270 MatcherBackend::Dfast => 4,
271 };
272 while start < dict_content.len() {
273 let end = (start + self.slice_size).min(dict_content.len());
274 if end - start < min_primed_tail {
275 break;
276 }
277 let mut space = self.get_next_space();
278 space.clear();
279 space.extend_from_slice(&dict_content[start..end]);
280 self.commit_space(space);
281 self.skip_matching();
282 committed_dict_budget += end - start;
283 start = end;
284 }
285
286 let uncommitted_tail_budget = retained_dict_budget.saturating_sub(committed_dict_budget);
287 if uncommitted_tail_budget > 0 {
288 match self.active_backend {
289 MatcherBackend::Simple => {
290 self.match_generator.max_window_size = self
291 .match_generator
292 .max_window_size
293 .saturating_sub(uncommitted_tail_budget);
294 }
295 MatcherBackend::Dfast => {
296 let matcher = self.dfast_matcher_mut();
297 matcher.max_window_size = matcher
298 .max_window_size
299 .saturating_sub(uncommitted_tail_budget);
300 }
301 }
302 }
303 if committed_dict_budget > 0 {
304 self.dictionary_retained_budget = self
305 .dictionary_retained_budget
306 .saturating_add(committed_dict_budget);
307 }
308 }
309
310 fn window_size(&self) -> u64 {
311 self.reported_window_size as u64
312 }
313
314 fn get_next_space(&mut self) -> Vec<u8> {
315 self.vec_pool.pop().unwrap_or_else(|| {
316 let mut space = alloc::vec![0; self.slice_size];
317 space.resize(space.capacity(), 0);
318 space
319 })
320 }
321
322 fn get_last_space(&mut self) -> &[u8] {
323 match self.active_backend {
324 MatcherBackend::Simple => self.match_generator.window.last().unwrap().data.as_slice(),
325 MatcherBackend::Dfast => self.dfast_matcher().get_last_space(),
326 }
327 }
328
329 fn commit_space(&mut self, space: Vec<u8>) {
330 match self.active_backend {
331 MatcherBackend::Simple => {
332 let vec_pool = &mut self.vec_pool;
333 let mut evicted_bytes = 0usize;
334 let suffixes = self
335 .suffix_pool
336 .pop()
337 .unwrap_or_else(|| SuffixStore::with_capacity(space.len()));
338 let suffix_pool = &mut self.suffix_pool;
339 self.match_generator
340 .add_data(space, suffixes, |mut data, mut suffixes| {
341 evicted_bytes += data.len();
342 data.resize(data.capacity(), 0);
343 vec_pool.push(data);
344 suffixes.slots.clear();
345 suffixes.slots.resize(suffixes.slots.capacity(), None);
346 suffix_pool.push(suffixes);
347 });
348 self.retire_dictionary_budget(evicted_bytes);
349 self.trim_after_budget_retire();
350 }
351 MatcherBackend::Dfast => {
352 let vec_pool = &mut self.vec_pool;
353 let mut evicted_bytes = 0usize;
354 self.dfast_match_generator
355 .as_mut()
356 .expect("dfast backend must be initialized by reset() before use")
357 .add_data(space, |mut data| {
358 evicted_bytes += data.len();
359 data.resize(data.capacity(), 0);
360 vec_pool.push(data);
361 });
362 self.retire_dictionary_budget(evicted_bytes);
363 self.trim_after_budget_retire();
364 }
365 }
366 }
367
368 fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
369 match self.active_backend {
370 MatcherBackend::Simple => {
371 while self.match_generator.next_sequence(&mut handle_sequence) {}
372 }
373 MatcherBackend::Dfast => self
374 .dfast_matcher_mut()
375 .start_matching(&mut handle_sequence),
376 }
377 }
378 fn skip_matching(&mut self) {
379 match self.active_backend {
380 MatcherBackend::Simple => self.match_generator.skip_matching(),
381 MatcherBackend::Dfast => self.dfast_matcher_mut().skip_matching(),
382 }
383 }
384}
385
386struct SuffixStore {
389 slots: Vec<Option<NonZeroUsize>>,
393 len_log: u32,
394}
395
396impl SuffixStore {
397 fn with_capacity(capacity: usize) -> Self {
398 Self {
399 slots: alloc::vec![None; capacity],
400 len_log: capacity.ilog2(),
401 }
402 }
403
404 #[inline(always)]
405 fn insert(&mut self, suffix: &[u8], idx: usize) {
406 let key = self.key(suffix);
407 self.slots[key] = Some(NonZeroUsize::new(idx + 1).unwrap());
408 }
409
410 #[inline(always)]
411 fn contains_key(&self, suffix: &[u8]) -> bool {
412 let key = self.key(suffix);
413 self.slots[key].is_some()
414 }
415
416 #[inline(always)]
417 fn get(&self, suffix: &[u8]) -> Option<usize> {
418 let key = self.key(suffix);
419 self.slots[key].map(|x| <NonZeroUsize as Into<usize>>::into(x) - 1)
420 }
421
422 #[inline(always)]
423 fn key(&self, suffix: &[u8]) -> usize {
424 if self.len_log == 0 {
426 return 0;
427 }
428
429 let s0 = suffix[0] as u64;
430 let s1 = suffix[1] as u64;
431 let s2 = suffix[2] as u64;
432 let s3 = suffix[3] as u64;
433 let s4 = suffix[4] as u64;
434
435 const POLY: u64 = 0xCF3BCCDCABu64;
436
437 let s0 = (s0 << 24).wrapping_mul(POLY);
438 let s1 = (s1 << 32).wrapping_mul(POLY);
439 let s2 = (s2 << 40).wrapping_mul(POLY);
440 let s3 = (s3 << 48).wrapping_mul(POLY);
441 let s4 = (s4 << 56).wrapping_mul(POLY);
442
443 let index = s0 ^ s1 ^ s2 ^ s3 ^ s4;
444 let index = index >> (64 - self.len_log);
445 index as usize % self.slots.len()
446 }
447}
448
449struct WindowEntry {
452 data: Vec<u8>,
453 suffixes: SuffixStore,
455 base_offset: usize,
457}
458
459pub(crate) struct MatchGenerator {
460 max_window_size: usize,
461 window: Vec<WindowEntry>,
464 window_size: usize,
465 #[cfg(debug_assertions)]
466 concat_window: Vec<u8>,
467 suffix_idx: usize,
469 last_idx_in_sequence: usize,
471 hash_fill_step: usize,
472 offset_hist: [u32; 3],
473}
474
475impl MatchGenerator {
476 #[inline(always)]
477 #[cfg(target_endian = "little")]
478 fn mismatch_byte_index(diff: usize) -> usize {
479 diff.trailing_zeros() as usize / 8
480 }
481
482 #[inline(always)]
483 #[cfg(target_endian = "big")]
484 fn mismatch_byte_index(diff: usize) -> usize {
485 diff.leading_zeros() as usize / 8
486 }
487
488 fn new(max_size: usize) -> Self {
490 Self {
491 max_window_size: max_size,
492 window: Vec::new(),
493 window_size: 0,
494 #[cfg(debug_assertions)]
495 concat_window: Vec::new(),
496 suffix_idx: 0,
497 last_idx_in_sequence: 0,
498 hash_fill_step: 1,
499 offset_hist: [1, 4, 8],
500 }
501 }
502
503 fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
504 self.window_size = 0;
505 #[cfg(debug_assertions)]
506 self.concat_window.clear();
507 self.suffix_idx = 0;
508 self.last_idx_in_sequence = 0;
509 self.offset_hist = [1, 4, 8];
510 self.window.drain(..).for_each(|entry| {
511 reuse_space(entry.data, entry.suffixes);
512 });
513 }
514
515 fn next_sequence(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) -> bool {
520 loop {
521 let last_entry = self.window.last().unwrap();
522 let data_slice = &last_entry.data;
523
524 if self.suffix_idx >= data_slice.len() {
526 if self.last_idx_in_sequence != self.suffix_idx {
527 let literals = &data_slice[self.last_idx_in_sequence..];
528 self.last_idx_in_sequence = self.suffix_idx;
529 handle_sequence(Sequence::Literals { literals });
530 return true;
531 } else {
532 return false;
533 }
534 }
535
536 let data_slice = &data_slice[self.suffix_idx..];
538 if data_slice.len() < MIN_MATCH_LEN {
539 let last_idx_in_sequence = self.last_idx_in_sequence;
540 self.last_idx_in_sequence = last_entry.data.len();
541 self.suffix_idx = last_entry.data.len();
542 handle_sequence(Sequence::Literals {
543 literals: &last_entry.data[last_idx_in_sequence..],
544 });
545 return true;
546 }
547
548 let key = &data_slice[..MIN_MATCH_LEN];
550 let literals_len = self.suffix_idx - self.last_idx_in_sequence;
551
552 let mut candidate = self.repcode_candidate(data_slice, literals_len);
554 for match_entry in self.window.iter() {
555 if let Some(match_index) = match_entry.suffixes.get(key) {
556 let match_slice = &match_entry.data[match_index..];
557
558 let match_len = Self::common_prefix_len(match_slice, data_slice);
560
561 if match_len >= MIN_MATCH_LEN {
563 let offset = match_entry.base_offset + self.suffix_idx - match_index;
564
565 #[cfg(debug_assertions)]
567 {
568 let unprocessed = last_entry.data.len() - self.suffix_idx;
569 let start = self.concat_window.len() - unprocessed - offset;
570 let end = start + match_len;
571 let check_slice = &self.concat_window[start..end];
572 debug_assert_eq!(check_slice, &match_slice[..match_len]);
573 }
574
575 if let Some((old_offset, old_match_len)) = candidate {
576 if match_len > old_match_len
577 || (match_len == old_match_len && offset < old_offset)
578 {
579 candidate = Some((offset, match_len));
580 }
581 } else {
582 candidate = Some((offset, match_len));
583 }
584 }
585 }
586 }
587
588 if let Some((offset, match_len)) = candidate {
589 self.add_suffixes_till(self.suffix_idx + match_len, self.hash_fill_step);
592
593 let last_entry = self.window.last().unwrap();
595 let literals = &last_entry.data[self.last_idx_in_sequence..self.suffix_idx];
596
597 self.suffix_idx += match_len;
599 self.last_idx_in_sequence = self.suffix_idx;
600 let _ = encode_offset_with_history(
601 offset as u32,
602 literals.len() as u32,
603 &mut self.offset_hist,
604 );
605 handle_sequence(Sequence::Triple {
606 literals,
607 offset,
608 match_len,
609 });
610
611 return true;
612 }
613
614 let last_entry = self.window.last_mut().unwrap();
615 let key = &last_entry.data[self.suffix_idx..self.suffix_idx + MIN_MATCH_LEN];
616 if !last_entry.suffixes.contains_key(key) {
617 last_entry.suffixes.insert(key, self.suffix_idx);
618 }
619 self.suffix_idx += 1;
620 }
621 }
622
623 #[inline(always)]
625 fn common_prefix_len(a: &[u8], b: &[u8]) -> usize {
626 let max = a.len().min(b.len());
627 let chunk = core::mem::size_of::<usize>();
628 let mut off = 0usize;
629 let lhs = a.as_ptr();
630 let rhs = b.as_ptr();
631
632 while off + chunk <= max {
633 let lhs_word = unsafe { core::ptr::read_unaligned(lhs.add(off) as *const usize) };
634 let rhs_word = unsafe { core::ptr::read_unaligned(rhs.add(off) as *const usize) };
635 let diff = lhs_word ^ rhs_word;
636 if diff != 0 {
637 return off + Self::mismatch_byte_index(diff);
638 }
639 off += chunk;
640 }
641
642 off + core::iter::zip(&a[off..max], &b[off..max])
643 .take_while(|(x, y)| x == y)
644 .count()
645 }
646
647 #[inline(always)]
649 fn add_suffixes_till(&mut self, idx: usize, fill_step: usize) {
650 let start = self.suffix_idx;
651 let last_entry = self.window.last_mut().unwrap();
652 if last_entry.data.len() < MIN_MATCH_LEN {
653 return;
654 }
655 let insert_limit = idx.saturating_sub(MIN_MATCH_LEN - 1);
656 if insert_limit > start {
657 let data = last_entry.data.as_slice();
658 let suffixes = &mut last_entry.suffixes;
659 if fill_step == FAST_HASH_FILL_STEP {
660 Self::add_suffixes_interleaved_fast(data, suffixes, start, insert_limit);
661 } else {
662 let mut pos = start;
663 while pos < insert_limit {
664 Self::insert_suffix_if_absent(data, suffixes, pos);
665 pos += fill_step;
666 }
667 }
668 }
669
670 if idx >= start + MIN_MATCH_LEN {
671 let tail_start = idx - MIN_MATCH_LEN;
672 let tail_key = &last_entry.data[tail_start..tail_start + MIN_MATCH_LEN];
673 if !last_entry.suffixes.contains_key(tail_key) {
674 last_entry.suffixes.insert(tail_key, tail_start);
675 }
676 }
677 }
678
679 #[inline(always)]
680 fn insert_suffix_if_absent(data: &[u8], suffixes: &mut SuffixStore, pos: usize) {
681 debug_assert!(
682 pos + MIN_MATCH_LEN <= data.len(),
683 "insert_suffix_if_absent: pos {} + MIN_MATCH_LEN {} exceeds data.len() {}",
684 pos,
685 MIN_MATCH_LEN,
686 data.len()
687 );
688 let key = &data[pos..pos + MIN_MATCH_LEN];
689 if !suffixes.contains_key(key) {
690 suffixes.insert(key, pos);
691 }
692 }
693
694 #[inline(always)]
695 fn add_suffixes_interleaved_fast(
696 data: &[u8],
697 suffixes: &mut SuffixStore,
698 start: usize,
699 insert_limit: usize,
700 ) {
701 let lane = FAST_HASH_FILL_STEP;
702 let mut pos = start;
703
704 while pos + lane * 3 < insert_limit {
707 let p0 = pos;
708 let p1 = pos + lane;
709 let p2 = pos + lane * 2;
710 let p3 = pos + lane * 3;
711
712 Self::insert_suffix_if_absent(data, suffixes, p0);
713 Self::insert_suffix_if_absent(data, suffixes, p1);
714 Self::insert_suffix_if_absent(data, suffixes, p2);
715 Self::insert_suffix_if_absent(data, suffixes, p3);
716
717 pos += lane * 4;
718 }
719
720 while pos < insert_limit {
721 Self::insert_suffix_if_absent(data, suffixes, pos);
722 pos += lane;
723 }
724 }
725
726 fn repcode_candidate(&self, data_slice: &[u8], literals_len: usize) -> Option<(usize, usize)> {
727 if literals_len != 0 {
728 return None;
729 }
730
731 let reps = [
732 Some(self.offset_hist[1] as usize),
733 Some(self.offset_hist[2] as usize),
734 (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
735 ];
736
737 let mut best: Option<(usize, usize)> = None;
738 let mut seen = [0usize; 3];
739 let mut seen_len = 0usize;
740 for offset in reps.into_iter().flatten() {
741 if offset == 0 {
742 continue;
743 }
744 if seen[..seen_len].contains(&offset) {
745 continue;
746 }
747 seen[seen_len] = offset;
748 seen_len += 1;
749
750 let Some(match_len) = self.offset_match_len(offset, data_slice) else {
751 continue;
752 };
753 if match_len < MIN_MATCH_LEN {
754 continue;
755 }
756
757 if best.is_none_or(|(old_offset, old_len)| {
758 match_len > old_len || (match_len == old_len && offset < old_offset)
759 }) {
760 best = Some((offset, match_len));
761 }
762 }
763 best
764 }
765
766 fn offset_match_len(&self, offset: usize, data_slice: &[u8]) -> Option<usize> {
767 if offset == 0 {
768 return None;
769 }
770
771 let last_idx = self.window.len().checked_sub(1)?;
772 let last_entry = &self.window[last_idx];
773 let searchable_prefix = self.window_size - (last_entry.data.len() - self.suffix_idx);
774 if offset > searchable_prefix {
775 return None;
776 }
777
778 let mut remaining = offset;
779 let (entry_idx, match_index) = if remaining <= self.suffix_idx {
780 (last_idx, self.suffix_idx - remaining)
781 } else {
782 remaining -= self.suffix_idx;
783 let mut found = None;
784 for entry_idx in (0..last_idx).rev() {
785 let len = self.window[entry_idx].data.len();
786 if remaining <= len {
787 found = Some((entry_idx, len - remaining));
788 break;
789 }
790 remaining -= len;
791 }
792 found?
793 };
794
795 let match_entry = &self.window[entry_idx];
796 let match_slice = &match_entry.data[match_index..];
797
798 Some(Self::common_prefix_len(match_slice, data_slice))
799 }
800
801 fn skip_matching(&mut self) {
803 let len = self.window.last().unwrap().data.len();
804 self.add_suffixes_till(len, 1);
805 self.suffix_idx = len;
806 self.last_idx_in_sequence = len;
807 }
808
809 fn add_data(
812 &mut self,
813 data: Vec<u8>,
814 suffixes: SuffixStore,
815 reuse_space: impl FnMut(Vec<u8>, SuffixStore),
816 ) {
817 assert!(
818 self.window.is_empty() || self.suffix_idx == self.window.last().unwrap().data.len()
819 );
820 self.reserve(data.len(), reuse_space);
821 #[cfg(debug_assertions)]
822 self.concat_window.extend_from_slice(&data);
823
824 if let Some(last_len) = self.window.last().map(|last| last.data.len()) {
825 for entry in self.window.iter_mut() {
826 entry.base_offset += last_len;
827 }
828 }
829
830 let len = data.len();
831 self.window.push(WindowEntry {
832 data,
833 suffixes,
834 base_offset: 0,
835 });
836 self.window_size += len;
837 self.suffix_idx = 0;
838 self.last_idx_in_sequence = 0;
839 }
840
841 fn reserve(&mut self, amount: usize, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
844 assert!(self.max_window_size >= amount);
845 while self.window_size + amount > self.max_window_size {
846 let removed = self.window.remove(0);
847 self.window_size -= removed.data.len();
848 #[cfg(debug_assertions)]
849 self.concat_window.drain(0..removed.data.len());
850
851 let WindowEntry {
852 suffixes,
853 data: leaked_vec,
854 base_offset: _,
855 } = removed;
856 reuse_space(leaked_vec, suffixes);
857 }
858 }
859}
860
861struct DfastMatchGenerator {
862 max_window_size: usize,
863 window: VecDeque<Vec<u8>>,
864 window_size: usize,
865 history: Vec<u8>,
868 history_start: usize,
869 history_abs_start: usize,
870 offset_hist: [u32; 3],
871 short_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
872 long_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
873}
874
875#[derive(Copy, Clone)]
876struct MatchCandidate {
877 start: usize,
878 offset: usize,
879 match_len: usize,
880}
881
882impl DfastMatchGenerator {
883 fn new(max_window_size: usize) -> Self {
884 Self {
885 max_window_size,
886 window: VecDeque::new(),
887 window_size: 0,
888 history: Vec::new(),
889 history_start: 0,
890 history_abs_start: 0,
891 offset_hist: [1, 4, 8],
892 short_hash: Vec::new(),
893 long_hash: Vec::new(),
894 }
895 }
896
897 fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
898 self.window_size = 0;
899 self.history.clear();
900 self.history_start = 0;
901 self.history_abs_start = 0;
902 self.offset_hist = [1, 4, 8];
903 if !self.short_hash.is_empty() {
904 self.short_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
905 self.long_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
906 }
907 for mut data in self.window.drain(..) {
908 data.resize(data.capacity(), 0);
909 reuse_space(data);
910 }
911 }
912
913 fn get_last_space(&self) -> &[u8] {
914 self.window.back().unwrap().as_slice()
915 }
916
917 fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
918 assert!(data.len() <= self.max_window_size);
919 while self.window_size + data.len() > self.max_window_size {
920 let removed = self.window.pop_front().unwrap();
921 self.window_size -= removed.len();
922 self.history_start += removed.len();
923 self.history_abs_start += removed.len();
924 reuse_space(removed);
925 }
926 self.compact_history();
927 self.history.extend_from_slice(&data);
928 self.window_size += data.len();
929 self.window.push_back(data);
930 }
931
932 fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
933 while self.window_size > self.max_window_size {
934 let removed = self.window.pop_front().unwrap();
935 self.window_size -= removed.len();
936 self.history_start += removed.len();
937 self.history_abs_start += removed.len();
938 reuse_space(removed);
939 }
940 }
941
942 fn skip_matching(&mut self) {
943 self.ensure_hash_tables();
944 let current_len = self.window.back().unwrap().len();
945 let current_abs_start = self.history_abs_start + self.window_size - current_len;
946 self.insert_positions(current_abs_start, current_abs_start + current_len);
947 }
948
949 fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
950 self.ensure_hash_tables();
951
952 let current_len = self.window.back().unwrap().len();
953 if current_len == 0 {
954 return;
955 }
956
957 let current_abs_start = self.history_abs_start + self.window_size - current_len;
958
959 let mut pos = 0usize;
960 let mut literals_start = 0usize;
961 while pos + DFAST_MIN_MATCH_LEN <= current_len {
962 let abs_pos = current_abs_start + pos;
963 let lit_len = pos - literals_start;
964
965 let best = self.best_match(abs_pos, lit_len);
966 if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
967 self.insert_positions(abs_pos, candidate.start + candidate.match_len);
968 let current = self.window.back().unwrap().as_slice();
969 let start = candidate.start - current_abs_start;
970 let literals = ¤t[literals_start..start];
971 handle_sequence(Sequence::Triple {
972 literals,
973 offset: candidate.offset,
974 match_len: candidate.match_len,
975 });
976 let _ = encode_offset_with_history(
979 candidate.offset as u32,
980 literals.len() as u32,
981 &mut self.offset_hist,
982 );
983 pos = start + candidate.match_len;
984 literals_start = pos;
985 } else {
986 self.insert_position(abs_pos);
987 pos += 1;
988 }
989 }
990
991 if literals_start < current_len {
992 let current = self.window.back().unwrap().as_slice();
999 handle_sequence(Sequence::Literals {
1000 literals: ¤t[literals_start..],
1001 });
1002 }
1003 }
1004
1005 fn ensure_hash_tables(&mut self) {
1006 if self.short_hash.is_empty() {
1007 self.short_hash =
1011 alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; 1 << DFAST_HASH_BITS];
1012 self.long_hash =
1013 alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; 1 << DFAST_HASH_BITS];
1014 }
1015 }
1016
1017 fn compact_history(&mut self) {
1018 if self.history_start == 0 {
1019 return;
1020 }
1021 if self.history_start >= self.max_window_size
1022 || self.history_start * 2 >= self.history.len()
1023 {
1024 self.history.drain(..self.history_start);
1025 self.history_start = 0;
1026 }
1027 }
1028
1029 fn live_history(&self) -> &[u8] {
1030 &self.history[self.history_start..]
1031 }
1032
1033 fn history_abs_end(&self) -> usize {
1034 self.history_abs_start + self.live_history().len()
1035 }
1036
1037 fn best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1038 let rep = self.repcode_candidate(abs_pos, lit_len);
1039 let hash = self.hash_candidate(abs_pos, lit_len);
1040 Self::better_candidate(rep, hash)
1041 }
1042
1043 fn pick_lazy_match(
1044 &self,
1045 abs_pos: usize,
1046 lit_len: usize,
1047 best: Option<MatchCandidate>,
1048 ) -> Option<MatchCandidate> {
1049 let best = best?;
1050 if best.match_len >= DFAST_TARGET_LEN
1051 || abs_pos + 1 + DFAST_MIN_MATCH_LEN > self.history_abs_end()
1052 {
1053 return Some(best);
1054 }
1055
1056 let next = self.best_match(abs_pos + 1, lit_len + 1);
1057 match next {
1058 Some(next)
1059 if next.match_len > best.match_len
1060 || (next.match_len == best.match_len && next.offset < best.offset) =>
1061 {
1062 None
1063 }
1064 _ => Some(best),
1065 }
1066 }
1067
1068 fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1069 let reps = if lit_len == 0 {
1070 [
1071 Some(self.offset_hist[1] as usize),
1072 Some(self.offset_hist[2] as usize),
1073 (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
1074 ]
1075 } else {
1076 [
1077 Some(self.offset_hist[0] as usize),
1078 Some(self.offset_hist[1] as usize),
1079 Some(self.offset_hist[2] as usize),
1080 ]
1081 };
1082
1083 let mut best = None;
1084 for rep in reps.into_iter().flatten() {
1085 if rep == 0 || rep > abs_pos {
1086 continue;
1087 }
1088 let candidate_pos = abs_pos - rep;
1089 if candidate_pos < self.history_abs_start {
1090 continue;
1091 }
1092 let concat = self.live_history();
1093 let candidate_idx = candidate_pos - self.history_abs_start;
1094 let current_idx = abs_pos - self.history_abs_start;
1095 if current_idx + DFAST_MIN_MATCH_LEN > concat.len() {
1096 continue;
1097 }
1098 let match_len =
1099 MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
1100 if match_len >= DFAST_MIN_MATCH_LEN {
1101 let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
1102 best = Self::better_candidate(best, Some(candidate));
1103 }
1104 }
1105 best
1106 }
1107
1108 fn hash_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1109 let concat = self.live_history();
1110 let current_idx = abs_pos - self.history_abs_start;
1111 let mut best = None;
1112 for candidate_pos in self.long_candidates(abs_pos) {
1113 if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos {
1114 continue;
1115 }
1116 let candidate_idx = candidate_pos - self.history_abs_start;
1117 let match_len =
1118 MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
1119 if match_len >= DFAST_MIN_MATCH_LEN {
1120 let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
1121 best = Self::better_candidate(best, Some(candidate));
1122 if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) {
1123 return best;
1124 }
1125 }
1126 }
1127
1128 for candidate_pos in self.short_candidates(abs_pos) {
1129 if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos {
1130 continue;
1131 }
1132 let candidate_idx = candidate_pos - self.history_abs_start;
1133 let match_len =
1134 MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
1135 if match_len >= DFAST_MIN_MATCH_LEN {
1136 let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
1137 best = Self::better_candidate(best, Some(candidate));
1138 if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) {
1139 return best;
1140 }
1141 }
1142 }
1143 best
1144 }
1145
1146 fn extend_backwards(
1147 &self,
1148 mut candidate_pos: usize,
1149 mut abs_pos: usize,
1150 mut match_len: usize,
1151 lit_len: usize,
1152 ) -> MatchCandidate {
1153 let concat = self.live_history();
1154 let min_abs_pos = abs_pos - lit_len;
1155 while abs_pos > min_abs_pos
1156 && candidate_pos > self.history_abs_start
1157 && concat[candidate_pos - self.history_abs_start - 1]
1158 == concat[abs_pos - self.history_abs_start - 1]
1159 {
1160 candidate_pos -= 1;
1161 abs_pos -= 1;
1162 match_len += 1;
1163 }
1164 MatchCandidate {
1165 start: abs_pos,
1166 offset: abs_pos - candidate_pos,
1167 match_len,
1168 }
1169 }
1170
1171 fn better_candidate(
1172 lhs: Option<MatchCandidate>,
1173 rhs: Option<MatchCandidate>,
1174 ) -> Option<MatchCandidate> {
1175 match (lhs, rhs) {
1176 (None, other) | (other, None) => other,
1177 (Some(lhs), Some(rhs)) => {
1178 if rhs.match_len > lhs.match_len
1179 || (rhs.match_len == lhs.match_len && rhs.offset < lhs.offset)
1180 {
1181 Some(rhs)
1182 } else {
1183 Some(lhs)
1184 }
1185 }
1186 }
1187 }
1188
1189 fn insert_positions(&mut self, start: usize, end: usize) {
1190 for pos in start..end {
1191 self.insert_position(pos);
1192 }
1193 }
1194
1195 fn insert_position(&mut self, pos: usize) {
1196 let idx = pos - self.history_abs_start;
1197 let short = {
1198 let concat = self.live_history();
1199 (idx + 4 <= concat.len()).then(|| Self::hash4(&concat[idx..]))
1200 };
1201 if let Some(short) = short {
1202 let bucket = &mut self.short_hash[short];
1203 if bucket[0] != pos {
1204 bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
1205 bucket[0] = pos;
1206 }
1207 }
1208
1209 let long = {
1210 let concat = self.live_history();
1211 (idx + 8 <= concat.len()).then(|| Self::hash8(&concat[idx..]))
1212 };
1213 if let Some(long) = long {
1214 let bucket = &mut self.long_hash[long];
1215 if bucket[0] != pos {
1216 bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
1217 bucket[0] = pos;
1218 }
1219 }
1220 }
1221
1222 fn short_candidates(&self, pos: usize) -> impl Iterator<Item = usize> + '_ {
1223 let concat = self.live_history();
1224 let idx = pos - self.history_abs_start;
1225 (idx + 4 <= concat.len())
1226 .then(|| self.short_hash[Self::hash4(&concat[idx..])])
1227 .into_iter()
1228 .flatten()
1229 .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
1230 }
1231
1232 fn long_candidates(&self, pos: usize) -> impl Iterator<Item = usize> + '_ {
1233 let concat = self.live_history();
1234 let idx = pos - self.history_abs_start;
1235 (idx + 8 <= concat.len())
1236 .then(|| self.long_hash[Self::hash8(&concat[idx..])])
1237 .into_iter()
1238 .flatten()
1239 .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
1240 }
1241
1242 fn hash4(data: &[u8]) -> usize {
1243 let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64;
1244 Self::hash_bits(value)
1245 }
1246
1247 fn hash8(data: &[u8]) -> usize {
1248 let value = u64::from_le_bytes(data[..8].try_into().unwrap());
1249 Self::hash_bits(value)
1250 }
1251
1252 fn hash_bits(value: u64) -> usize {
1253 const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
1254 ((value.wrapping_mul(PRIME)) >> (64 - DFAST_HASH_BITS)) as usize
1255 }
1256}
1257
1258#[test]
1259fn matches() {
1260 let mut matcher = MatchGenerator::new(1000);
1261 let mut original_data = Vec::new();
1262 let mut reconstructed = Vec::new();
1263
1264 let replay_sequence = |seq: Sequence<'_>, reconstructed: &mut Vec<u8>| match seq {
1265 Sequence::Literals { literals } => {
1266 assert!(!literals.is_empty());
1267 reconstructed.extend_from_slice(literals);
1268 }
1269 Sequence::Triple {
1270 literals,
1271 offset,
1272 match_len,
1273 } => {
1274 assert!(offset > 0);
1275 assert!(match_len >= MIN_MATCH_LEN);
1276 reconstructed.extend_from_slice(literals);
1277 assert!(offset <= reconstructed.len());
1278 let start = reconstructed.len() - offset;
1279 for i in 0..match_len {
1280 let byte = reconstructed[start + i];
1281 reconstructed.push(byte);
1282 }
1283 }
1284 };
1285
1286 matcher.add_data(
1287 alloc::vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
1288 SuffixStore::with_capacity(100),
1289 |_, _| {},
1290 );
1291 original_data.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
1292
1293 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
1294
1295 assert!(!matcher.next_sequence(|_| {}));
1296
1297 matcher.add_data(
1298 alloc::vec![
1299 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
1300 ],
1301 SuffixStore::with_capacity(100),
1302 |_, _| {},
1303 );
1304 original_data.extend_from_slice(&[
1305 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
1306 ]);
1307
1308 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
1309 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
1310 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
1311 assert!(!matcher.next_sequence(|_| {}));
1312
1313 matcher.add_data(
1314 alloc::vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0],
1315 SuffixStore::with_capacity(100),
1316 |_, _| {},
1317 );
1318 original_data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0]);
1319
1320 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
1321 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
1322 assert!(!matcher.next_sequence(|_| {}));
1323
1324 matcher.add_data(
1325 alloc::vec![0, 0, 0, 0, 0],
1326 SuffixStore::with_capacity(100),
1327 |_, _| {},
1328 );
1329 original_data.extend_from_slice(&[0, 0, 0, 0, 0]);
1330
1331 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
1332 assert!(!matcher.next_sequence(|_| {}));
1333
1334 matcher.add_data(
1335 alloc::vec![7, 8, 9, 10, 11],
1336 SuffixStore::with_capacity(100),
1337 |_, _| {},
1338 );
1339 original_data.extend_from_slice(&[7, 8, 9, 10, 11]);
1340
1341 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
1342 assert!(!matcher.next_sequence(|_| {}));
1343
1344 matcher.add_data(
1345 alloc::vec![1, 3, 5, 7, 9],
1346 SuffixStore::with_capacity(100),
1347 |_, _| {},
1348 );
1349 matcher.skip_matching();
1350 original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
1351 reconstructed.extend_from_slice(&[1, 3, 5, 7, 9]);
1352 assert!(!matcher.next_sequence(|_| {}));
1353
1354 matcher.add_data(
1355 alloc::vec![1, 3, 5, 7, 9],
1356 SuffixStore::with_capacity(100),
1357 |_, _| {},
1358 );
1359 original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
1360
1361 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
1362 assert!(!matcher.next_sequence(|_| {}));
1363
1364 matcher.add_data(
1365 alloc::vec![0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23],
1366 SuffixStore::with_capacity(100),
1367 |_, _| {},
1368 );
1369 original_data.extend_from_slice(&[0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23]);
1370
1371 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
1372 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
1373 assert!(!matcher.next_sequence(|_| {}));
1374
1375 assert_eq!(reconstructed, original_data);
1376}
1377
1378#[test]
1379fn dfast_matches_roundtrip_multi_block_pattern() {
1380 let pattern = [9, 21, 44, 184, 19, 96, 171, 109, 141, 251];
1381 let first_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
1382 let second_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
1383
1384 let mut matcher = DfastMatchGenerator::new(DFAST_DEFAULT_WINDOW_SIZE);
1385 let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
1386 Sequence::Literals { literals } => decoded.extend_from_slice(literals),
1387 Sequence::Triple {
1388 literals,
1389 offset,
1390 match_len,
1391 } => {
1392 decoded.extend_from_slice(literals);
1393 let start = decoded.len() - offset;
1394 for i in 0..match_len {
1395 let byte = decoded[start + i];
1396 decoded.push(byte);
1397 }
1398 }
1399 };
1400
1401 matcher.add_data(first_block.clone(), |_| {});
1402 let mut history = Vec::new();
1403 matcher.start_matching(|seq| replay_sequence(&mut history, seq));
1404 assert_eq!(history, first_block);
1405
1406 matcher.add_data(second_block.clone(), |_| {});
1407 let prefix_len = history.len();
1408 matcher.start_matching(|seq| replay_sequence(&mut history, seq));
1409
1410 assert_eq!(&history[prefix_len..], second_block.as_slice());
1411}
1412
1413#[test]
1414fn driver_switches_backends_and_initializes_dfast_via_reset() {
1415 let mut driver = MatchGeneratorDriver::new(32, 2);
1416
1417 driver.reset(CompressionLevel::Default);
1418 assert_eq!(driver.window_size(), DFAST_DEFAULT_WINDOW_SIZE as u64);
1419
1420 let mut first = driver.get_next_space();
1421 first[..12].copy_from_slice(b"abcabcabcabc");
1422 first.truncate(12);
1423 driver.commit_space(first);
1424 assert_eq!(driver.get_last_space(), b"abcabcabcabc");
1425 driver.skip_matching();
1426
1427 let mut second = driver.get_next_space();
1428 second[..12].copy_from_slice(b"abcabcabcabc");
1429 second.truncate(12);
1430 driver.commit_space(second);
1431
1432 let mut reconstructed = b"abcabcabcabc".to_vec();
1433 driver.start_matching(|seq| match seq {
1434 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
1435 Sequence::Triple {
1436 literals,
1437 offset,
1438 match_len,
1439 } => {
1440 reconstructed.extend_from_slice(literals);
1441 let start = reconstructed.len() - offset;
1442 for i in 0..match_len {
1443 let byte = reconstructed[start + i];
1444 reconstructed.push(byte);
1445 }
1446 }
1447 });
1448 assert_eq!(reconstructed, b"abcabcabcabcabcabcabcabc");
1449
1450 driver.reset(CompressionLevel::Fastest);
1451 assert_eq!(driver.window_size(), 64);
1452}
1453
1454#[test]
1455fn prime_with_dictionary_preserves_history_for_first_full_block() {
1456 let mut driver = MatchGeneratorDriver::new(8, 1);
1457 driver.reset(CompressionLevel::Fastest);
1458
1459 driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
1460
1461 let mut space = driver.get_next_space();
1462 space.clear();
1463 space.extend_from_slice(b"abcdefgh");
1464 driver.commit_space(space);
1465
1466 let mut saw_match = false;
1467 driver.start_matching(|seq| {
1468 if let Sequence::Triple {
1469 literals,
1470 offset,
1471 match_len,
1472 } = seq
1473 && literals.is_empty()
1474 && offset == 8
1475 && match_len >= MIN_MATCH_LEN
1476 {
1477 saw_match = true;
1478 }
1479 });
1480
1481 assert!(
1482 saw_match,
1483 "first full block should still match dictionary-primed history"
1484 );
1485}
1486
1487#[test]
1488fn prime_with_large_dictionary_preserves_early_history_until_first_block() {
1489 let mut driver = MatchGeneratorDriver::new(8, 1);
1490 driver.reset(CompressionLevel::Fastest);
1491
1492 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
1493
1494 let mut space = driver.get_next_space();
1495 space.clear();
1496 space.extend_from_slice(b"abcdefgh");
1497 driver.commit_space(space);
1498
1499 let mut saw_match = false;
1500 driver.start_matching(|seq| {
1501 if let Sequence::Triple {
1502 literals,
1503 offset,
1504 match_len,
1505 } = seq
1506 && literals.is_empty()
1507 && offset == 24
1508 && match_len >= MIN_MATCH_LEN
1509 {
1510 saw_match = true;
1511 }
1512 });
1513
1514 assert!(
1515 saw_match,
1516 "dictionary bytes should remain addressable until frame output exceeds the live window"
1517 );
1518}
1519
1520#[test]
1521fn prime_with_dictionary_applies_offset_history_even_when_content_is_empty() {
1522 let mut driver = MatchGeneratorDriver::new(8, 1);
1523 driver.reset(CompressionLevel::Fastest);
1524
1525 driver.prime_with_dictionary(&[], [11, 7, 3]);
1526
1527 assert_eq!(driver.match_generator.offset_hist, [11, 7, 3]);
1528}
1529
1530#[test]
1531fn dfast_prime_with_dictionary_preserves_history_for_first_full_block() {
1532 let mut driver = MatchGeneratorDriver::new(8, 1);
1533 driver.reset(CompressionLevel::Default);
1534
1535 driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
1536
1537 let mut space = driver.get_next_space();
1538 space.clear();
1539 space.extend_from_slice(b"abcdefgh");
1540 driver.commit_space(space);
1541
1542 let mut saw_match = false;
1543 driver.start_matching(|seq| {
1544 if let Sequence::Triple {
1545 literals,
1546 offset,
1547 match_len,
1548 } = seq
1549 && literals.is_empty()
1550 && offset == 8
1551 && match_len >= DFAST_MIN_MATCH_LEN
1552 {
1553 saw_match = true;
1554 }
1555 });
1556
1557 assert!(
1558 saw_match,
1559 "dfast backend should match dictionary-primed history in first full block"
1560 );
1561}
1562
1563#[test]
1564fn prime_with_dictionary_does_not_inflate_reported_window_size() {
1565 let mut driver = MatchGeneratorDriver::new(8, 1);
1566 driver.reset(CompressionLevel::Fastest);
1567
1568 let before = driver.window_size();
1569 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
1570 let after = driver.window_size();
1571
1572 assert_eq!(
1573 after, before,
1574 "dictionary retention budget must not change reported frame window size"
1575 );
1576}
1577
1578#[test]
1579fn prime_with_dictionary_does_not_reuse_tiny_suffix_store() {
1580 let mut driver = MatchGeneratorDriver::new(8, 2);
1581 driver.reset(CompressionLevel::Fastest);
1582
1583 driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);
1586
1587 assert!(
1588 driver
1589 .match_generator
1590 .window
1591 .iter()
1592 .all(|entry| entry.data.len() >= MIN_MATCH_LEN),
1593 "dictionary priming must not commit tails shorter than MIN_MATCH_LEN"
1594 );
1595}
1596
1597#[test]
1598fn prime_with_dictionary_counts_only_committed_tail_budget() {
1599 let mut driver = MatchGeneratorDriver::new(8, 1);
1600 driver.reset(CompressionLevel::Fastest);
1601
1602 let before = driver.match_generator.max_window_size;
1603 driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);
1605
1606 assert_eq!(
1607 driver.match_generator.max_window_size,
1608 before + 8,
1609 "retention budget must account only for dictionary bytes actually committed to history"
1610 );
1611}
1612
1613#[test]
1614fn dfast_prime_with_dictionary_counts_four_byte_tail_budget() {
1615 let mut driver = MatchGeneratorDriver::new(8, 1);
1616 driver.reset(CompressionLevel::Default);
1617
1618 let before = driver.dfast_matcher().max_window_size;
1619 driver.prime_with_dictionary(b"abcdefghijkl", [1, 4, 8]);
1622
1623 assert_eq!(
1624 driver.dfast_matcher().max_window_size,
1625 before + 12,
1626 "dfast retention budget should include 4-byte dictionary tails"
1627 );
1628}
1629
1630#[test]
1631fn prime_with_dictionary_budget_shrinks_after_simple_eviction() {
1632 let mut driver = MatchGeneratorDriver::new(8, 1);
1633 driver.reset(CompressionLevel::Fastest);
1634
1635 let base_window = driver.match_generator.max_window_size;
1636 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
1637 assert_eq!(driver.match_generator.max_window_size, base_window + 24);
1638
1639 for block in [b"AAAAAAAA", b"BBBBBBBB"] {
1640 let mut space = driver.get_next_space();
1641 space.clear();
1642 space.extend_from_slice(block);
1643 driver.commit_space(space);
1644 driver.skip_matching();
1645 }
1646
1647 assert_eq!(
1648 driver.dictionary_retained_budget, 0,
1649 "dictionary budget should be fully retired once primed dict slices are evicted"
1650 );
1651 assert_eq!(
1652 driver.match_generator.max_window_size, base_window,
1653 "retired dictionary budget must not remain reusable for live history"
1654 );
1655}
1656
1657#[test]
1658fn prime_with_dictionary_budget_shrinks_after_dfast_eviction() {
1659 let mut driver = MatchGeneratorDriver::new(8, 1);
1660 driver.reset(CompressionLevel::Default);
1661 driver.dfast_matcher_mut().max_window_size = 8;
1664 driver.reported_window_size = 8;
1665
1666 let base_window = driver.dfast_matcher().max_window_size;
1667 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
1668 assert_eq!(driver.dfast_matcher().max_window_size, base_window + 24);
1669
1670 for block in [b"AAAAAAAA", b"BBBBBBBB"] {
1671 let mut space = driver.get_next_space();
1672 space.clear();
1673 space.extend_from_slice(block);
1674 driver.commit_space(space);
1675 driver.skip_matching();
1676 }
1677
1678 assert_eq!(
1679 driver.dictionary_retained_budget, 0,
1680 "dictionary budget should be fully retired once primed dict slices are evicted"
1681 );
1682 assert_eq!(
1683 driver.dfast_matcher().max_window_size,
1684 base_window,
1685 "retired dictionary budget must not remain reusable for live history"
1686 );
1687}
1688
1689#[test]
1690fn suffix_store_with_single_slot_does_not_panic_on_keying() {
1691 let mut suffixes = SuffixStore::with_capacity(1);
1692 suffixes.insert(b"abcde", 0);
1693 assert!(suffixes.contains_key(b"abcde"));
1694 assert_eq!(suffixes.get(b"abcde"), Some(0));
1695}
1696
1697#[test]
1698fn fastest_reset_uses_interleaved_hash_fill_step() {
1699 let mut driver = MatchGeneratorDriver::new(32, 2);
1700
1701 driver.reset(CompressionLevel::Uncompressed);
1702 assert_eq!(driver.match_generator.hash_fill_step, 1);
1703
1704 driver.reset(CompressionLevel::Fastest);
1705 assert_eq!(driver.match_generator.hash_fill_step, FAST_HASH_FILL_STEP);
1706
1707 driver.reset(CompressionLevel::Better);
1708 assert_eq!(driver.match_generator.hash_fill_step, 1);
1709}
1710
1711#[test]
1712fn simple_matcher_updates_offset_history_after_emitting_match() {
1713 let mut matcher = MatchGenerator::new(64);
1714 matcher.add_data(
1715 b"abcdeabcdeabcde".to_vec(),
1716 SuffixStore::with_capacity(64),
1717 |_, _| {},
1718 );
1719
1720 assert!(matcher.next_sequence(|seq| {
1721 assert_eq!(
1722 seq,
1723 Sequence::Triple {
1724 literals: b"abcde",
1725 offset: 5,
1726 match_len: 10,
1727 }
1728 );
1729 }));
1730 assert_eq!(matcher.offset_hist, [5, 1, 4]);
1731}
1732
1733#[test]
1734fn simple_matcher_zero_literal_repcode_checks_rep1_before_hash_lookup() {
1735 let mut matcher = MatchGenerator::new(64);
1736 matcher.add_data(
1737 b"abcdefghijabcdefghij".to_vec(),
1738 SuffixStore::with_capacity(64),
1739 |_, _| {},
1740 );
1741
1742 matcher.suffix_idx = 10;
1743 matcher.last_idx_in_sequence = 10;
1744 matcher.offset_hist = [99, 10, 4];
1745
1746 let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
1747 assert_eq!(candidate, Some((10, 10)));
1748}
1749
1750#[test]
1751fn simple_matcher_repcode_can_target_previous_window_entry() {
1752 let mut matcher = MatchGenerator::new(64);
1753 matcher.add_data(
1754 b"abcdefghij".to_vec(),
1755 SuffixStore::with_capacity(64),
1756 |_, _| {},
1757 );
1758 matcher.skip_matching();
1759 matcher.add_data(
1760 b"abcdefghij".to_vec(),
1761 SuffixStore::with_capacity(64),
1762 |_, _| {},
1763 );
1764
1765 matcher.offset_hist = [99, 10, 4];
1766
1767 let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data, 0);
1768 assert_eq!(candidate, Some((10, 10)));
1769}
1770
1771#[test]
1772fn simple_matcher_zero_literal_repcode_checks_rep2() {
1773 let mut matcher = MatchGenerator::new(64);
1774 matcher.add_data(
1775 b"abcdefghijabcdefghij".to_vec(),
1776 SuffixStore::with_capacity(64),
1777 |_, _| {},
1778 );
1779 matcher.suffix_idx = 10;
1780 matcher.last_idx_in_sequence = 10;
1781 matcher.offset_hist = [99, 4, 10];
1783
1784 let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
1785 assert_eq!(candidate, Some((10, 10)));
1786}
1787
1788#[test]
1789fn simple_matcher_zero_literal_repcode_checks_rep0_minus1() {
1790 let mut matcher = MatchGenerator::new(64);
1791 matcher.add_data(
1792 b"abcdefghijabcdefghij".to_vec(),
1793 SuffixStore::with_capacity(64),
1794 |_, _| {},
1795 );
1796 matcher.suffix_idx = 10;
1797 matcher.last_idx_in_sequence = 10;
1798 matcher.offset_hist = [11, 4, 99];
1800
1801 let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
1802 assert_eq!(candidate, Some((10, 10)));
1803}
1804
1805#[test]
1806fn simple_matcher_repcode_rejects_offsets_beyond_searchable_prefix() {
1807 let mut matcher = MatchGenerator::new(64);
1808 matcher.add_data(
1809 b"abcdefghij".to_vec(),
1810 SuffixStore::with_capacity(64),
1811 |_, _| {},
1812 );
1813 matcher.skip_matching();
1814 matcher.add_data(
1815 b"klmnopqrst".to_vec(),
1816 SuffixStore::with_capacity(64),
1817 |_, _| {},
1818 );
1819 matcher.suffix_idx = 3;
1820
1821 let candidate = matcher.offset_match_len(14, &matcher.window.last().unwrap().data[3..]);
1822 assert_eq!(candidate, None);
1823}
1824
1825#[test]
1826fn simple_matcher_skip_matching_seeds_every_position_even_with_fast_step() {
1827 let mut matcher = MatchGenerator::new(64);
1828 matcher.hash_fill_step = FAST_HASH_FILL_STEP;
1829 matcher.add_data(
1830 b"abcdefghijklmnop".to_vec(),
1831 SuffixStore::with_capacity(64),
1832 |_, _| {},
1833 );
1834 matcher.skip_matching();
1835 matcher.add_data(b"bcdef".to_vec(), SuffixStore::with_capacity(64), |_, _| {});
1836
1837 assert!(matcher.next_sequence(|seq| {
1838 assert_eq!(
1839 seq,
1840 Sequence::Triple {
1841 literals: b"",
1842 offset: 15,
1843 match_len: 5,
1844 }
1845 );
1846 }));
1847 assert!(!matcher.next_sequence(|_| {}));
1848}
1849
1850#[test]
1851fn simple_matcher_add_suffixes_till_backfills_last_searchable_anchor() {
1852 let mut matcher = MatchGenerator::new(64);
1853 matcher.hash_fill_step = FAST_HASH_FILL_STEP;
1854 matcher.add_data(
1855 b"01234abcde".to_vec(),
1856 SuffixStore::with_capacity(64),
1857 |_, _| {},
1858 );
1859 matcher.add_suffixes_till(10, FAST_HASH_FILL_STEP);
1860
1861 let last = matcher.window.last().unwrap();
1862 let tail = &last.data[5..10];
1863 assert_eq!(last.suffixes.get(tail), Some(5));
1864}
1865
1866#[test]
1867fn simple_matcher_add_suffixes_till_skips_when_idx_below_min_match_len() {
1868 let mut matcher = MatchGenerator::new(128);
1869 matcher.hash_fill_step = FAST_HASH_FILL_STEP;
1870 matcher.add_data(
1871 b"abcdefghijklmnopqrstuvwxyz".to_vec(),
1872 SuffixStore::with_capacity(1 << 16),
1873 |_, _| {},
1874 );
1875
1876 matcher.add_suffixes_till(MIN_MATCH_LEN - 1, FAST_HASH_FILL_STEP);
1877
1878 let last = matcher.window.last().unwrap();
1879 let first_key = &last.data[..MIN_MATCH_LEN];
1880 assert_eq!(last.suffixes.get(first_key), None);
1881}
1882
1883#[test]
1884fn simple_matcher_add_suffixes_till_fast_step_registers_interleaved_positions() {
1885 let mut matcher = MatchGenerator::new(128);
1886 matcher.hash_fill_step = FAST_HASH_FILL_STEP;
1887 matcher.add_data(
1888 b"abcdefghijklmnopqrstuvwxyz".to_vec(),
1889 SuffixStore::with_capacity(1 << 16),
1890 |_, _| {},
1891 );
1892
1893 matcher.add_suffixes_till(17, FAST_HASH_FILL_STEP);
1894
1895 let last = matcher.window.last().unwrap();
1896 for pos in [0usize, 3, 6, 9, 12] {
1897 let key = &last.data[pos..pos + MIN_MATCH_LEN];
1898 assert_eq!(
1899 last.suffixes.get(key),
1900 Some(pos),
1901 "expected interleaved suffix registration at pos {pos}"
1902 );
1903 }
1904}
1905
1906#[test]
1907fn dfast_skip_matching_handles_window_eviction() {
1908 let mut matcher = DfastMatchGenerator::new(16);
1909
1910 matcher.add_data(alloc::vec![1, 2, 3, 4, 5, 6], |_| {});
1911 matcher.skip_matching();
1912 matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});
1913 matcher.skip_matching();
1914 matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});
1915
1916 let mut reconstructed = alloc::vec![7, 8, 9, 10, 11, 12];
1917 matcher.start_matching(|seq| match seq {
1918 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
1919 Sequence::Triple {
1920 literals,
1921 offset,
1922 match_len,
1923 } => {
1924 reconstructed.extend_from_slice(literals);
1925 let start = reconstructed.len() - offset;
1926 for i in 0..match_len {
1927 let byte = reconstructed[start + i];
1928 reconstructed.push(byte);
1929 }
1930 }
1931 });
1932
1933 assert_eq!(reconstructed, [7, 8, 9, 10, 11, 12, 7, 8, 9, 10, 11, 12]);
1934}
1935
1936#[test]
1937fn dfast_add_data_callback_reports_evicted_len_not_capacity() {
1938 let mut matcher = DfastMatchGenerator::new(8);
1939
1940 let mut first = Vec::with_capacity(64);
1941 first.extend_from_slice(b"abcdefgh");
1942 matcher.add_data(first, |_| {});
1943
1944 let mut second = Vec::with_capacity(64);
1945 second.extend_from_slice(b"ijklmnop");
1946
1947 let mut observed_evicted_len = None;
1948 matcher.add_data(second, |data| {
1949 observed_evicted_len = Some(data.len());
1950 });
1951
1952 assert_eq!(
1953 observed_evicted_len,
1954 Some(8),
1955 "eviction callback must report evicted byte length, not backing capacity"
1956 );
1957}
1958
1959#[test]
1960fn dfast_trim_to_window_callback_reports_evicted_len_not_capacity() {
1961 let mut matcher = DfastMatchGenerator::new(16);
1962
1963 let mut first = Vec::with_capacity(64);
1964 first.extend_from_slice(b"abcdefgh");
1965 matcher.add_data(first, |_| {});
1966
1967 let mut second = Vec::with_capacity(64);
1968 second.extend_from_slice(b"ijklmnop");
1969 matcher.add_data(second, |_| {});
1970
1971 matcher.max_window_size = 8;
1972
1973 let mut observed_evicted_len = None;
1974 matcher.trim_to_window(|data| {
1975 observed_evicted_len = Some(data.len());
1976 });
1977
1978 assert_eq!(
1979 observed_evicted_len,
1980 Some(8),
1981 "trim callback must report evicted byte length, not backing capacity"
1982 );
1983}
1984
1985#[test]
1986fn dfast_inserts_tail_positions_for_next_block_matching() {
1987 let mut matcher = DfastMatchGenerator::new(DFAST_DEFAULT_WINDOW_SIZE);
1988
1989 matcher.add_data(b"012345bcdea".to_vec(), |_| {});
1990 let mut history = Vec::new();
1991 matcher.start_matching(|seq| match seq {
1992 Sequence::Literals { literals } => history.extend_from_slice(literals),
1993 Sequence::Triple { .. } => unreachable!("first block should not match history"),
1994 });
1995 assert_eq!(history, b"012345bcdea");
1996
1997 matcher.add_data(b"bcdeabcdeab".to_vec(), |_| {});
1998 let mut saw_first_sequence = false;
1999 matcher.start_matching(|seq| {
2000 assert!(!saw_first_sequence, "expected a single cross-block match");
2001 saw_first_sequence = true;
2002 match seq {
2003 Sequence::Literals { .. } => {
2004 panic!("expected tail-anchored cross-block match before any literals")
2005 }
2006 Sequence::Triple {
2007 literals,
2008 offset,
2009 match_len,
2010 } => {
2011 assert_eq!(literals, b"");
2012 assert_eq!(offset, 5);
2013 assert_eq!(match_len, 11);
2014 let start = history.len() - offset;
2015 for i in 0..match_len {
2016 let byte = history[start + i];
2017 history.push(byte);
2018 }
2019 }
2020 }
2021 });
2022
2023 assert!(
2024 saw_first_sequence,
2025 "expected tail-anchored cross-block match"
2026 );
2027 assert_eq!(history, b"012345bcdeabcdeabcdeab");
2028}