1use alloc::collections::VecDeque;
9use alloc::vec::Vec;
10use core::convert::TryInto;
11use core::num::NonZeroUsize;
12
13use super::CompressionLevel;
14use super::Matcher;
15use super::Sequence;
16use super::blocks::encode_offset_with_history;
17
18const MIN_MATCH_LEN: usize = 5;
19const DFAST_MIN_MATCH_LEN: usize = 6;
20const DFAST_TARGET_LEN: usize = 48;
21const DFAST_HASH_BITS: usize = 20;
24const DFAST_SEARCH_DEPTH: usize = 4;
25const DFAST_DEFAULT_WINDOW_SIZE: usize = 1 << 22;
26const DFAST_EMPTY_SLOT: usize = usize::MAX;
27
28#[derive(Copy, Clone, PartialEq, Eq)]
29enum MatcherBackend {
30 Simple,
31 Dfast,
32}
33
34pub struct MatchGeneratorDriver {
36 vec_pool: Vec<Vec<u8>>,
37 suffix_pool: Vec<SuffixStore>,
38 match_generator: MatchGenerator,
39 dfast_match_generator: Option<DfastMatchGenerator>,
40 active_backend: MatcherBackend,
41 slice_size: usize,
42 base_slice_size: usize,
43 base_window_size: usize,
44}
45
46impl MatchGeneratorDriver {
47 pub(crate) fn new(slice_size: usize, max_slices_in_window: usize) -> Self {
50 let max_window_size = max_slices_in_window * slice_size;
51 Self {
52 vec_pool: Vec::new(),
53 suffix_pool: Vec::new(),
54 match_generator: MatchGenerator::new(max_window_size),
55 dfast_match_generator: None,
56 active_backend: MatcherBackend::Simple,
57 slice_size,
58 base_slice_size: slice_size,
59 base_window_size: max_window_size,
60 }
61 }
62
63 fn level_config(&self, level: CompressionLevel) -> (MatcherBackend, usize, usize) {
64 match level {
65 CompressionLevel::Uncompressed => (
66 MatcherBackend::Simple,
67 self.base_slice_size,
68 self.base_window_size,
69 ),
70 CompressionLevel::Fastest => (
71 MatcherBackend::Simple,
72 self.base_slice_size,
73 self.base_window_size,
74 ),
75 CompressionLevel::Default => (
76 MatcherBackend::Dfast,
77 self.base_slice_size,
78 DFAST_DEFAULT_WINDOW_SIZE,
79 ),
80 CompressionLevel::Better => (
81 MatcherBackend::Simple,
82 self.base_slice_size,
83 self.base_window_size,
84 ),
85 CompressionLevel::Best => (
86 MatcherBackend::Simple,
87 self.base_slice_size,
88 self.base_window_size,
89 ),
90 }
91 }
92
93 fn dfast_matcher(&self) -> &DfastMatchGenerator {
94 self.dfast_match_generator
95 .as_ref()
96 .expect("dfast backend must be initialized by reset() before use")
97 }
98
99 fn dfast_matcher_mut(&mut self) -> &mut DfastMatchGenerator {
100 self.dfast_match_generator
101 .as_mut()
102 .expect("dfast backend must be initialized by reset() before use")
103 }
104}
105
106impl Matcher for MatchGeneratorDriver {
107 fn reset(&mut self, level: CompressionLevel) {
108 let (backend, slice_size, max_window_size) = self.level_config(level);
109 if self.active_backend != backend {
110 match self.active_backend {
111 MatcherBackend::Simple => {
112 let vec_pool = &mut self.vec_pool;
113 let suffix_pool = &mut self.suffix_pool;
114 self.match_generator.reset(|mut data, mut suffixes| {
115 data.resize(data.capacity(), 0);
116 vec_pool.push(data);
117 suffixes.slots.clear();
118 suffixes.slots.resize(suffixes.slots.capacity(), None);
119 suffix_pool.push(suffixes);
120 });
121 }
122 MatcherBackend::Dfast => {
123 if let Some(dfast) = self.dfast_match_generator.as_mut() {
124 let vec_pool = &mut self.vec_pool;
125 dfast.reset(|mut data| {
126 data.resize(data.capacity(), 0);
127 vec_pool.push(data);
128 });
129 }
130 }
131 }
132 }
133
134 self.active_backend = backend;
135 self.slice_size = slice_size;
136 match self.active_backend {
137 MatcherBackend::Simple => {
138 let vec_pool = &mut self.vec_pool;
139 let suffix_pool = &mut self.suffix_pool;
140 self.match_generator.max_window_size = max_window_size;
141 self.match_generator.reset(|mut data, mut suffixes| {
142 data.resize(data.capacity(), 0);
143 vec_pool.push(data);
144 suffixes.slots.clear();
145 suffixes.slots.resize(suffixes.slots.capacity(), None);
146 suffix_pool.push(suffixes);
147 });
148 }
149 MatcherBackend::Dfast => {
150 let dfast = self
151 .dfast_match_generator
152 .get_or_insert_with(|| DfastMatchGenerator::new(max_window_size));
153 dfast.max_window_size = max_window_size;
154 let vec_pool = &mut self.vec_pool;
155 dfast.reset(|mut data| {
156 data.resize(data.capacity(), 0);
157 vec_pool.push(data);
158 });
159 }
160 }
161 }
162
163 fn window_size(&self) -> u64 {
164 match self.active_backend {
165 MatcherBackend::Simple => self.match_generator.max_window_size as u64,
166 MatcherBackend::Dfast => self.dfast_matcher().max_window_size as u64,
167 }
168 }
169
170 fn get_next_space(&mut self) -> Vec<u8> {
171 self.vec_pool.pop().unwrap_or_else(|| {
172 let mut space = alloc::vec![0; self.slice_size];
173 space.resize(space.capacity(), 0);
174 space
175 })
176 }
177
178 fn get_last_space(&mut self) -> &[u8] {
179 match self.active_backend {
180 MatcherBackend::Simple => self.match_generator.window.last().unwrap().data.as_slice(),
181 MatcherBackend::Dfast => self.dfast_matcher().get_last_space(),
182 }
183 }
184
185 fn commit_space(&mut self, space: Vec<u8>) {
186 match self.active_backend {
187 MatcherBackend::Simple => {
188 let vec_pool = &mut self.vec_pool;
189 let suffixes = self
190 .suffix_pool
191 .pop()
192 .unwrap_or_else(|| SuffixStore::with_capacity(space.len()));
193 let suffix_pool = &mut self.suffix_pool;
194 self.match_generator
195 .add_data(space, suffixes, |mut data, mut suffixes| {
196 data.resize(data.capacity(), 0);
197 vec_pool.push(data);
198 suffixes.slots.clear();
199 suffixes.slots.resize(suffixes.slots.capacity(), None);
200 suffix_pool.push(suffixes);
201 });
202 }
203 MatcherBackend::Dfast => {
204 let vec_pool = &mut self.vec_pool;
205 self.dfast_match_generator
206 .as_mut()
207 .expect("dfast backend must be initialized by reset() before use")
208 .add_data(space, |mut data| {
209 data.resize(data.capacity(), 0);
210 vec_pool.push(data);
211 });
212 }
213 }
214 }
215
216 fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
217 match self.active_backend {
218 MatcherBackend::Simple => {
219 while self.match_generator.next_sequence(&mut handle_sequence) {}
220 }
221 MatcherBackend::Dfast => self
222 .dfast_matcher_mut()
223 .start_matching(&mut handle_sequence),
224 }
225 }
226 fn skip_matching(&mut self) {
227 match self.active_backend {
228 MatcherBackend::Simple => self.match_generator.skip_matching(),
229 MatcherBackend::Dfast => self.dfast_matcher_mut().skip_matching(),
230 }
231 }
232}
233
234struct SuffixStore {
237 slots: Vec<Option<NonZeroUsize>>,
241 len_log: u32,
242}
243
244impl SuffixStore {
245 fn with_capacity(capacity: usize) -> Self {
246 Self {
247 slots: alloc::vec![None; capacity],
248 len_log: capacity.ilog2(),
249 }
250 }
251
252 #[inline(always)]
253 fn insert(&mut self, suffix: &[u8], idx: usize) {
254 let key = self.key(suffix);
255 self.slots[key] = Some(NonZeroUsize::new(idx + 1).unwrap());
256 }
257
258 #[inline(always)]
259 fn contains_key(&self, suffix: &[u8]) -> bool {
260 let key = self.key(suffix);
261 self.slots[key].is_some()
262 }
263
264 #[inline(always)]
265 fn get(&self, suffix: &[u8]) -> Option<usize> {
266 let key = self.key(suffix);
267 self.slots[key].map(|x| <NonZeroUsize as Into<usize>>::into(x) - 1)
268 }
269
270 #[inline(always)]
271 fn key(&self, suffix: &[u8]) -> usize {
272 let s0 = suffix[0] as u64;
273 let s1 = suffix[1] as u64;
274 let s2 = suffix[2] as u64;
275 let s3 = suffix[3] as u64;
276 let s4 = suffix[4] as u64;
277
278 const POLY: u64 = 0xCF3BCCDCABu64;
279
280 let s0 = (s0 << 24).wrapping_mul(POLY);
281 let s1 = (s1 << 32).wrapping_mul(POLY);
282 let s2 = (s2 << 40).wrapping_mul(POLY);
283 let s3 = (s3 << 48).wrapping_mul(POLY);
284 let s4 = (s4 << 56).wrapping_mul(POLY);
285
286 let index = s0 ^ s1 ^ s2 ^ s3 ^ s4;
287 let index = index >> (64 - self.len_log);
288 index as usize % self.slots.len()
289 }
290}
291
292struct WindowEntry {
295 data: Vec<u8>,
296 suffixes: SuffixStore,
298 base_offset: usize,
300}
301
302pub(crate) struct MatchGenerator {
303 max_window_size: usize,
304 window: Vec<WindowEntry>,
307 window_size: usize,
308 #[cfg(debug_assertions)]
309 concat_window: Vec<u8>,
310 suffix_idx: usize,
312 last_idx_in_sequence: usize,
314}
315
316impl MatchGenerator {
317 fn new(max_size: usize) -> Self {
319 Self {
320 max_window_size: max_size,
321 window: Vec::new(),
322 window_size: 0,
323 #[cfg(debug_assertions)]
324 concat_window: Vec::new(),
325 suffix_idx: 0,
326 last_idx_in_sequence: 0,
327 }
328 }
329
330 fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
331 self.window_size = 0;
332 #[cfg(debug_assertions)]
333 self.concat_window.clear();
334 self.suffix_idx = 0;
335 self.last_idx_in_sequence = 0;
336 self.window.drain(..).for_each(|entry| {
337 reuse_space(entry.data, entry.suffixes);
338 });
339 }
340
341 fn next_sequence(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) -> bool {
346 loop {
347 let last_entry = self.window.last().unwrap();
348 let data_slice = &last_entry.data;
349
350 if self.suffix_idx >= data_slice.len() {
352 if self.last_idx_in_sequence != self.suffix_idx {
353 let literals = &data_slice[self.last_idx_in_sequence..];
354 self.last_idx_in_sequence = self.suffix_idx;
355 handle_sequence(Sequence::Literals { literals });
356 return true;
357 } else {
358 return false;
359 }
360 }
361
362 let data_slice = &data_slice[self.suffix_idx..];
364 if data_slice.len() < MIN_MATCH_LEN {
365 let last_idx_in_sequence = self.last_idx_in_sequence;
366 self.last_idx_in_sequence = last_entry.data.len();
367 self.suffix_idx = last_entry.data.len();
368 handle_sequence(Sequence::Literals {
369 literals: &last_entry.data[last_idx_in_sequence..],
370 });
371 return true;
372 }
373
374 let key = &data_slice[..MIN_MATCH_LEN];
376
377 let mut candidate = None;
379 for (match_entry_idx, match_entry) in self.window.iter().enumerate() {
380 let is_last = match_entry_idx == self.window.len() - 1;
381 if let Some(match_index) = match_entry.suffixes.get(key) {
382 let match_slice = if is_last {
383 &match_entry.data[match_index..self.suffix_idx]
384 } else {
385 &match_entry.data[match_index..]
386 };
387
388 let match_len = Self::common_prefix_len(match_slice, data_slice);
390
391 if match_len >= MIN_MATCH_LEN {
393 let offset = match_entry.base_offset + self.suffix_idx - match_index;
394
395 #[cfg(debug_assertions)]
397 {
398 let unprocessed = last_entry.data.len() - self.suffix_idx;
399 let start = self.concat_window.len() - unprocessed - offset;
400 let end = start + match_len;
401 let check_slice = &self.concat_window[start..end];
402 debug_assert_eq!(check_slice, &match_slice[..match_len]);
403 }
404
405 if let Some((old_offset, old_match_len)) = candidate {
406 if match_len > old_match_len
407 || (match_len == old_match_len && offset < old_offset)
408 {
409 candidate = Some((offset, match_len));
410 }
411 } else {
412 candidate = Some((offset, match_len));
413 }
414 }
415 }
416 }
417
418 if let Some((offset, match_len)) = candidate {
419 self.add_suffixes_till(self.suffix_idx + match_len);
422
423 let last_entry = self.window.last().unwrap();
425 let literals = &last_entry.data[self.last_idx_in_sequence..self.suffix_idx];
426
427 self.suffix_idx += match_len;
429 self.last_idx_in_sequence = self.suffix_idx;
430 handle_sequence(Sequence::Triple {
431 literals,
432 offset,
433 match_len,
434 });
435
436 return true;
437 }
438
439 let last_entry = self.window.last_mut().unwrap();
440 let key = &last_entry.data[self.suffix_idx..self.suffix_idx + MIN_MATCH_LEN];
441 if !last_entry.suffixes.contains_key(key) {
442 last_entry.suffixes.insert(key, self.suffix_idx);
443 }
444 self.suffix_idx += 1;
445 }
446 }
447
448 #[inline(always)]
450 fn common_prefix_len(a: &[u8], b: &[u8]) -> usize {
451 Self::mismatch_chunks::<8>(a, b)
452 }
453
454 fn mismatch_chunks<const N: usize>(xs: &[u8], ys: &[u8]) -> usize {
457 let off = core::iter::zip(xs.chunks_exact(N), ys.chunks_exact(N))
458 .take_while(|(x, y)| x == y)
459 .count()
460 * N;
461 off + core::iter::zip(&xs[off..], &ys[off..])
462 .take_while(|(x, y)| x == y)
463 .count()
464 }
465
466 #[inline(always)]
468 fn add_suffixes_till(&mut self, idx: usize) {
469 let last_entry = self.window.last_mut().unwrap();
470 if last_entry.data.len() < MIN_MATCH_LEN {
471 return;
472 }
473 let slice = &last_entry.data[self.suffix_idx..idx];
474 for (key_index, key) in slice.windows(MIN_MATCH_LEN).enumerate() {
475 if !last_entry.suffixes.contains_key(key) {
476 last_entry.suffixes.insert(key, self.suffix_idx + key_index);
477 }
478 }
479 }
480
481 fn skip_matching(&mut self) {
483 let len = self.window.last().unwrap().data.len();
484 self.add_suffixes_till(len);
485 self.suffix_idx = len;
486 self.last_idx_in_sequence = len;
487 }
488
489 fn add_data(
492 &mut self,
493 data: Vec<u8>,
494 suffixes: SuffixStore,
495 reuse_space: impl FnMut(Vec<u8>, SuffixStore),
496 ) {
497 assert!(
498 self.window.is_empty() || self.suffix_idx == self.window.last().unwrap().data.len()
499 );
500 self.reserve(data.len(), reuse_space);
501 #[cfg(debug_assertions)]
502 self.concat_window.extend_from_slice(&data);
503
504 if let Some(last_len) = self.window.last().map(|last| last.data.len()) {
505 for entry in self.window.iter_mut() {
506 entry.base_offset += last_len;
507 }
508 }
509
510 let len = data.len();
511 self.window.push(WindowEntry {
512 data,
513 suffixes,
514 base_offset: 0,
515 });
516 self.window_size += len;
517 self.suffix_idx = 0;
518 self.last_idx_in_sequence = 0;
519 }
520
521 fn reserve(&mut self, amount: usize, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
524 assert!(self.max_window_size >= amount);
525 while self.window_size + amount > self.max_window_size {
526 let removed = self.window.remove(0);
527 self.window_size -= removed.data.len();
528 #[cfg(debug_assertions)]
529 self.concat_window.drain(0..removed.data.len());
530
531 let WindowEntry {
532 suffixes,
533 data: leaked_vec,
534 base_offset: _,
535 } = removed;
536 reuse_space(leaked_vec, suffixes);
537 }
538 }
539}
540
541struct DfastMatchGenerator {
542 max_window_size: usize,
543 window: VecDeque<Vec<u8>>,
544 window_size: usize,
545 history: Vec<u8>,
548 history_start: usize,
549 history_abs_start: usize,
550 offset_hist: [u32; 3],
551 short_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
552 long_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
553}
554
555#[derive(Copy, Clone)]
556struct MatchCandidate {
557 start: usize,
558 offset: usize,
559 match_len: usize,
560}
561
562impl DfastMatchGenerator {
563 fn new(max_window_size: usize) -> Self {
564 Self {
565 max_window_size,
566 window: VecDeque::new(),
567 window_size: 0,
568 history: Vec::new(),
569 history_start: 0,
570 history_abs_start: 0,
571 offset_hist: [1, 4, 8],
572 short_hash: Vec::new(),
573 long_hash: Vec::new(),
574 }
575 }
576
577 fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
578 self.window_size = 0;
579 self.history.clear();
580 self.history_start = 0;
581 self.history_abs_start = 0;
582 self.offset_hist = [1, 4, 8];
583 if !self.short_hash.is_empty() {
584 self.short_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
585 self.long_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
586 }
587 for mut data in self.window.drain(..) {
588 data.resize(data.capacity(), 0);
589 reuse_space(data);
590 }
591 }
592
593 fn get_last_space(&self) -> &[u8] {
594 self.window.back().unwrap().as_slice()
595 }
596
597 fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
598 assert!(data.len() <= self.max_window_size);
599 while self.window_size + data.len() > self.max_window_size {
600 let mut removed = self.window.pop_front().unwrap();
601 self.window_size -= removed.len();
602 self.history_start += removed.len();
603 self.history_abs_start += removed.len();
604 removed.resize(removed.capacity(), 0);
605 reuse_space(removed);
606 }
607 self.compact_history();
608 self.history.extend_from_slice(&data);
609 self.window_size += data.len();
610 self.window.push_back(data);
611 }
612
613 fn skip_matching(&mut self) {
614 self.ensure_hash_tables();
615 let current_len = self.window.back().unwrap().len();
616 let current_abs_start = self.history_abs_start + self.window_size - current_len;
617 self.insert_positions(current_abs_start, current_abs_start + current_len);
618 }
619
620 fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
621 self.ensure_hash_tables();
622
623 let current_len = self.window.back().unwrap().len();
624 if current_len == 0 {
625 return;
626 }
627
628 let current_abs_start = self.history_abs_start + self.window_size - current_len;
629
630 let mut pos = 0usize;
631 let mut literals_start = 0usize;
632 while pos + DFAST_MIN_MATCH_LEN <= current_len {
633 let abs_pos = current_abs_start + pos;
634 let lit_len = pos - literals_start;
635
636 let best = self.best_match(abs_pos, lit_len);
637 if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
638 self.insert_positions(abs_pos, candidate.start + candidate.match_len);
639 let current = self.window.back().unwrap().as_slice();
640 let start = candidate.start - current_abs_start;
641 let literals = ¤t[literals_start..start];
642 handle_sequence(Sequence::Triple {
643 literals,
644 offset: candidate.offset,
645 match_len: candidate.match_len,
646 });
647 let _ = encode_offset_with_history(
650 candidate.offset as u32,
651 literals.len() as u32,
652 &mut self.offset_hist,
653 );
654 pos = start + candidate.match_len;
655 literals_start = pos;
656 } else {
657 self.insert_position(abs_pos);
658 pos += 1;
659 }
660 }
661
662 if literals_start < current_len {
663 let current = self.window.back().unwrap().as_slice();
670 handle_sequence(Sequence::Literals {
671 literals: ¤t[literals_start..],
672 });
673 }
674 }
675
676 fn ensure_hash_tables(&mut self) {
677 if self.short_hash.is_empty() {
678 self.short_hash =
682 alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; 1 << DFAST_HASH_BITS];
683 self.long_hash =
684 alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; 1 << DFAST_HASH_BITS];
685 }
686 }
687
688 fn compact_history(&mut self) {
689 if self.history_start == 0 {
690 return;
691 }
692 if self.history_start >= self.max_window_size
693 || self.history_start * 2 >= self.history.len()
694 {
695 self.history.drain(..self.history_start);
696 self.history_start = 0;
697 }
698 }
699
700 fn live_history(&self) -> &[u8] {
701 &self.history[self.history_start..]
702 }
703
704 fn history_abs_end(&self) -> usize {
705 self.history_abs_start + self.live_history().len()
706 }
707
708 fn best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
709 let rep = self.repcode_candidate(abs_pos, lit_len);
710 let hash = self.hash_candidate(abs_pos, lit_len);
711 Self::better_candidate(rep, hash)
712 }
713
714 fn pick_lazy_match(
715 &self,
716 abs_pos: usize,
717 lit_len: usize,
718 best: Option<MatchCandidate>,
719 ) -> Option<MatchCandidate> {
720 let best = best?;
721 if best.match_len >= DFAST_TARGET_LEN
722 || abs_pos + 1 + DFAST_MIN_MATCH_LEN > self.history_abs_end()
723 {
724 return Some(best);
725 }
726
727 let next = self.best_match(abs_pos + 1, lit_len + 1);
728 match next {
729 Some(next)
730 if next.match_len > best.match_len
731 || (next.match_len == best.match_len && next.offset < best.offset) =>
732 {
733 None
734 }
735 _ => Some(best),
736 }
737 }
738
739 fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
740 let reps = if lit_len == 0 {
741 [
742 Some(self.offset_hist[1] as usize),
743 Some(self.offset_hist[2] as usize),
744 (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
745 ]
746 } else {
747 [
748 Some(self.offset_hist[0] as usize),
749 Some(self.offset_hist[1] as usize),
750 Some(self.offset_hist[2] as usize),
751 ]
752 };
753
754 let mut best = None;
755 for rep in reps.into_iter().flatten() {
756 if rep == 0 || rep > abs_pos {
757 continue;
758 }
759 let candidate_pos = abs_pos - rep;
760 if candidate_pos < self.history_abs_start {
761 continue;
762 }
763 let concat = self.live_history();
764 let candidate_idx = candidate_pos - self.history_abs_start;
765 let current_idx = abs_pos - self.history_abs_start;
766 if current_idx + DFAST_MIN_MATCH_LEN > concat.len() {
767 continue;
768 }
769 let match_len =
770 MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
771 if match_len >= DFAST_MIN_MATCH_LEN {
772 let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
773 best = Self::better_candidate(best, Some(candidate));
774 }
775 }
776 best
777 }
778
779 fn hash_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
780 let concat = self.live_history();
781 let current_idx = abs_pos - self.history_abs_start;
782 let mut best = None;
783 for candidate_pos in self.long_candidates(abs_pos) {
784 if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos {
785 continue;
786 }
787 let candidate_idx = candidate_pos - self.history_abs_start;
788 let match_len =
789 MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
790 if match_len >= DFAST_MIN_MATCH_LEN {
791 let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
792 best = Self::better_candidate(best, Some(candidate));
793 if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) {
794 return best;
795 }
796 }
797 }
798
799 for candidate_pos in self.short_candidates(abs_pos) {
800 if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos {
801 continue;
802 }
803 let candidate_idx = candidate_pos - self.history_abs_start;
804 let match_len =
805 MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
806 if match_len >= DFAST_MIN_MATCH_LEN {
807 let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
808 best = Self::better_candidate(best, Some(candidate));
809 if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) {
810 return best;
811 }
812 }
813 }
814 best
815 }
816
817 fn extend_backwards(
818 &self,
819 mut candidate_pos: usize,
820 mut abs_pos: usize,
821 mut match_len: usize,
822 lit_len: usize,
823 ) -> MatchCandidate {
824 let concat = self.live_history();
825 let min_abs_pos = abs_pos - lit_len;
826 while abs_pos > min_abs_pos
827 && candidate_pos > self.history_abs_start
828 && concat[candidate_pos - self.history_abs_start - 1]
829 == concat[abs_pos - self.history_abs_start - 1]
830 {
831 candidate_pos -= 1;
832 abs_pos -= 1;
833 match_len += 1;
834 }
835 MatchCandidate {
836 start: abs_pos,
837 offset: abs_pos - candidate_pos,
838 match_len,
839 }
840 }
841
842 fn better_candidate(
843 lhs: Option<MatchCandidate>,
844 rhs: Option<MatchCandidate>,
845 ) -> Option<MatchCandidate> {
846 match (lhs, rhs) {
847 (None, other) | (other, None) => other,
848 (Some(lhs), Some(rhs)) => {
849 if rhs.match_len > lhs.match_len
850 || (rhs.match_len == lhs.match_len && rhs.offset < lhs.offset)
851 {
852 Some(rhs)
853 } else {
854 Some(lhs)
855 }
856 }
857 }
858 }
859
860 fn insert_positions(&mut self, start: usize, end: usize) {
861 for pos in start..end {
862 self.insert_position(pos);
863 }
864 }
865
866 fn insert_position(&mut self, pos: usize) {
867 let idx = pos - self.history_abs_start;
868 let short = {
869 let concat = self.live_history();
870 (idx + 4 <= concat.len()).then(|| Self::hash4(&concat[idx..]))
871 };
872 if let Some(short) = short {
873 let bucket = &mut self.short_hash[short];
874 if bucket[0] != pos {
875 bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
876 bucket[0] = pos;
877 }
878 }
879
880 let long = {
881 let concat = self.live_history();
882 (idx + 8 <= concat.len()).then(|| Self::hash8(&concat[idx..]))
883 };
884 if let Some(long) = long {
885 let bucket = &mut self.long_hash[long];
886 if bucket[0] != pos {
887 bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
888 bucket[0] = pos;
889 }
890 }
891 }
892
893 fn short_candidates(&self, pos: usize) -> impl Iterator<Item = usize> + '_ {
894 let concat = self.live_history();
895 let idx = pos - self.history_abs_start;
896 (idx + 4 <= concat.len())
897 .then(|| self.short_hash[Self::hash4(&concat[idx..])])
898 .into_iter()
899 .flatten()
900 .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
901 }
902
903 fn long_candidates(&self, pos: usize) -> impl Iterator<Item = usize> + '_ {
904 let concat = self.live_history();
905 let idx = pos - self.history_abs_start;
906 (idx + 8 <= concat.len())
907 .then(|| self.long_hash[Self::hash8(&concat[idx..])])
908 .into_iter()
909 .flatten()
910 .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
911 }
912
913 fn hash4(data: &[u8]) -> usize {
914 let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64;
915 Self::hash_bits(value)
916 }
917
918 fn hash8(data: &[u8]) -> usize {
919 let value = u64::from_le_bytes(data[..8].try_into().unwrap());
920 Self::hash_bits(value)
921 }
922
923 fn hash_bits(value: u64) -> usize {
924 const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
925 ((value.wrapping_mul(PRIME)) >> (64 - DFAST_HASH_BITS)) as usize
926 }
927}
928
929#[test]
930fn matches() {
931 let mut matcher = MatchGenerator::new(1000);
932 let mut original_data = Vec::new();
933 let mut reconstructed = Vec::new();
934
935 let assert_seq_equal = |seq1: Sequence<'_>, seq2: Sequence<'_>, reconstructed: &mut Vec<u8>| {
936 assert_eq!(seq1, seq2);
937 match seq2 {
938 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
939 Sequence::Triple {
940 literals,
941 offset,
942 match_len,
943 } => {
944 reconstructed.extend_from_slice(literals);
945 let start = reconstructed.len() - offset;
946 let end = start + match_len;
947 reconstructed.extend_from_within(start..end);
948 }
949 }
950 };
951
952 matcher.add_data(
953 alloc::vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
954 SuffixStore::with_capacity(100),
955 |_, _| {},
956 );
957 original_data.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
958
959 matcher.next_sequence(|seq| {
960 assert_seq_equal(
961 seq,
962 Sequence::Triple {
963 literals: &[0, 0, 0, 0, 0],
964 offset: 5,
965 match_len: 5,
966 },
967 &mut reconstructed,
968 )
969 });
970
971 assert!(!matcher.next_sequence(|_| {}));
972
973 matcher.add_data(
974 alloc::vec![
975 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
976 ],
977 SuffixStore::with_capacity(100),
978 |_, _| {},
979 );
980 original_data.extend_from_slice(&[
981 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
982 ]);
983
984 matcher.next_sequence(|seq| {
985 assert_seq_equal(
986 seq,
987 Sequence::Triple {
988 literals: &[1, 2, 3, 4, 5, 6],
989 offset: 6,
990 match_len: 6,
991 },
992 &mut reconstructed,
993 )
994 });
995 matcher.next_sequence(|seq| {
996 assert_seq_equal(
997 seq,
998 Sequence::Triple {
999 literals: &[],
1000 offset: 12,
1001 match_len: 6,
1002 },
1003 &mut reconstructed,
1004 )
1005 });
1006 matcher.next_sequence(|seq| {
1007 assert_seq_equal(
1008 seq,
1009 Sequence::Triple {
1010 literals: &[],
1011 offset: 28,
1012 match_len: 5,
1013 },
1014 &mut reconstructed,
1015 )
1016 });
1017 assert!(!matcher.next_sequence(|_| {}));
1018
1019 matcher.add_data(
1020 alloc::vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0],
1021 SuffixStore::with_capacity(100),
1022 |_, _| {},
1023 );
1024 original_data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0]);
1025
1026 matcher.next_sequence(|seq| {
1027 assert_seq_equal(
1028 seq,
1029 Sequence::Triple {
1030 literals: &[],
1031 offset: 23,
1032 match_len: 6,
1033 },
1034 &mut reconstructed,
1035 )
1036 });
1037 matcher.next_sequence(|seq| {
1038 assert_seq_equal(
1039 seq,
1040 Sequence::Triple {
1041 literals: &[7, 8, 9, 10, 11],
1042 offset: 16,
1043 match_len: 5,
1044 },
1045 &mut reconstructed,
1046 )
1047 });
1048 assert!(!matcher.next_sequence(|_| {}));
1049
1050 matcher.add_data(
1051 alloc::vec![0, 0, 0, 0, 0],
1052 SuffixStore::with_capacity(100),
1053 |_, _| {},
1054 );
1055 original_data.extend_from_slice(&[0, 0, 0, 0, 0]);
1056
1057 matcher.next_sequence(|seq| {
1058 assert_seq_equal(
1059 seq,
1060 Sequence::Triple {
1061 literals: &[],
1062 offset: 5,
1063 match_len: 5,
1064 },
1065 &mut reconstructed,
1066 )
1067 });
1068 assert!(!matcher.next_sequence(|_| {}));
1069
1070 matcher.add_data(
1071 alloc::vec![7, 8, 9, 10, 11],
1072 SuffixStore::with_capacity(100),
1073 |_, _| {},
1074 );
1075 original_data.extend_from_slice(&[7, 8, 9, 10, 11]);
1076
1077 matcher.next_sequence(|seq| {
1078 assert_seq_equal(
1079 seq,
1080 Sequence::Triple {
1081 literals: &[],
1082 offset: 15,
1083 match_len: 5,
1084 },
1085 &mut reconstructed,
1086 )
1087 });
1088 assert!(!matcher.next_sequence(|_| {}));
1089
1090 matcher.add_data(
1091 alloc::vec![1, 3, 5, 7, 9],
1092 SuffixStore::with_capacity(100),
1093 |_, _| {},
1094 );
1095 matcher.skip_matching();
1096 original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
1097 reconstructed.extend_from_slice(&[1, 3, 5, 7, 9]);
1098 assert!(!matcher.next_sequence(|_| {}));
1099
1100 matcher.add_data(
1101 alloc::vec![1, 3, 5, 7, 9],
1102 SuffixStore::with_capacity(100),
1103 |_, _| {},
1104 );
1105 original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
1106
1107 matcher.next_sequence(|seq| {
1108 assert_seq_equal(
1109 seq,
1110 Sequence::Triple {
1111 literals: &[],
1112 offset: 5,
1113 match_len: 5,
1114 },
1115 &mut reconstructed,
1116 )
1117 });
1118 assert!(!matcher.next_sequence(|_| {}));
1119
1120 matcher.add_data(
1121 alloc::vec![0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23],
1122 SuffixStore::with_capacity(100),
1123 |_, _| {},
1124 );
1125 original_data.extend_from_slice(&[0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23]);
1126
1127 matcher.next_sequence(|seq| {
1128 assert_seq_equal(
1129 seq,
1130 Sequence::Triple {
1131 literals: &[0, 0, 11, 13, 15, 17, 20],
1132 offset: 5,
1133 match_len: 5,
1134 },
1135 &mut reconstructed,
1136 )
1137 });
1138 matcher.next_sequence(|seq| {
1139 assert_seq_equal(
1140 seq,
1141 Sequence::Literals {
1142 literals: &[21, 23],
1143 },
1144 &mut reconstructed,
1145 )
1146 });
1147 assert!(!matcher.next_sequence(|_| {}));
1148
1149 assert_eq!(reconstructed, original_data);
1150}
1151
1152#[test]
1153fn dfast_matches_roundtrip_multi_block_pattern() {
1154 let pattern = [9, 21, 44, 184, 19, 96, 171, 109, 141, 251];
1155 let first_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
1156 let second_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
1157
1158 let mut matcher = DfastMatchGenerator::new(DFAST_DEFAULT_WINDOW_SIZE);
1159 let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
1160 Sequence::Literals { literals } => decoded.extend_from_slice(literals),
1161 Sequence::Triple {
1162 literals,
1163 offset,
1164 match_len,
1165 } => {
1166 decoded.extend_from_slice(literals);
1167 let start = decoded.len() - offset;
1168 for i in 0..match_len {
1169 let byte = decoded[start + i];
1170 decoded.push(byte);
1171 }
1172 }
1173 };
1174
1175 matcher.add_data(first_block.clone(), |_| {});
1176 let mut history = Vec::new();
1177 matcher.start_matching(|seq| replay_sequence(&mut history, seq));
1178 assert_eq!(history, first_block);
1179
1180 matcher.add_data(second_block.clone(), |_| {});
1181 let prefix_len = history.len();
1182 matcher.start_matching(|seq| replay_sequence(&mut history, seq));
1183
1184 assert_eq!(&history[prefix_len..], second_block.as_slice());
1185}
1186
1187#[test]
1188fn driver_switches_backends_and_initializes_dfast_via_reset() {
1189 let mut driver = MatchGeneratorDriver::new(32, 2);
1190
1191 driver.reset(CompressionLevel::Default);
1192 assert_eq!(driver.window_size(), DFAST_DEFAULT_WINDOW_SIZE as u64);
1193
1194 let mut first = driver.get_next_space();
1195 first[..12].copy_from_slice(b"abcabcabcabc");
1196 first.truncate(12);
1197 driver.commit_space(first);
1198 assert_eq!(driver.get_last_space(), b"abcabcabcabc");
1199 driver.skip_matching();
1200
1201 let mut second = driver.get_next_space();
1202 second[..12].copy_from_slice(b"abcabcabcabc");
1203 second.truncate(12);
1204 driver.commit_space(second);
1205
1206 let mut reconstructed = b"abcabcabcabc".to_vec();
1207 driver.start_matching(|seq| match seq {
1208 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
1209 Sequence::Triple {
1210 literals,
1211 offset,
1212 match_len,
1213 } => {
1214 reconstructed.extend_from_slice(literals);
1215 let start = reconstructed.len() - offset;
1216 for i in 0..match_len {
1217 let byte = reconstructed[start + i];
1218 reconstructed.push(byte);
1219 }
1220 }
1221 });
1222 assert_eq!(reconstructed, b"abcabcabcabcabcabcabcabc");
1223
1224 driver.reset(CompressionLevel::Fastest);
1225 assert_eq!(driver.window_size(), 64);
1226}
1227
1228#[test]
1229fn dfast_skip_matching_handles_window_eviction() {
1230 let mut matcher = DfastMatchGenerator::new(16);
1231
1232 matcher.add_data(alloc::vec![1, 2, 3, 4, 5, 6], |_| {});
1233 matcher.skip_matching();
1234 matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});
1235 matcher.skip_matching();
1236 matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});
1237
1238 let mut reconstructed = alloc::vec![7, 8, 9, 10, 11, 12];
1239 matcher.start_matching(|seq| match seq {
1240 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
1241 Sequence::Triple {
1242 literals,
1243 offset,
1244 match_len,
1245 } => {
1246 reconstructed.extend_from_slice(literals);
1247 let start = reconstructed.len() - offset;
1248 for i in 0..match_len {
1249 let byte = reconstructed[start + i];
1250 reconstructed.push(byte);
1251 }
1252 }
1253 });
1254
1255 assert_eq!(reconstructed, [7, 8, 9, 10, 11, 12, 7, 8, 9, 10, 11, 12]);
1256}
1257
1258#[test]
1259fn dfast_inserts_tail_positions_for_next_block_matching() {
1260 let mut matcher = DfastMatchGenerator::new(DFAST_DEFAULT_WINDOW_SIZE);
1261
1262 matcher.add_data(b"012345bcdea".to_vec(), |_| {});
1263 let mut history = Vec::new();
1264 matcher.start_matching(|seq| match seq {
1265 Sequence::Literals { literals } => history.extend_from_slice(literals),
1266 Sequence::Triple { .. } => unreachable!("first block should not match history"),
1267 });
1268 assert_eq!(history, b"012345bcdea");
1269
1270 matcher.add_data(b"bcdeabcdeab".to_vec(), |_| {});
1271 let mut saw_first_sequence = false;
1272 matcher.start_matching(|seq| {
1273 assert!(!saw_first_sequence, "expected a single cross-block match");
1274 saw_first_sequence = true;
1275 match seq {
1276 Sequence::Literals { .. } => {
1277 panic!("expected tail-anchored cross-block match before any literals")
1278 }
1279 Sequence::Triple {
1280 literals,
1281 offset,
1282 match_len,
1283 } => {
1284 assert_eq!(literals, b"");
1285 assert_eq!(offset, 5);
1286 assert_eq!(match_len, 11);
1287 let start = history.len() - offset;
1288 for i in 0..match_len {
1289 let byte = history[start + i];
1290 history.push(byte);
1291 }
1292 }
1293 }
1294 });
1295
1296 assert!(
1297 saw_first_sequence,
1298 "expected tail-anchored cross-block match"
1299 );
1300 assert_eq!(history, b"012345bcdeabcdeabcdeab");
1301}