// structured_zstd/encoding/match_generator.rs
//! Matching algorithm used to find repeated parts in the original data
//!
//! The Zstd format relies on finding repeated sequences of data and compressing these sequences as instructions to the decoder.
//! A sequence basically tells the decoder "Go back X bytes and copy Y bytes to the end of your decode buffer".
//!
//! The task here is to efficiently find matches in the already encoded data for the current suffix of the not yet encoded data.

8use alloc::collections::VecDeque;
9use alloc::vec::Vec;
10use core::convert::TryInto;
11use core::num::NonZeroUsize;
12
13use super::CompressionLevel;
14use super::Matcher;
15use super::Sequence;
16use super::blocks::encode_offset_with_history;
17
/// Minimum match length the simple matcher emits; also the suffix-hash key length.
const MIN_MATCH_LEN: usize = 5;
/// Sparse suffix-registration stride used at `CompressionLevel::Fastest`.
const FAST_HASH_FILL_STEP: usize = 3;
/// Minimum match length for the dfast backend (backend not shown in this chunk).
const DFAST_MIN_MATCH_LEN: usize = 6;
/// Target match length for the dfast backend — presumably its early-exit length; confirm in DfastMatchGenerator.
const DFAST_TARGET_LEN: usize = 48;
// Keep these aligned with the issue's zstd level-3/dfast target unless ratio
// measurements show we can shrink them without regressing acceptance tests.
const DFAST_HASH_BITS: usize = 20;
const DFAST_SEARCH_DEPTH: usize = 4;
/// Window budget used when the Default level selects the dfast backend (4 MiB).
const DFAST_DEFAULT_WINDOW_SIZE: usize = 1 << 22;
/// Sentinel for unused dfast hash-table slots (usize::MAX is never a valid position).
const DFAST_EMPTY_SLOT: usize = usize::MAX;
28
/// Selects which match-finding backend the driver currently runs.
#[derive(Copy, Clone, PartialEq, Eq)]
enum MatcherBackend {
    /// Suffix-store based matcher (`MatchGenerator`).
    Simple,
    /// `DfastMatchGenerator` — presumably modeled on zstd's "double fast" strategy.
    Dfast,
}
34
/// This is the default implementation of the `Matcher` trait. It allocates and reuses the buffers when possible.
pub struct MatchGeneratorDriver {
    /// Recycled data buffers, shared by both backends.
    vec_pool: Vec<Vec<u8>>,
    /// Recycled suffix stores (used by the simple backend only).
    suffix_pool: Vec<SuffixStore>,
    /// Simple suffix-store matcher; always present.
    match_generator: MatchGenerator,
    /// Dfast matcher; allocated lazily the first time `reset()` selects it.
    dfast_match_generator: Option<DfastMatchGenerator>,
    /// Which of the two matchers is currently active.
    active_backend: MatcherBackend,
    /// Size of buffers handed out by `get_next_space()` for the current level.
    slice_size: usize,
    /// `slice_size` as configured at construction (level_config currently always uses this).
    base_slice_size: usize,
    /// Window budget as configured at construction.
    base_window_size: usize,
    // Frame header window size must stay at the configured live-window budget.
    // Dictionary retention expands internal matcher capacity only.
    reported_window_size: usize,
    // Tracks currently retained bytes that originated from primed dictionary
    // history and have not been evicted yet.
    dictionary_retained_budget: usize,
}
52
impl MatchGeneratorDriver {
    /// slice_size says how big the slices should be that are allocated to work with
    /// max_slices_in_window says how many slices should at most be used while looking for matches
    pub(crate) fn new(slice_size: usize, max_slices_in_window: usize) -> Self {
        let max_window_size = max_slices_in_window * slice_size;
        Self {
            vec_pool: Vec::new(),
            suffix_pool: Vec::new(),
            match_generator: MatchGenerator::new(max_window_size),
            // Allocated lazily on the first reset() that selects the dfast backend.
            dfast_match_generator: None,
            active_backend: MatcherBackend::Simple,
            slice_size,
            base_slice_size: slice_size,
            base_window_size: max_window_size,
            reported_window_size: max_window_size,
            dictionary_retained_budget: 0,
        }
    }

    /// Maps a compression level to (backend, slice size, window size, hash fill step).
    ///
    /// Only `Default` currently selects the dfast backend; every other level runs
    /// the simple matcher with the base slice/window configuration.
    fn level_config(&self, level: CompressionLevel) -> (MatcherBackend, usize, usize, usize) {
        match level {
            CompressionLevel::Uncompressed => (
                MatcherBackend::Simple,
                self.base_slice_size,
                self.base_window_size,
                1,
            ),
            CompressionLevel::Fastest => (
                MatcherBackend::Simple,
                self.base_slice_size,
                self.base_window_size,
                // Sparser suffix registration trades match density for speed.
                FAST_HASH_FILL_STEP,
            ),
            CompressionLevel::Default => (
                MatcherBackend::Dfast,
                self.base_slice_size,
                DFAST_DEFAULT_WINDOW_SIZE,
                1,
            ),
            CompressionLevel::Better => (
                MatcherBackend::Simple,
                self.base_slice_size,
                self.base_window_size,
                1,
            ),
            CompressionLevel::Best => (
                MatcherBackend::Simple,
                self.base_slice_size,
                self.base_window_size,
                1,
            ),
        }
    }

    /// Shared accessor for the dfast backend.
    ///
    /// Panics if `reset()` has not yet selected and allocated the dfast matcher.
    fn dfast_matcher(&self) -> &DfastMatchGenerator {
        self.dfast_match_generator
            .as_ref()
            .expect("dfast backend must be initialized by reset() before use")
    }

    /// Mutable accessor for the dfast backend; same initialization requirement
    /// as `dfast_matcher()`.
    fn dfast_matcher_mut(&mut self) -> &mut DfastMatchGenerator {
        self.dfast_match_generator
            .as_mut()
            .expect("dfast backend must be initialized by reset() before use")
    }

    /// Shrinks the active matcher's window budget after `evicted_bytes` left the
    /// window, capped by the dictionary bytes still retained — eviction of
    /// regular frame data never shrinks the budget.
    fn retire_dictionary_budget(&mut self, evicted_bytes: usize) {
        let reclaimed = evicted_bytes.min(self.dictionary_retained_budget);
        if reclaimed == 0 {
            return;
        }
        self.dictionary_retained_budget -= reclaimed;
        match self.active_backend {
            MatcherBackend::Simple => {
                self.match_generator.max_window_size = self
                    .match_generator
                    .max_window_size
                    .saturating_sub(reclaimed);
            }
            MatcherBackend::Dfast => {
                let matcher = self.dfast_matcher_mut();
                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
            }
        }
    }

    /// Evicts window entries until the matcher fits its (possibly just reduced)
    /// window budget, recycling evicted buffers into the pools.
    ///
    /// Each round of evictions may retire more dictionary budget, which shrinks
    /// the window further — hence the loop until a pass evicts nothing.
    fn trim_after_budget_retire(&mut self) {
        loop {
            let mut evicted_bytes = 0usize;
            match self.active_backend {
                MatcherBackend::Simple => {
                    let vec_pool = &mut self.vec_pool;
                    let suffix_pool = &mut self.suffix_pool;
                    // reserve(0) adds no data; presumably it only evicts entries
                    // exceeding max_window_size — confirm in MatchGenerator::reserve.
                    self.match_generator.reserve(0, |mut data, mut suffixes| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                        suffixes.slots.clear();
                        suffixes.slots.resize(suffixes.slots.capacity(), None);
                        suffix_pool.push(suffixes);
                    });
                }
                MatcherBackend::Dfast => {
                    let mut retired = Vec::new();
                    self.dfast_matcher_mut().trim_to_window(|data| {
                        evicted_bytes += data.len();
                        retired.push(data);
                    });
                    // Recycle outside the closure: self.vec_pool cannot be borrowed
                    // while the dfast matcher is mutably borrowed.
                    for mut data in retired {
                        data.resize(data.capacity(), 0);
                        self.vec_pool.push(data);
                    }
                }
            }
            if evicted_bytes == 0 {
                break;
            }
            self.retire_dictionary_budget(evicted_bytes);
        }
    }
}
174
impl Matcher for MatchGeneratorDriver {
    /// Both backends can ingest dictionary content via `prime_with_dictionary`.
    fn supports_dictionary_priming(&self) -> bool {
        true
    }

    /// Reconfigures the driver for a new frame at the given compression level.
    ///
    /// If the level selects a different backend than the active one, the old
    /// backend's buffers are drained into the pools first. The selected backend
    /// is then reset with the level's window size (and, for the simple backend,
    /// its hash fill step).
    fn reset(&mut self, level: CompressionLevel) {
        let (backend, slice_size, max_window_size, hash_fill_step) = self.level_config(level);
        self.dictionary_retained_budget = 0;
        if self.active_backend != backend {
            // Drain the backend we are leaving so its buffers can be reused.
            match self.active_backend {
                MatcherBackend::Simple => {
                    let vec_pool = &mut self.vec_pool;
                    let suffix_pool = &mut self.suffix_pool;
                    self.match_generator.reset(|mut data, mut suffixes| {
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                        suffixes.slots.clear();
                        suffixes.slots.resize(suffixes.slots.capacity(), None);
                        suffix_pool.push(suffixes);
                    });
                }
                MatcherBackend::Dfast => {
                    if let Some(dfast) = self.dfast_match_generator.as_mut() {
                        let vec_pool = &mut self.vec_pool;
                        dfast.reset(|mut data| {
                            data.resize(data.capacity(), 0);
                            vec_pool.push(data);
                        });
                    }
                }
            }
        }

        self.active_backend = backend;
        self.slice_size = slice_size;
        // The frame header advertises this size regardless of any later
        // dictionary-driven growth of the internal matcher capacity.
        self.reported_window_size = max_window_size;
        match self.active_backend {
            MatcherBackend::Simple => {
                let vec_pool = &mut self.vec_pool;
                let suffix_pool = &mut self.suffix_pool;
                self.match_generator.max_window_size = max_window_size;
                self.match_generator.hash_fill_step = hash_fill_step;
                self.match_generator.reset(|mut data, mut suffixes| {
                    data.resize(data.capacity(), 0);
                    vec_pool.push(data);
                    suffixes.slots.clear();
                    suffixes.slots.resize(suffixes.slots.capacity(), None);
                    suffix_pool.push(suffixes);
                });
            }
            MatcherBackend::Dfast => {
                // Allocate on first use, then size it for this level.
                let dfast = self
                    .dfast_match_generator
                    .get_or_insert_with(|| DfastMatchGenerator::new(max_window_size));
                dfast.max_window_size = max_window_size;
                let vec_pool = &mut self.vec_pool;
                dfast.reset(|mut data| {
                    data.resize(data.capacity(), 0);
                    vec_pool.push(data);
                });
            }
        }
    }

    /// Loads dictionary content into the active backend's window so frame data
    /// can match against it, and installs the dictionary's repeat-offset history.
    ///
    /// The internal window budget is grown by the dictionary size while
    /// `dictionary_retained_budget` tracks the committed part, so the budget
    /// shrinks back as dictionary bytes get evicted later. The reported
    /// (frame header) window size is NOT changed.
    fn prime_with_dictionary(&mut self, dict_content: &[u8], offset_hist: [u32; 3]) {
        match self.active_backend {
            MatcherBackend::Simple => self.match_generator.offset_hist = offset_hist,
            MatcherBackend::Dfast => self.dfast_matcher_mut().offset_hist = offset_hist,
        }

        if dict_content.is_empty() {
            return;
        }

        // Dictionary bytes should stay addressable until produced frame output
        // itself exceeds the live window size.
        let retained_dict_budget = dict_content.len();
        match self.active_backend {
            MatcherBackend::Simple => {
                self.match_generator.max_window_size = self
                    .match_generator
                    .max_window_size
                    .saturating_add(retained_dict_budget);
            }
            MatcherBackend::Dfast => {
                let matcher = self.dfast_matcher_mut();
                matcher.max_window_size =
                    matcher.max_window_size.saturating_add(retained_dict_budget);
            }
        }

        // Feed the dictionary in slice_size chunks, indexing suffixes but
        // emitting no sequences (skip_matching).
        let mut start = 0usize;
        let mut committed_dict_budget = 0usize;
        // A tail shorter than the backend's minimum key length cannot be indexed.
        let min_primed_tail = match self.active_backend {
            MatcherBackend::Simple => MIN_MATCH_LEN,
            MatcherBackend::Dfast => 4,
        };
        while start < dict_content.len() {
            let end = (start + self.slice_size).min(dict_content.len());
            if end - start < min_primed_tail {
                break;
            }
            let mut space = self.get_next_space();
            space.clear();
            space.extend_from_slice(&dict_content[start..end]);
            self.commit_space(space);
            self.skip_matching();
            committed_dict_budget += end - start;
            start = end;
        }

        // Give back the budget for any tail we did not actually commit.
        let uncommitted_tail_budget = retained_dict_budget.saturating_sub(committed_dict_budget);
        if uncommitted_tail_budget > 0 {
            match self.active_backend {
                MatcherBackend::Simple => {
                    self.match_generator.max_window_size = self
                        .match_generator
                        .max_window_size
                        .saturating_sub(uncommitted_tail_budget);
                }
                MatcherBackend::Dfast => {
                    let matcher = self.dfast_matcher_mut();
                    matcher.max_window_size = matcher
                        .max_window_size
                        .saturating_sub(uncommitted_tail_budget);
                }
            }
        }
        if committed_dict_budget > 0 {
            self.dictionary_retained_budget = self
                .dictionary_retained_budget
                .saturating_add(committed_dict_budget);
        }
    }

    /// Window size to advertise in the frame header.
    fn window_size(&self) -> u64 {
        self.reported_window_size as u64
    }

    /// Hands out a zero-initialized buffer of `slice_size` bytes, reusing a
    /// pooled buffer when one is available.
    fn get_next_space(&mut self) -> Vec<u8> {
        self.vec_pool.pop().unwrap_or_else(|| {
            let mut space = alloc::vec![0; self.slice_size];
            space.resize(space.capacity(), 0);
            space
        })
    }

    /// Read access to the most recently committed slice.
    /// Panics (simple backend) if no slice has been committed yet.
    fn get_last_space(&mut self) -> &[u8] {
        match self.active_backend {
            MatcherBackend::Simple => self.match_generator.window.last().unwrap().data.as_slice(),
            MatcherBackend::Dfast => self.dfast_matcher().get_last_space(),
        }
    }

    /// Appends a filled buffer to the active matcher's window, recycling any
    /// evicted buffers into the pools and retiring dictionary budget for the
    /// evicted bytes.
    fn commit_space(&mut self, space: Vec<u8>) {
        match self.active_backend {
            MatcherBackend::Simple => {
                let vec_pool = &mut self.vec_pool;
                let mut evicted_bytes = 0usize;
                let suffixes = self
                    .suffix_pool
                    .pop()
                    .unwrap_or_else(|| SuffixStore::with_capacity(space.len()));
                let suffix_pool = &mut self.suffix_pool;
                self.match_generator
                    .add_data(space, suffixes, |mut data, mut suffixes| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                        suffixes.slots.clear();
                        suffixes.slots.resize(suffixes.slots.capacity(), None);
                        suffix_pool.push(suffixes);
                    });
                self.retire_dictionary_budget(evicted_bytes);
                self.trim_after_budget_retire();
            }
            MatcherBackend::Dfast => {
                let vec_pool = &mut self.vec_pool;
                let mut evicted_bytes = 0usize;
                self.dfast_match_generator
                    .as_mut()
                    .expect("dfast backend must be initialized by reset() before use")
                    .add_data(space, |mut data| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                    });
                self.retire_dictionary_budget(evicted_bytes);
                self.trim_after_budget_retire();
            }
        }
    }

    /// Runs the matcher over the last committed slice, invoking
    /// `handle_sequence` for every literal run and match produced.
    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
        match self.active_backend {
            MatcherBackend::Simple => {
                while self.match_generator.next_sequence(&mut handle_sequence) {}
            }
            MatcherBackend::Dfast => self
                .dfast_matcher_mut()
                .start_matching(&mut handle_sequence),
        }
    }
    /// Indexes the last committed slice without emitting any sequences.
    fn skip_matching(&mut self) {
        match self.active_backend {
            MatcherBackend::Simple => self.match_generator.skip_matching(),
            MatcherBackend::Dfast => self.dfast_matcher_mut().skip_matching(),
        }
    }
}
385
/// This stores the index of a suffix of a string by hashing the first few bytes of that suffix
/// This means that collisions just overwrite and that you need to check validity after a get
struct SuffixStore {
    // We use NonZeroUsize to enable niche optimization here.
    // On store we do +1 and on get -1
    // This is ok since usize::MAX is never a valid offset
    slots: Vec<Option<NonZeroUsize>>,
    // floor(log2(capacity)); used to take the top bits of the hash as the slot index.
    len_log: u32,
}

impl SuffixStore {
    /// Creates a store with `capacity` slots.
    ///
    /// `capacity` must be nonzero (`ilog2` panics on 0). Non-power-of-two
    /// capacities work, but only the first 2^floor(log2(capacity)) slots are
    /// ever addressed by `key`.
    fn with_capacity(capacity: usize) -> Self {
        Self {
            slots: alloc::vec![None; capacity],
            len_log: capacity.ilog2(),
        }
    }

    /// Records `idx` as the position of `suffix`, overwriting any colliding entry.
    #[inline(always)]
    fn insert(&mut self, suffix: &[u8], idx: usize) {
        let key = self.key(suffix);
        // +1 so the value fits in NonZeroUsize (undone by get()); idx is a
        // buffer offset, so idx + 1 cannot be 0.
        self.slots[key] = Some(NonZeroUsize::new(idx + 1).unwrap());
    }

    /// True if some suffix (possibly a colliding one) occupies this suffix's slot.
    #[inline(always)]
    fn contains_key(&self, suffix: &[u8]) -> bool {
        let key = self.key(suffix);
        self.slots[key].is_some()
    }

    /// Returns the index stored in this suffix's slot, if occupied.
    ///
    /// A hash collision can yield the index of a *different* suffix, so callers
    /// must verify the bytes actually match.
    #[inline(always)]
    fn get(&self, suffix: &[u8]) -> Option<usize> {
        let key = self.key(suffix);
        // NonZeroUsize::get() is the idiomatic accessor; -1 undoes insert()'s +1.
        self.slots[key].map(|x| x.get() - 1)
    }

    /// Hashes the first five bytes of `suffix` (callers guarantee len >= 5)
    /// into a slot index.
    #[inline(always)]
    fn key(&self, suffix: &[u8]) -> usize {
        // Capacity=1 yields len_log=0; shifting by 64 would panic.
        if self.len_log == 0 {
            return 0;
        }

        let s0 = suffix[0] as u64;
        let s1 = suffix[1] as u64;
        let s2 = suffix[2] as u64;
        let s3 = suffix[3] as u64;
        let s4 = suffix[4] as u64;

        const POLY: u64 = 0xCF3BCCDCABu64;

        let s0 = (s0 << 24).wrapping_mul(POLY);
        let s1 = (s1 << 32).wrapping_mul(POLY);
        let s2 = (s2 << 40).wrapping_mul(POLY);
        let s3 = (s3 << 48).wrapping_mul(POLY);
        let s4 = (s4 << 56).wrapping_mul(POLY);

        let index = s0 ^ s1 ^ s2 ^ s3 ^ s4;
        // Top len_log bits of the mix: always < 2^len_log <= slots.len(), so
        // the modulo below is a no-op kept as a defensive guard.
        let index = index >> (64 - self.len_log);
        index as usize % self.slots.len()
    }
}
448
/// We keep a window of a few of these entries
/// All of these are valid targets for a match to be generated for
struct WindowEntry {
    /// The raw bytes of this window slice.
    data: Vec<u8>,
    /// Stores indexes into data
    suffixes: SuffixStore,
    /// Makes offset calculations efficient.
    /// Appears to be the distance from this entry's start to the start of the
    /// newest entry (incremented in add_data when a new entry is pushed) —
    /// confirm against MatchGenerator::add_data.
    base_offset: usize,
}
458
pub(crate) struct MatchGenerator {
    /// Maximum total bytes retained across all window entries; eviction happens
    /// via the reuse callbacks in add_data/reserve.
    max_window_size: usize,
    /// Data window we are operating on to find matches
    /// The data we want to find matches for is in the last slice
    window: Vec<WindowEntry>,
    /// Total bytes currently held in `window`.
    window_size: usize,
    /// Debug-only contiguous copy of the window, used to validate found offsets.
    #[cfg(debug_assertions)]
    concat_window: Vec<u8>,
    /// Index in the last slice that we already processed
    suffix_idx: usize,
    /// Gets updated when a new sequence is returned to point right behind that sequence
    last_idx_in_sequence: usize,
    /// Stride for suffix registration (1 = dense, FAST_HASH_FILL_STEP = sparse).
    hash_fill_step: usize,
    /// Repeat-offset history used for repcode candidates; [1, 4, 8] is the
    /// zstd default (RFC 8878).
    offset_hist: [u32; 3],
}
474
475impl MatchGenerator {
476    #[inline(always)]
477    #[cfg(target_endian = "little")]
478    fn mismatch_byte_index(diff: usize) -> usize {
479        diff.trailing_zeros() as usize / 8
480    }
481
    /// Index (0-based) of the first differing byte, in memory order, given a
    /// nonzero XOR of two words. On big-endian targets the lowest-addressed
    /// byte is the most significant one, so count leading zero bits.
    /// Only meaningful for diff != 0 (diff == 0 would index past the word).
    #[inline(always)]
    #[cfg(target_endian = "big")]
    fn mismatch_byte_index(diff: usize) -> usize {
        diff.leading_zeros() as usize / 8
    }
487
    /// max_size defines how many bytes will be used at most in the window used for matching
    fn new(max_size: usize) -> Self {
        Self {
            max_window_size: max_size,
            window: Vec::new(),
            window_size: 0,
            #[cfg(debug_assertions)]
            concat_window: Vec::new(),
            suffix_idx: 0,
            last_idx_in_sequence: 0,
            // Dense suffix registration by default; reset() may override per level.
            hash_fill_step: 1,
            // Zstd's default repeat-offset history (RFC 8878).
            offset_hist: [1, 4, 8],
        }
    }
502
503    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
504        self.window_size = 0;
505        #[cfg(debug_assertions)]
506        self.concat_window.clear();
507        self.suffix_idx = 0;
508        self.last_idx_in_sequence = 0;
509        self.offset_hist = [1, 4, 8];
510        self.window.drain(..).for_each(|entry| {
511            reuse_space(entry.data, entry.suffixes);
512        });
513    }
514
    /// Processes bytes in the current window until either a match is found or no more matches can be found
    /// * If a match is found handle_sequence is called with the Triple variant
    /// * If no more matches can be found but there are bytes still left handle_sequence is called with the Literals variant
    /// * If no more matches can be found and no more bytes are left this returns false
    ///
    /// Panics if the window is empty (callers must commit data first).
    fn next_sequence(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) -> bool {
        loop {
            let last_entry = self.window.last().unwrap();
            let data_slice = &last_entry.data;

            // We already reached the end of the window, check if we need to return a Literals{}
            if self.suffix_idx >= data_slice.len() {
                if self.last_idx_in_sequence != self.suffix_idx {
                    let literals = &data_slice[self.last_idx_in_sequence..];
                    self.last_idx_in_sequence = self.suffix_idx;
                    handle_sequence(Sequence::Literals { literals });
                    return true;
                } else {
                    return false;
                }
            }

            // If the remaining data is smaller than the minimum match length we can stop and return a Literals{}
            let data_slice = &data_slice[self.suffix_idx..];
            if data_slice.len() < MIN_MATCH_LEN {
                let last_idx_in_sequence = self.last_idx_in_sequence;
                self.last_idx_in_sequence = last_entry.data.len();
                self.suffix_idx = last_entry.data.len();
                handle_sequence(Sequence::Literals {
                    literals: &last_entry.data[last_idx_in_sequence..],
                });
                return true;
            }

            // This is the key we are looking to find a match for
            let key = &data_slice[..MIN_MATCH_LEN];
            let literals_len = self.suffix_idx - self.last_idx_in_sequence;

            // Look in each window entry
            // Repeat-offset candidates are seeded first; hash-table candidates
            // may replace them below if they are strictly better.
            let mut candidate = self.repcode_candidate(data_slice, literals_len);
            for match_entry in self.window.iter() {
                if let Some(match_index) = match_entry.suffixes.get(key) {
                    let match_slice = &match_entry.data[match_index..];

                    // Check how long the common prefix actually is
                    let match_len = Self::common_prefix_len(match_slice, data_slice);

                    // Collisions in the suffix store might make this check fail
                    if match_len >= MIN_MATCH_LEN {
                        let offset = match_entry.base_offset + self.suffix_idx - match_index;

                        // If we are in debug/tests make sure the match we found is actually at the offset we calculated
                        #[cfg(debug_assertions)]
                        {
                            let unprocessed = last_entry.data.len() - self.suffix_idx;
                            let start = self.concat_window.len() - unprocessed - offset;
                            let end = start + match_len;
                            let check_slice = &self.concat_window[start..end];
                            debug_assert_eq!(check_slice, &match_slice[..match_len]);
                        }

                        // Prefer longer matches; break ties toward the smaller
                        // (cheaper to encode) offset.
                        if let Some((old_offset, old_match_len)) = candidate {
                            if match_len > old_match_len
                                || (match_len == old_match_len && offset < old_offset)
                            {
                                candidate = Some((offset, match_len));
                            }
                        } else {
                            candidate = Some((offset, match_len));
                        }
                    }
                }
            }

            if let Some((offset, match_len)) = candidate {
                // For each index in the match we found we do not need to look for another match
                // But we still want them registered in the suffix store
                self.add_suffixes_till(self.suffix_idx + match_len, self.hash_fill_step);

                // All literals that were not included between this match and the last are now included here
                let last_entry = self.window.last().unwrap();
                let literals = &last_entry.data[self.last_idx_in_sequence..self.suffix_idx];

                // Update the indexes, all indexes upto and including the current index have been included in a sequence now
                self.suffix_idx += match_len;
                self.last_idx_in_sequence = self.suffix_idx;
                // Keep the repeat-offset history in sync; the encoded value is
                // discarded here — presumably recomputed at block-encoding time.
                // TODO(review): confirm against blocks::encode_offset_with_history.
                let _ = encode_offset_with_history(
                    offset as u32,
                    literals.len() as u32,
                    &mut self.offset_hist,
                );
                handle_sequence(Sequence::Triple {
                    literals,
                    offset,
                    match_len,
                });

                return true;
            }

            // No match at this position: register its suffix (first writer wins)
            // and advance a single byte.
            let last_entry = self.window.last_mut().unwrap();
            let key = &last_entry.data[self.suffix_idx..self.suffix_idx + MIN_MATCH_LEN];
            if !last_entry.suffixes.contains_key(key) {
                last_entry.suffixes.insert(key, self.suffix_idx);
            }
            self.suffix_idx += 1;
        }
    }
622
    /// Find the common prefix length between two byte slices
    ///
    /// Compares word-at-a-time first, then finishes byte-wise on the tail that
    /// is shorter than one machine word.
    #[inline(always)]
    fn common_prefix_len(a: &[u8], b: &[u8]) -> usize {
        let max = a.len().min(b.len());
        let chunk = core::mem::size_of::<usize>();
        let mut off = 0usize;
        let lhs = a.as_ptr();
        let rhs = b.as_ptr();

        while off + chunk <= max {
            // SAFETY: off + chunk <= max <= a.len(), so this unaligned word
            // read stays inside `a`'s allocation.
            let lhs_word = unsafe { core::ptr::read_unaligned(lhs.add(off) as *const usize) };
            // SAFETY: same bound; max <= b.len() keeps the read inside `b`.
            let rhs_word = unsafe { core::ptr::read_unaligned(rhs.add(off) as *const usize) };
            let diff = lhs_word ^ rhs_word;
            if diff != 0 {
                // First differing byte within the word, endian-aware.
                return off + Self::mismatch_byte_index(diff);
            }
            off += chunk;
        }

        // Byte-wise scan of the remaining sub-word tail.
        off + core::iter::zip(&a[off..max], &b[off..max])
            .take_while(|(x, y)| x == y)
            .count()
    }
646
    /// Process bytes and add the suffixes to the suffix store up to a specific index
    ///
    /// Registers suffix keys starting at self.suffix_idx, stepping by
    /// `fill_step`, such that every key ends at or before `idx`; additionally
    /// registers the final key ending exactly at `idx` so the tail of a match
    /// stays findable. Does not advance self.suffix_idx itself.
    #[inline(always)]
    fn add_suffixes_till(&mut self, idx: usize, fill_step: usize) {
        let start = self.suffix_idx;
        let last_entry = self.window.last_mut().unwrap();
        // Entry too short to contain even one full key.
        if last_entry.data.len() < MIN_MATCH_LEN {
            return;
        }
        // Positions < insert_limit have their full MIN_MATCH_LEN key within [..idx].
        let insert_limit = idx.saturating_sub(MIN_MATCH_LEN - 1);
        if insert_limit > start {
            let data = last_entry.data.as_slice();
            let suffixes = &mut last_entry.suffixes;
            if fill_step == FAST_HASH_FILL_STEP {
                // Specialized unrolled path for the Fastest level's stride.
                Self::add_suffixes_interleaved_fast(data, suffixes, start, insert_limit);
            } else {
                let mut pos = start;
                while pos < insert_limit {
                    Self::insert_suffix_if_absent(data, suffixes, pos);
                    pos += fill_step;
                }
            }
        }

        // Always register the key that ends exactly at idx (the stride above
        // may have skipped it).
        if idx >= start + MIN_MATCH_LEN {
            let tail_start = idx - MIN_MATCH_LEN;
            let tail_key = &last_entry.data[tail_start..tail_start + MIN_MATCH_LEN];
            if !last_entry.suffixes.contains_key(tail_key) {
                last_entry.suffixes.insert(tail_key, tail_start);
            }
        }
    }
678
679    #[inline(always)]
680    fn insert_suffix_if_absent(data: &[u8], suffixes: &mut SuffixStore, pos: usize) {
681        debug_assert!(
682            pos + MIN_MATCH_LEN <= data.len(),
683            "insert_suffix_if_absent: pos {} + MIN_MATCH_LEN {} exceeds data.len() {}",
684            pos,
685            MIN_MATCH_LEN,
686            data.len()
687        );
688        let key = &data[pos..pos + MIN_MATCH_LEN];
689        if !suffixes.contains_key(key) {
690            suffixes.insert(key, pos);
691        }
692    }
693
694    #[inline(always)]
695    fn add_suffixes_interleaved_fast(
696        data: &[u8],
697        suffixes: &mut SuffixStore,
698        start: usize,
699        insert_limit: usize,
700    ) {
701        let lane = FAST_HASH_FILL_STEP;
702        let mut pos = start;
703
704        // Pipeline-ish fill: compute and retire several hash positions per loop
705        // so the fastest path keeps multiple independent hash lookups in flight.
706        while pos + lane * 3 < insert_limit {
707            let p0 = pos;
708            let p1 = pos + lane;
709            let p2 = pos + lane * 2;
710            let p3 = pos + lane * 3;
711
712            Self::insert_suffix_if_absent(data, suffixes, p0);
713            Self::insert_suffix_if_absent(data, suffixes, p1);
714            Self::insert_suffix_if_absent(data, suffixes, p2);
715            Self::insert_suffix_if_absent(data, suffixes, p3);
716
717            pos += lane * 4;
718        }
719
720        while pos < insert_limit {
721            Self::insert_suffix_if_absent(data, suffixes, pos);
722            pos += lane;
723        }
724    }
725
    /// Checks the repeat-offset candidates for a match at the current position,
    /// returning the best (offset, match_len) found, if any.
    ///
    /// Only applies when the sequence would carry no literals; the candidate
    /// set {rep2, rep3, rep1 - 1} mirrors zstd's repcode rules for the
    /// literals_length == 0 case (see RFC 8878).
    fn repcode_candidate(&self, data_slice: &[u8], literals_len: usize) -> Option<(usize, usize)> {
        if literals_len != 0 {
            return None;
        }

        let reps = [
            Some(self.offset_hist[1] as usize),
            Some(self.offset_hist[2] as usize),
            // rep1 - 1 is only a candidate when it stays nonzero.
            (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
        ];

        let mut best: Option<(usize, usize)> = None;
        // Tiny fixed-size dedup so the same offset is not probed twice.
        let mut seen = [0usize; 3];
        let mut seen_len = 0usize;
        for offset in reps.into_iter().flatten() {
            if offset == 0 {
                continue;
            }
            if seen[..seen_len].contains(&offset) {
                continue;
            }
            seen[seen_len] = offset;
            seen_len += 1;

            let Some(match_len) = self.offset_match_len(offset, data_slice) else {
                continue;
            };
            if match_len < MIN_MATCH_LEN {
                continue;
            }

            // Prefer longer matches; break ties toward the smaller offset.
            if best.is_none_or(|(old_offset, old_len)| {
                match_len > old_len || (match_len == old_len && offset < old_offset)
            }) {
                best = Some((offset, match_len));
            }
        }
        best
    }
765
    /// Length of the common prefix between `data_slice` and the window content
    /// `offset` bytes behind the current position, or None if the offset does
    /// not land inside the searchable window.
    fn offset_match_len(&self, offset: usize, data_slice: &[u8]) -> Option<usize> {
        if offset == 0 {
            return None;
        }

        let last_idx = self.window.len().checked_sub(1)?;
        let last_entry = &self.window[last_idx];
        // Bytes preceding the current position across the whole window.
        let searchable_prefix = self.window_size - (last_entry.data.len() - self.suffix_idx);
        if offset > searchable_prefix {
            return None;
        }

        // Walk backwards from the current position, spanning older window
        // entries until the offset is consumed.
        let mut remaining = offset;
        let (entry_idx, match_index) = if remaining <= self.suffix_idx {
            // Target lies within the last (current) entry.
            (last_idx, self.suffix_idx - remaining)
        } else {
            remaining -= self.suffix_idx;
            let mut found = None;
            for entry_idx in (0..last_idx).rev() {
                let len = self.window[entry_idx].data.len();
                if remaining <= len {
                    found = Some((entry_idx, len - remaining));
                    break;
                }
                remaining -= len;
            }
            found?
        };

        let match_entry = &self.window[entry_idx];
        let match_slice = &match_entry.data[match_index..];

        // Note: the comparison does not continue across entry boundaries, so
        // the returned length is capped at the containing entry's end.
        Some(Self::common_prefix_len(match_slice, data_slice))
    }
800
801    /// Skip matching for the whole current window entry
802    fn skip_matching(&mut self) {
803        let len = self.window.last().unwrap().data.len();
804        self.add_suffixes_till(len, 1);
805        self.suffix_idx = len;
806        self.last_idx_in_sequence = len;
807    }
808
809    /// Add a new window entry. Will panic if the last window entry hasn't been processed properly.
810    /// If any resources are released by pushing the new entry they are returned via the callback
811    fn add_data(
812        &mut self,
813        data: Vec<u8>,
814        suffixes: SuffixStore,
815        reuse_space: impl FnMut(Vec<u8>, SuffixStore),
816    ) {
817        assert!(
818            self.window.is_empty() || self.suffix_idx == self.window.last().unwrap().data.len()
819        );
820        self.reserve(data.len(), reuse_space);
821        #[cfg(debug_assertions)]
822        self.concat_window.extend_from_slice(&data);
823
824        if let Some(last_len) = self.window.last().map(|last| last.data.len()) {
825            for entry in self.window.iter_mut() {
826                entry.base_offset += last_len;
827            }
828        }
829
830        let len = data.len();
831        self.window.push(WindowEntry {
832            data,
833            suffixes,
834            base_offset: 0,
835        });
836        self.window_size += len;
837        self.suffix_idx = 0;
838        self.last_idx_in_sequence = 0;
839    }
840
841    /// Reserve space for a new window entry
842    /// If any resources are released by pushing the new entry they are returned via the callback
843    fn reserve(&mut self, amount: usize, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
844        assert!(self.max_window_size >= amount);
845        while self.window_size + amount > self.max_window_size {
846            let removed = self.window.remove(0);
847            self.window_size -= removed.data.len();
848            #[cfg(debug_assertions)]
849            self.concat_window.drain(0..removed.data.len());
850
851            let WindowEntry {
852                suffixes,
853                data: leaked_vec,
854                base_offset: _,
855            } = removed;
856            reuse_space(leaked_vec, suffixes);
857        }
858    }
859}
860
/// Double-fast ("dfast") matcher: two bucketed hash tables keyed on 4-byte and
/// 8-byte prefixes over one contiguous history buffer, in the style of zstd's
/// level-3 strategy (see the DFAST_* constants at the top of the file).
struct DfastMatchGenerator {
    // Budget for retained live history bytes.
    max_window_size: usize,
    // Block buffers currently contributing to `window_size`, oldest at front.
    window: VecDeque<Vec<u8>>,
    // Total byte length of all buffers in `window`.
    window_size: usize,
    // We keep a contiguous searchable history to avoid rebuilding and reseeding
    // the matcher state from disjoint block buffers on every block.
    history: Vec<u8>,
    // Index into `history` where the live (non-evicted) bytes begin.
    history_start: usize,
    // Absolute stream position corresponding to `history[history_start]`.
    history_abs_start: usize,
    // Recent-offset history used for rep-code candidates.
    offset_hist: [u32; 3],
    // Hash buckets keyed on 4-byte prefixes; most recent position first,
    // empty slots hold DFAST_EMPTY_SLOT.
    short_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
    // Hash buckets keyed on 8-byte prefixes; same layout as `short_hash`.
    long_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
}
874
/// A potential match produced by the dfast search.
#[derive(Copy, Clone)]
struct MatchCandidate {
    // Absolute position where the match begins (after backward extension).
    start: usize,
    // Distance back to the matched bytes.
    offset: usize,
    // Number of matching bytes.
    match_len: usize,
}
881
impl DfastMatchGenerator {
    /// Create a matcher with empty history. Hash tables are allocated lazily
    /// (see `ensure_hash_tables`) so cheaper levels never pay their cost.
    fn new(max_window_size: usize) -> Self {
        Self {
            max_window_size,
            window: VecDeque::new(),
            window_size: 0,
            history: Vec::new(),
            history_start: 0,
            history_abs_start: 0,
            offset_hist: [1, 4, 8],
            short_hash: Vec::new(),
            long_hash: Vec::new(),
        }
    }

    /// Clear all matcher state and hand the block buffers back for reuse.
    /// Hash tables stay allocated but are wiped to empty slots.
    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
        self.window_size = 0;
        self.history.clear();
        self.history_start = 0;
        self.history_abs_start = 0;
        self.offset_hist = [1, 4, 8];
        if !self.short_hash.is_empty() {
            self.short_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
            self.long_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
        }
        for mut data in self.window.drain(..) {
            // Grow the buffer back to its full capacity before recycling it.
            data.resize(data.capacity(), 0);
            reuse_space(data);
        }
    }

    /// Borrow the most recently committed block.
    fn get_last_space(&self) -> &[u8] {
        self.window.back().unwrap().as_slice()
    }

    /// Append a block to the history, evicting oldest blocks first when the
    /// window budget would be exceeded. Evicted buffers go to `reuse_space`.
    fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
        assert!(data.len() <= self.max_window_size);
        while self.window_size + data.len() > self.max_window_size {
            let removed = self.window.pop_front().unwrap();
            self.window_size -= removed.len();
            // Advancing both markers keeps absolute positions stable across
            // evictions; hash entries pointing at evicted bytes are filtered
            // out at lookup time (candidate_pos < history_abs_start).
            self.history_start += removed.len();
            self.history_abs_start += removed.len();
            reuse_space(removed);
        }
        self.compact_history();
        self.history.extend_from_slice(&data);
        self.window_size += data.len();
        self.window.push_back(data);
    }

    /// Evict oldest blocks until retained history fits the window budget,
    /// e.g. after the budget was shrunk externally.
    fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
        while self.window_size > self.max_window_size {
            let removed = self.window.pop_front().unwrap();
            self.window_size -= removed.len();
            self.history_start += removed.len();
            self.history_abs_start += removed.len();
            reuse_space(removed);
        }
    }

    /// Index the whole current block in both hash tables without emitting
    /// any sequences.
    fn skip_matching(&mut self) {
        self.ensure_hash_tables();
        let current_len = self.window.back().unwrap().len();
        let current_abs_start = self.history_abs_start + self.window_size - current_len;
        self.insert_positions(current_abs_start, current_abs_start + current_len);
    }

    /// Greedy match loop with one-step lazy evaluation over the current block,
    /// emitting `Sequence`s via `handle_sequence`.
    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
        self.ensure_hash_tables();

        let current_len = self.window.back().unwrap().len();
        if current_len == 0 {
            return;
        }

        // Absolute position of the current block's first byte.
        let current_abs_start = self.history_abs_start + self.window_size - current_len;

        let mut pos = 0usize;
        let mut literals_start = 0usize;
        while pos + DFAST_MIN_MATCH_LEN <= current_len {
            let abs_pos = current_abs_start + pos;
            let lit_len = pos - literals_start;

            let best = self.best_match(abs_pos, lit_len);
            if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
                self.insert_positions(abs_pos, candidate.start + candidate.match_len);
                let current = self.window.back().unwrap().as_slice();
                // `candidate.start` may be before `abs_pos` if the match was
                // extended backwards over pending literal bytes.
                let start = candidate.start - current_abs_start;
                let literals = &current[literals_start..start];
                handle_sequence(Sequence::Triple {
                    literals,
                    offset: candidate.offset,
                    match_len: candidate.match_len,
                });
                // The encoded offset value is irrelevant here; we only need the
                // side effect on offset history for future rep-code matching.
                let _ = encode_offset_with_history(
                    candidate.offset as u32,
                    literals.len() as u32,
                    &mut self.offset_hist,
                );
                pos = start + candidate.match_len;
                literals_start = pos;
            } else {
                self.insert_position(abs_pos);
                pos += 1;
            }
        }

        if literals_start < current_len {
            // We stop inserting once fewer than DFAST_MIN_MATCH_LEN bytes remain.
            // The last boundary-spanning start that can seed a dfast hash is
            // still inserted by the loop above; `dfast_inserts_tail_positions_
            // for_next_block_matching()` asserts that the next block can match
            // immediately at the boundary without eagerly seeding the final
            // DFAST_MIN_MATCH_LEN - 1 bytes here.
            let current = self.window.back().unwrap().as_slice();
            handle_sequence(Sequence::Literals {
                literals: &current[literals_start..],
            });
        }
    }

    /// Allocate both hash tables on first use.
    fn ensure_hash_tables(&mut self) {
        if self.short_hash.is_empty() {
            // This is intentionally lazy so Fastest/Uncompressed never pay the
            // ~dfast-level memory cost. The current size tracks the issue's
            // zstd level-3 style parameters rather than a generic low-memory preset.
            self.short_hash =
                alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; 1 << DFAST_HASH_BITS];
            self.long_hash =
                alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; 1 << DFAST_HASH_BITS];
        }
    }

    /// Physically drop evicted bytes from the front of `history`, but only
    /// once the dead prefix is big enough that the copy pays for itself.
    fn compact_history(&mut self) {
        if self.history_start == 0 {
            return;
        }
        if self.history_start >= self.max_window_size
            || self.history_start * 2 >= self.history.len()
        {
            self.history.drain(..self.history_start);
            self.history_start = 0;
        }
    }

    /// The searchable (non-evicted) portion of the history buffer.
    fn live_history(&self) -> &[u8] {
        &self.history[self.history_start..]
    }

    /// Absolute position one past the last byte of live history.
    fn history_abs_end(&self) -> usize {
        self.history_abs_start + self.live_history().len()
    }

    /// Best of the rep-code and hash-table candidates at `abs_pos`.
    fn best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        let rep = self.repcode_candidate(abs_pos, lit_len);
        let hash = self.hash_candidate(abs_pos, lit_len);
        Self::better_candidate(rep, hash)
    }

    /// One-step lazy heuristic: if a strictly better match starts one byte
    /// later, return `None` so the caller advances a byte and retries there.
    /// Long-enough matches (>= DFAST_TARGET_LEN) are accepted immediately.
    fn pick_lazy_match(
        &self,
        abs_pos: usize,
        lit_len: usize,
        best: Option<MatchCandidate>,
    ) -> Option<MatchCandidate> {
        let best = best?;
        if best.match_len >= DFAST_TARGET_LEN
            || abs_pos + 1 + DFAST_MIN_MATCH_LEN > self.history_abs_end()
        {
            return Some(best);
        }

        let next = self.best_match(abs_pos + 1, lit_len + 1);
        match next {
            Some(next)
                if next.match_len > best.match_len
                    || (next.match_len == best.match_len && next.offset < best.offset) =>
            {
                None
            }
            _ => Some(best),
        }
    }

    /// Try the three rep-code offsets at `abs_pos`. Candidate ordering follows
    /// the zstd convention and depends on whether the pending sequence carries
    /// literals (`lit_len`). Returns the best qualifying candidate.
    fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        let reps = if lit_len == 0 {
            [
                Some(self.offset_hist[1] as usize),
                Some(self.offset_hist[2] as usize),
                (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
            ]
        } else {
            [
                Some(self.offset_hist[0] as usize),
                Some(self.offset_hist[1] as usize),
                Some(self.offset_hist[2] as usize),
            ]
        };

        let mut best = None;
        for rep in reps.into_iter().flatten() {
            if rep == 0 || rep > abs_pos {
                continue;
            }
            let candidate_pos = abs_pos - rep;
            // Skip candidates that reach into already-evicted history.
            if candidate_pos < self.history_abs_start {
                continue;
            }
            let concat = self.live_history();
            let candidate_idx = candidate_pos - self.history_abs_start;
            let current_idx = abs_pos - self.history_abs_start;
            if current_idx + DFAST_MIN_MATCH_LEN > concat.len() {
                continue;
            }
            let match_len =
                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
            if match_len >= DFAST_MIN_MATCH_LEN {
                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
                best = Self::better_candidate(best, Some(candidate));
            }
        }
        best
    }

    /// Probe the long (8-byte) then short (4-byte) hash table for candidates,
    /// stopping early once a match reaches DFAST_TARGET_LEN.
    fn hash_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        let concat = self.live_history();
        let current_idx = abs_pos - self.history_abs_start;
        let mut best = None;
        for candidate_pos in self.long_candidates(abs_pos) {
            // Ignore evicted positions and anything at/after the current one.
            if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos {
                continue;
            }
            let candidate_idx = candidate_pos - self.history_abs_start;
            let match_len =
                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
            if match_len >= DFAST_MIN_MATCH_LEN {
                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
                best = Self::better_candidate(best, Some(candidate));
                if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) {
                    return best;
                }
            }
        }

        for candidate_pos in self.short_candidates(abs_pos) {
            if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos {
                continue;
            }
            let candidate_idx = candidate_pos - self.history_abs_start;
            let match_len =
                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
            if match_len >= DFAST_MIN_MATCH_LEN {
                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
                best = Self::better_candidate(best, Some(candidate));
                if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) {
                    return best;
                }
            }
        }
        best
    }

    /// Extend a match backwards while the preceding bytes agree, but never
    /// past the start of the pending literal run (`lit_len` bytes) nor past
    /// the start of live history.
    fn extend_backwards(
        &self,
        mut candidate_pos: usize,
        mut abs_pos: usize,
        mut match_len: usize,
        lit_len: usize,
    ) -> MatchCandidate {
        let concat = self.live_history();
        let min_abs_pos = abs_pos - lit_len;
        while abs_pos > min_abs_pos
            && candidate_pos > self.history_abs_start
            && concat[candidate_pos - self.history_abs_start - 1]
                == concat[abs_pos - self.history_abs_start - 1]
        {
            candidate_pos -= 1;
            abs_pos -= 1;
            match_len += 1;
        }
        MatchCandidate {
            start: abs_pos,
            offset: abs_pos - candidate_pos,
            match_len,
        }
    }

    /// Prefer the longer match; break length ties toward the smaller offset.
    fn better_candidate(
        lhs: Option<MatchCandidate>,
        rhs: Option<MatchCandidate>,
    ) -> Option<MatchCandidate> {
        match (lhs, rhs) {
            (None, other) | (other, None) => other,
            (Some(lhs), Some(rhs)) => {
                if rhs.match_len > lhs.match_len
                    || (rhs.match_len == lhs.match_len && rhs.offset < lhs.offset)
                {
                    Some(rhs)
                } else {
                    Some(lhs)
                }
            }
        }
    }

    /// Insert hash entries for every position in `start..end` (absolute).
    fn insert_positions(&mut self, start: usize, end: usize) {
        for pos in start..end {
            self.insert_position(pos);
        }
    }

    /// Insert `pos` into both hash tables when enough bytes follow it; each
    /// bucket keeps the most recent positions, shifting older ones down.
    fn insert_position(&mut self, pos: usize) {
        let idx = pos - self.history_abs_start;
        let short = {
            let concat = self.live_history();
            (idx + 4 <= concat.len()).then(|| Self::hash4(&concat[idx..]))
        };
        if let Some(short) = short {
            let bucket = &mut self.short_hash[short];
            // Avoid duplicate front entries when the same position is inserted twice.
            if bucket[0] != pos {
                bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
                bucket[0] = pos;
            }
        }

        let long = {
            let concat = self.live_history();
            (idx + 8 <= concat.len()).then(|| Self::hash8(&concat[idx..]))
        };
        if let Some(long) = long {
            let bucket = &mut self.long_hash[long];
            if bucket[0] != pos {
                bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
                bucket[0] = pos;
            }
        }
    }

    /// Candidate positions from the short (4-byte) table for `pos`.
    fn short_candidates(&self, pos: usize) -> impl Iterator<Item = usize> + '_ {
        let concat = self.live_history();
        let idx = pos - self.history_abs_start;
        (idx + 4 <= concat.len())
            .then(|| self.short_hash[Self::hash4(&concat[idx..])])
            .into_iter()
            .flatten()
            .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
    }

    /// Candidate positions from the long (8-byte) table for `pos`.
    fn long_candidates(&self, pos: usize) -> impl Iterator<Item = usize> + '_ {
        let concat = self.live_history();
        let idx = pos - self.history_abs_start;
        (idx + 8 <= concat.len())
            .then(|| self.long_hash[Self::hash8(&concat[idx..])])
            .into_iter()
            .flatten()
            .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
    }

    /// Hash the first 4 bytes of `data` into a table index.
    fn hash4(data: &[u8]) -> usize {
        let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64;
        Self::hash_bits(value)
    }

    /// Hash the first 8 bytes of `data` into a table index.
    fn hash8(data: &[u8]) -> usize {
        let value = u64::from_le_bytes(data[..8].try_into().unwrap());
        Self::hash_bits(value)
    }

    /// Multiplicative hash keeping the top DFAST_HASH_BITS bits.
    fn hash_bits(value: u64) -> usize {
        const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
        ((value.wrapping_mul(PRIME)) >> (64 - DFAST_HASH_BITS)) as usize
    }
}
1257
#[test]
fn matches() {
    // End-to-end check of the simple matcher: feed blocks, replay the emitted
    // sequences exactly like a decoder would, and verify the reconstruction
    // equals the original input byte-for-byte.
    let mut matcher = MatchGenerator::new(1000);
    let mut original_data = Vec::new();
    let mut reconstructed = Vec::new();

    // Decoder-side replay: literals are copied verbatim; a triple copies
    // match_len bytes starting `offset` bytes back, byte-by-byte so
    // overlapping copies (offset < match_len) behave correctly.
    let replay_sequence = |seq: Sequence<'_>, reconstructed: &mut Vec<u8>| match seq {
        Sequence::Literals { literals } => {
            assert!(!literals.is_empty());
            reconstructed.extend_from_slice(literals);
        }
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            assert!(offset > 0);
            assert!(match_len >= MIN_MATCH_LEN);
            reconstructed.extend_from_slice(literals);
            assert!(offset <= reconstructed.len());
            let start = reconstructed.len() - offset;
            for i in 0..match_len {
                let byte = reconstructed[start + i];
                reconstructed.push(byte);
            }
        }
    };

    matcher.add_data(
        alloc::vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));

    // The block must be exhausted after exactly one sequence.
    assert!(!matcher.next_sequence(|_| {}));

    matcher.add_data(
        alloc::vec![
            1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
        ],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[
        1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
    ]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    matcher.add_data(
        alloc::vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    matcher.add_data(
        alloc::vec![0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    matcher.add_data(
        alloc::vec![7, 8, 9, 10, 11],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[7, 8, 9, 10, 11]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    matcher.add_data(
        alloc::vec![1, 3, 5, 7, 9],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    // skip_matching emits no sequences, so mirror the bytes into
    // `reconstructed` manually; the data must still match later.
    matcher.skip_matching();
    original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
    reconstructed.extend_from_slice(&[1, 3, 5, 7, 9]);
    assert!(!matcher.next_sequence(|_| {}));

    matcher.add_data(
        alloc::vec![1, 3, 5, 7, 9],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[1, 3, 5, 7, 9]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    matcher.add_data(
        alloc::vec![0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    assert_eq!(reconstructed, original_data);
}
1377
#[test]
fn dfast_matches_roundtrip_multi_block_pattern() {
    // Two identical 128 KiB blocks of a repeating 10-byte pattern must
    // round-trip through the dfast matcher, including matches that reach
    // back across the block boundary into retained history.
    let pattern = [9, 21, 44, 184, 19, 96, 171, 109, 141, 251];
    let first_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
    let second_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();

    let mut matcher = DfastMatchGenerator::new(DFAST_DEFAULT_WINDOW_SIZE);
    // Decoder-side replay of the emitted sequences.
    let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            decoded.extend_from_slice(literals);
            let start = decoded.len() - offset;
            for i in 0..match_len {
                let byte = decoded[start + i];
                decoded.push(byte);
            }
        }
    };

    matcher.add_data(first_block.clone(), |_| {});
    let mut history = Vec::new();
    matcher.start_matching(|seq| replay_sequence(&mut history, seq));
    assert_eq!(history, first_block);

    matcher.add_data(second_block.clone(), |_| {});
    let prefix_len = history.len();
    matcher.start_matching(|seq| replay_sequence(&mut history, seq));

    assert_eq!(&history[prefix_len..], second_block.as_slice());
}
1412
#[test]
fn driver_switches_backends_and_initializes_dfast_via_reset() {
    // reset(Default) must route the driver through the dfast backend (with its
    // default window size), keep history across blocks, and reset(Fastest)
    // must switch back to the simple backend with its small window.
    let mut driver = MatchGeneratorDriver::new(32, 2);

    driver.reset(CompressionLevel::Default);
    assert_eq!(driver.window_size(), DFAST_DEFAULT_WINDOW_SIZE as u64);

    let mut first = driver.get_next_space();
    first[..12].copy_from_slice(b"abcabcabcabc");
    first.truncate(12);
    driver.commit_space(first);
    assert_eq!(driver.get_last_space(), b"abcabcabcabc");
    driver.skip_matching();

    let mut second = driver.get_next_space();
    second[..12].copy_from_slice(b"abcabcabcabc");
    second.truncate(12);
    driver.commit_space(second);

    // Seed the decoder view with the first (skipped) block, then replay the
    // second block's sequences on top of it.
    let mut reconstructed = b"abcabcabcabc".to_vec();
    driver.start_matching(|seq| match seq {
        Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            reconstructed.extend_from_slice(literals);
            let start = reconstructed.len() - offset;
            for i in 0..match_len {
                let byte = reconstructed[start + i];
                reconstructed.push(byte);
            }
        }
    });
    assert_eq!(reconstructed, b"abcabcabcabcabcabcabcabc");

    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.window_size(), 64);
}
1453
#[test]
fn prime_with_dictionary_preserves_history_for_first_full_block() {
    // An 8-byte dictionary must be matchable by the very first committed
    // block: expect a literal-free match at offset 8 (the dictionary bytes).
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    let mut saw_match = false;
    driver.start_matching(|seq| {
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 8
            && match_len >= MIN_MATCH_LEN
        {
            saw_match = true;
        }
    });

    assert!(
        saw_match,
        "first full block should still match dictionary-primed history"
    );
}
1486
#[test]
fn prime_with_large_dictionary_preserves_early_history_until_first_block() {
    // The dictionary (24 bytes) is larger than the live window (8 * 1); its
    // earliest bytes must still be addressable for the first block, so we
    // expect a match at offset 24 reaching the start of the dictionary.
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);

    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    let mut saw_match = false;
    driver.start_matching(|seq| {
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 24
            && match_len >= MIN_MATCH_LEN
        {
            saw_match = true;
        }
    });

    assert!(
        saw_match,
        "dictionary bytes should remain addressable until frame output exceeds the live window"
    );
}
1519
#[test]
fn prime_with_dictionary_applies_offset_history_even_when_content_is_empty() {
    // An empty dictionary payload must still install the rep-offset history.
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    let primed_offsets = [11, 7, 3];
    driver.prime_with_dictionary(&[], primed_offsets);

    assert_eq!(driver.match_generator.offset_hist, primed_offsets);
}
1529
#[test]
fn dfast_prime_with_dictionary_preserves_history_for_first_full_block() {
    // Same dictionary-priming guarantee as the simple backend, but through the
    // dfast backend (Default level): first block matches at offset 8.
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Default);

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    let mut saw_match = false;
    driver.start_matching(|seq| {
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 8
            && match_len >= DFAST_MIN_MATCH_LEN
        {
            saw_match = true;
        }
    });

    assert!(
        saw_match,
        "dfast backend should match dictionary-primed history in first full block"
    );
}
1562
#[test]
fn prime_with_dictionary_does_not_inflate_reported_window_size() {
    // Dictionary retention only grows internal matcher capacity; the window
    // size reported for the frame header must stay unchanged.
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    let reported_before = driver.window_size();
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);

    assert_eq!(
        driver.window_size(),
        reported_before,
        "dictionary retention budget must not change reported frame window size"
    );
}
1577
#[test]
fn prime_with_dictionary_does_not_reuse_tiny_suffix_store() {
    // Regression: every window entry must be at least MIN_MATCH_LEN bytes,
    // otherwise it can never contribute a match.
    let mut driver = MatchGeneratorDriver::new(8, 2);
    driver.reset(CompressionLevel::Fastest);

    // This dictionary leaves a 1-byte tail chunk (capacity=1 suffix table),
    // which should never be committed to the matcher window.
    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);

    assert!(
        driver
            .match_generator
            .window
            .iter()
            .all(|entry| entry.data.len() >= MIN_MATCH_LEN),
        "dictionary priming must not commit tails shorter than MIN_MATCH_LEN"
    );
}
1596
#[test]
fn prime_with_dictionary_counts_only_committed_tail_budget() {
    // One full 8-byte slice plus a 1-byte tail: the tail is too short to be
    // committed, so only the 8 committed bytes may grow the retention budget.
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    let budget_before = driver.match_generator.max_window_size;
    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);
    let expected_budget = budget_before + 8;

    assert_eq!(
        driver.match_generator.max_window_size,
        expected_budget,
        "retention budget must account only for dictionary bytes actually committed to history"
    );
}
1612
#[test]
fn dfast_prime_with_dictionary_counts_four_byte_tail_budget() {
    // Unlike the simple backend, dfast retains tails down to 4 bytes (the
    // short-hash key size), so the full 12 dictionary bytes count here.
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Default);

    let before = driver.dfast_matcher().max_window_size;
    // One full slice plus a 4-byte tail. Dfast can still use this tail through
    // short-hash overlap into the next block, so it should stay retained.
    driver.prime_with_dictionary(b"abcdefghijkl", [1, 4, 8]);

    assert_eq!(
        driver.dfast_matcher().max_window_size,
        before + 12,
        "dfast retention budget should include 4-byte dictionary tails"
    );
}
1629
#[test]
fn prime_with_dictionary_budget_shrinks_after_simple_eviction() {
    let mut gen_driver = MatchGeneratorDriver::new(8, 1);
    gen_driver.reset(CompressionLevel::Fastest);

    let base_window = gen_driver.match_generator.max_window_size;
    gen_driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(gen_driver.match_generator.max_window_size, base_window + 24);

    // Push two live blocks through the driver; these evict the primed
    // dictionary slices from the window and should retire their budget.
    for payload in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut buf = gen_driver.get_next_space();
        buf.clear();
        buf.extend_from_slice(payload);
        gen_driver.commit_space(buf);
        gen_driver.skip_matching();
    }

    assert_eq!(
        gen_driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        gen_driver.match_generator.max_window_size, base_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
1656
#[test]
fn prime_with_dictionary_budget_shrinks_after_dfast_eviction() {
    let mut gen_driver = MatchGeneratorDriver::new(8, 1);
    gen_driver.reset(CompressionLevel::Default);
    // Shrink the live window so dictionary-primed slices get evicted quickly
    // and budget retirement can be asserted deterministically.
    gen_driver.dfast_matcher_mut().max_window_size = 8;
    gen_driver.reported_window_size = 8;

    let base_window = gen_driver.dfast_matcher().max_window_size;
    gen_driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(gen_driver.dfast_matcher().max_window_size, base_window + 24);

    // Two live blocks are enough to push all primed slices out of the window.
    for payload in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut buf = gen_driver.get_next_space();
        buf.clear();
        buf.extend_from_slice(payload);
        gen_driver.commit_space(buf);
        gen_driver.skip_matching();
    }

    assert_eq!(
        gen_driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        gen_driver.dfast_matcher().max_window_size,
        base_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
1688
#[test]
fn suffix_store_with_single_slot_does_not_panic_on_keying() {
    // A capacity-1 store must still hash, insert, and retrieve a key without
    // panicking on its single-slot table.
    let mut store = SuffixStore::with_capacity(1);
    store.insert(b"abcde", 0);
    assert!(store.contains_key(b"abcde"));
    assert_eq!(store.get(b"abcde"), Some(0));
}
1696
#[test]
fn fastest_reset_uses_interleaved_hash_fill_step() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Only the Fastest level uses the interleaved fill step; resets to any
    // other level must restore the dense step of 1.
    let expectations = [
        (CompressionLevel::Uncompressed, 1),
        (CompressionLevel::Fastest, FAST_HASH_FILL_STEP),
        (CompressionLevel::Better, 1),
    ];
    for (level, expected_step) in expectations {
        driver.reset(level);
        assert_eq!(driver.match_generator.hash_fill_step, expected_step);
    }
}
1710
#[test]
fn simple_matcher_updates_offset_history_after_emitting_match() {
    let mut gen = MatchGenerator::new(64);
    gen.add_data(
        b"abcdeabcdeabcde".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    // The emitted triple uses offset 5; afterwards the repcode history must
    // hold 5 at the front with the earlier entries shifted down.
    let produced = gen.next_sequence(|seq| {
        assert_eq!(
            seq,
            Sequence::Triple {
                literals: b"abcde",
                offset: 5,
                match_len: 10,
            }
        );
    });
    assert!(produced);
    assert_eq!(gen.offset_hist, [5, 1, 4]);
}
1732
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep1_before_hash_lookup() {
    let mut gen = MatchGenerator::new(64);
    gen.add_data(
        b"abcdefghijabcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    // Position the matcher mid-block with zero pending literals; offset 99
    // cannot match at idx 10, while the rep1 entry of 10 does.
    gen.suffix_idx = 10;
    gen.last_idx_in_sequence = 10;
    gen.offset_hist = [99, 10, 4];

    let suffix = &gen.window.last().unwrap().data[10..];
    assert_eq!(gen.repcode_candidate(suffix, 0), Some((10, 10)));
}
1749
#[test]
fn simple_matcher_repcode_can_target_previous_window_entry() {
    let mut gen = MatchGenerator::new(64);
    // Commit the same 10 bytes twice so they live in two separate window
    // entries; the repcode offset of 10 must reach back into the first one.
    gen.add_data(
        b"abcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    gen.skip_matching();
    gen.add_data(
        b"abcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    gen.offset_hist = [99, 10, 4];

    let current = &gen.window.last().unwrap().data;
    assert_eq!(gen.repcode_candidate(current, 0), Some((10, 10)));
}
1770
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep2() {
    let mut gen = MatchGenerator::new(64);
    gen.add_data(
        b"abcdefghijabcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    gen.suffix_idx = 10;
    gen.last_idx_in_sequence = 10;
    // rep1=4 does not match at idx 10; the rep2 entry of 10 is the one that
    // must be found.
    gen.offset_hist = [99, 4, 10];

    let suffix = &gen.window.last().unwrap().data[10..];
    assert_eq!(gen.repcode_candidate(suffix, 0), Some((10, 10)));
}
1787
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep0_minus1() {
    let mut gen = MatchGenerator::new(64);
    gen.add_data(
        b"abcdefghijabcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    gen.suffix_idx = 10;
    gen.last_idx_in_sequence = 10;
    // Neither rep1=4 nor rep2=99 matches at idx 10; the rep0-1 candidate
    // (11 - 1 == 10) must be reported instead.
    gen.offset_hist = [11, 4, 99];

    let suffix = &gen.window.last().unwrap().data[10..];
    assert_eq!(gen.repcode_candidate(suffix, 0), Some((10, 10)));
}
1804
#[test]
fn simple_matcher_repcode_rejects_offsets_beyond_searchable_prefix() {
    let mut gen = MatchGenerator::new(64);
    gen.add_data(
        b"abcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    gen.skip_matching();
    gen.add_data(
        b"klmnopqrst".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    gen.suffix_idx = 3;

    // 10 committed bytes plus 3 in the current block give a 13-byte
    // searchable prefix; an offset of 14 reaches past it and must be refused.
    let suffix = &gen.window.last().unwrap().data[3..];
    assert_eq!(gen.offset_match_len(14, suffix), None);
}
1824
#[test]
fn simple_matcher_skip_matching_seeds_every_position_even_with_fast_step() {
    let mut gen = MatchGenerator::new(64);
    gen.hash_fill_step = FAST_HASH_FILL_STEP;
    gen.add_data(
        b"abcdefghijklmnop".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    // skip_matching must register every suffix of the skipped block, so the
    // "bcdef" at offset 1 stays findable even with a fill step of 3.
    gen.skip_matching();
    gen.add_data(b"bcdef".to_vec(), SuffixStore::with_capacity(64), |_, _| {});

    let found = gen.next_sequence(|seq| {
        assert_eq!(
            seq,
            Sequence::Triple {
                literals: b"",
                offset: 15,
                match_len: 5,
            }
        );
    });
    assert!(found);
    assert!(!gen.next_sequence(|_| {}));
}
1849
#[test]
fn simple_matcher_add_suffixes_till_backfills_last_searchable_anchor() {
    let mut gen = MatchGenerator::new(64);
    gen.hash_fill_step = FAST_HASH_FILL_STEP;
    gen.add_data(
        b"01234abcde".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    gen.add_suffixes_till(10, FAST_HASH_FILL_STEP);

    // Position 5 is the last anchor with MIN_MATCH_LEN bytes remaining after
    // it; it must be registered even though 5 is not a multiple of the step.
    let entry = gen.window.last().unwrap();
    let anchor_key = &entry.data[5..10];
    assert_eq!(entry.suffixes.get(anchor_key), Some(5));
}
1865
#[test]
fn simple_matcher_add_suffixes_till_skips_when_idx_below_min_match_len() {
    let mut gen = MatchGenerator::new(128);
    gen.hash_fill_step = FAST_HASH_FILL_STEP;
    gen.add_data(
        b"abcdefghijklmnopqrstuvwxyz".to_vec(),
        SuffixStore::with_capacity(1 << 16),
        |_, _| {},
    );

    // With idx below MIN_MATCH_LEN no complete suffix exists yet, so the
    // call must register nothing at all.
    gen.add_suffixes_till(MIN_MATCH_LEN - 1, FAST_HASH_FILL_STEP);

    let entry = gen.window.last().unwrap();
    let first_key = &entry.data[..MIN_MATCH_LEN];
    assert_eq!(entry.suffixes.get(first_key), None);
}
1882
#[test]
fn simple_matcher_add_suffixes_till_fast_step_registers_interleaved_positions() {
    let mut gen = MatchGenerator::new(128);
    gen.hash_fill_step = FAST_HASH_FILL_STEP;
    gen.add_data(
        b"abcdefghijklmnopqrstuvwxyz".to_vec(),
        SuffixStore::with_capacity(1 << 16),
        |_, _| {},
    );

    gen.add_suffixes_till(17, FAST_HASH_FILL_STEP);

    // With a fill step of 3, the interleaved anchors 0,3,...,12 must all
    // carry a suffix entry after filling up to idx 17.
    let entry = gen.window.last().unwrap();
    for pos in [0usize, 3, 6, 9, 12] {
        let key = &entry.data[pos..pos + MIN_MATCH_LEN];
        assert_eq!(
            entry.suffixes.get(key),
            Some(pos),
            "expected interleaved suffix registration at pos {pos}"
        );
    }
}
1905
#[test]
fn dfast_skip_matching_handles_window_eviction() {
    // Window budget of 16 bytes: three 6-byte blocks total 18, so the oldest
    // block is evicted somewhere between the skip calls.
    let mut matcher = DfastMatchGenerator::new(16);

    matcher.add_data(alloc::vec![1, 2, 3, 4, 5, 6], |_| {});
    matcher.skip_matching();
    matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});
    matcher.skip_matching();
    matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});

    // Decode the emitted sequences against the retained history (seeded with
    // the second block's bytes) to prove offsets stay valid after eviction.
    let mut reconstructed = alloc::vec![7, 8, 9, 10, 11, 12];
    matcher.start_matching(|seq| match seq {
        Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            reconstructed.extend_from_slice(literals);
            // Copy byte-by-byte so overlapping matches (offset < match_len)
            // behave exactly like a real zstd decoder would.
            let start = reconstructed.len() - offset;
            for i in 0..match_len {
                let byte = reconstructed[start + i];
                reconstructed.push(byte);
            }
        }
    });

    assert_eq!(reconstructed, [7, 8, 9, 10, 11, 12, 7, 8, 9, 10, 11, 12]);
}
1935
#[test]
fn dfast_add_data_callback_reports_evicted_len_not_capacity() {
    let mut matcher = DfastMatchGenerator::new(8);

    // Both blocks hold 8 bytes inside 64-byte allocations, so a callback that
    // consulted capacity instead of length would observe 64.
    let mut block_a = Vec::with_capacity(64);
    block_a.extend_from_slice(b"abcdefgh");
    matcher.add_data(block_a, |_| {});

    let mut block_b = Vec::with_capacity(64);
    block_b.extend_from_slice(b"ijklmnop");

    let mut evicted_len = None;
    matcher.add_data(block_b, |data| evicted_len = Some(data.len()));

    assert_eq!(
        evicted_len,
        Some(8),
        "eviction callback must report evicted byte length, not backing capacity"
    );
}
1958
#[test]
fn dfast_trim_to_window_callback_reports_evicted_len_not_capacity() {
    let mut matcher = DfastMatchGenerator::new(16);

    // Two 8-byte blocks held in oversized 64-byte allocations.
    let mut block_a = Vec::with_capacity(64);
    block_a.extend_from_slice(b"abcdefgh");
    matcher.add_data(block_a, |_| {});

    let mut block_b = Vec::with_capacity(64);
    block_b.extend_from_slice(b"ijklmnop");
    matcher.add_data(block_b, |_| {});

    // Halve the window so trim_to_window must evict the first block.
    matcher.max_window_size = 8;

    let mut evicted_len = None;
    matcher.trim_to_window(|data| evicted_len = Some(data.len()));

    assert_eq!(
        evicted_len,
        Some(8),
        "trim callback must report evicted byte length, not backing capacity"
    );
}
1984
#[test]
fn dfast_inserts_tail_positions_for_next_block_matching() {
    let mut matcher = DfastMatchGenerator::new(DFAST_DEFAULT_WINDOW_SIZE);

    // First block: nothing in history yet, so everything must come out as
    // literals — but the block's tail positions (ending in "bcdea") should
    // still be indexed for the next block.
    matcher.add_data(b"012345bcdea".to_vec(), |_| {});
    let mut history = Vec::new();
    matcher.start_matching(|seq| match seq {
        Sequence::Literals { literals } => history.extend_from_slice(literals),
        Sequence::Triple { .. } => unreachable!("first block should not match history"),
    });
    assert_eq!(history, b"012345bcdea");

    // Second block begins with "bcdea", which only appears in the previous
    // block's tail — a zero-literal match here proves the tail was indexed.
    matcher.add_data(b"bcdeabcdeab".to_vec(), |_| {});
    let mut saw_first_sequence = false;
    matcher.start_matching(|seq| {
        assert!(!saw_first_sequence, "expected a single cross-block match");
        saw_first_sequence = true;
        match seq {
            Sequence::Literals { .. } => {
                panic!("expected tail-anchored cross-block match before any literals")
            }
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => {
                assert_eq!(literals, b"");
                assert_eq!(offset, 5);
                assert_eq!(match_len, 11);
                // Decode the overlapping match (offset 5 < match_len 11)
                // byte-by-byte, exactly as a zstd decoder executes it.
                let start = history.len() - offset;
                for i in 0..match_len {
                    let byte = history[start + i];
                    history.push(byte);
                }
            }
        }
    });

    assert!(
        saw_first_sequence,
        "expected tail-anchored cross-block match"
    );
    assert_eq!(history, b"012345bcdeabcdeabcdeab");
}