Skip to main content

structured_zstd/encoding/
match_generator.rs

//! Matching algorithm used to find repeated parts in the original data
2//!
//! The Zstd format relies on finding repeated sequences of data and compressing these sequences as instructions to the decoder.
4//! A sequence basically tells the decoder "Go back X bytes and copy Y bytes to the end of your decode buffer".
5//!
6//! The task here is to efficiently find matches in the already encoded data for the current suffix of the not yet encoded data.
7
8use alloc::collections::VecDeque;
9use alloc::vec::Vec;
10use core::convert::TryInto;
11use core::num::NonZeroUsize;
12
13use super::CompressionLevel;
14use super::Matcher;
15use super::Sequence;
16use super::blocks::encode_offset_with_history;
17
// Minimum match length for the Simple backend; also the number of bytes
// hashed by SuffixStore::key().
const MIN_MATCH_LEN: usize = 5;
// At the Fastest level only every 3rd position is inserted into the
// suffix store (see level_config / add_suffixes_till).
const FAST_HASH_FILL_STEP: usize = 3;
const DFAST_MIN_MATCH_LEN: usize = 6;
const DFAST_TARGET_LEN: usize = 48;
// Keep these aligned with the issue's zstd level-3/dfast target unless ratio
// measurements show we can shrink them without regressing acceptance tests.
const DFAST_HASH_BITS: usize = 20;
const DFAST_SEARCH_DEPTH: usize = 4;
const DFAST_DEFAULT_WINDOW_SIZE: usize = 1 << 22; // 4 MiB
const BETTER_DEFAULT_WINDOW_SIZE: usize = 1 << 23; // 8 MiB
const DFAST_EMPTY_SLOT: usize = usize::MAX;

const HC_HASH_LOG: usize = 20;
const HC_CHAIN_LOG: usize = 19;
const HC_SEARCH_DEPTH: usize = 16;
const HC_MIN_MATCH_LEN: usize = 5;
const HC_TARGET_LEN: usize = 48;
// Positions are stored as (abs_pos + 1) so that 0 is a safe empty sentinel
// that can never collide with any valid position, even at the 4 GiB boundary.
const HC_EMPTY: u32 = 0;

const BEST_DEFAULT_WINDOW_SIZE: usize = 1 << 24; // 16 MiB
// Maximum search depth across all HC-based levels. Used to size the
// fixed-length candidate array returned by chain_candidates().
const MAX_HC_SEARCH_DEPTH: usize = 32;
43
/// Bundled tuning knobs for the hash-chain matcher. Using a typed config
/// instead of positional `usize` args eliminates parameter-order hazards.
///
/// Field semantics mirror the `HC_*` constants above; consumed by
/// `HcMatchGenerator::configure` (defined elsewhere — exact table sizing is
/// in that impl).
#[derive(Copy, Clone)]
struct HcConfig {
    hash_log: usize,
    chain_log: usize,
    search_depth: usize,
    target_len: usize,
}
53
/// Default hash-chain tuning; applied by `reset()` for every HashChain level
/// except `Best`.
const HC_CONFIG: HcConfig = HcConfig {
    hash_log: HC_HASH_LOG,
    chain_log: HC_CHAIN_LOG,
    search_depth: HC_SEARCH_DEPTH,
    target_len: HC_TARGET_LEN,
};
60
/// Best-level: deeper search, larger tables, higher target length.
/// `search_depth` must stay <= MAX_HC_SEARCH_DEPTH, which sizes the
/// fixed-length candidate array.
const BEST_HC_CONFIG: HcConfig = HcConfig {
    hash_log: 21,
    chain_log: 20,
    search_depth: 32,
    target_len: 128,
};
68
/// Which matching implementation the driver currently dispatches to.
/// Exactly one backend is live at a time; selection happens in `reset()`
/// based on the compression level (see `level_config`).
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum MatcherBackend {
    /// Single-probe suffix-hash matcher (Uncompressed / Fastest levels).
    Simple,
    /// Double-fast style matcher (Default level).
    Dfast,
    /// Hash-chain matcher (Better / Best levels).
    HashChain,
}
75
/// This is the default implementation of the `Matcher` trait. It allocates and reuses the buffers when possible.
pub struct MatchGeneratorDriver {
    // Recycled data buffers shared by all backends.
    vec_pool: Vec<Vec<u8>>,
    // Recycled suffix stores (only the Simple backend uses these).
    suffix_pool: Vec<SuffixStore>,
    // Simple backend; always present.
    match_generator: MatchGenerator,
    // Created lazily by reset() the first time the Dfast backend is selected.
    dfast_match_generator: Option<DfastMatchGenerator>,
    // Created lazily by reset() the first time the HashChain backend is selected.
    hc_match_generator: Option<HcMatchGenerator>,
    // Backend the Matcher trait methods currently route to.
    active_backend: MatcherBackend,
    // Size of slices handed out by get_next_space().
    slice_size: usize,
    // Level-independent defaults captured at construction time.
    base_slice_size: usize,
    base_window_size: usize,
    // Frame header window size must stay at the configured live-window budget.
    // Dictionary retention expands internal matcher capacity only.
    reported_window_size: usize,
    // Tracks currently retained bytes that originated from primed dictionary
    // history and have not been evicted yet.
    dictionary_retained_budget: usize,
}
94
95impl MatchGeneratorDriver {
96    /// slice_size says how big the slices should be that are allocated to work with
97    /// max_slices_in_window says how many slices should at most be used while looking for matches
98    pub(crate) fn new(slice_size: usize, max_slices_in_window: usize) -> Self {
99        let max_window_size = max_slices_in_window * slice_size;
100        Self {
101            vec_pool: Vec::new(),
102            suffix_pool: Vec::new(),
103            match_generator: MatchGenerator::new(max_window_size),
104            dfast_match_generator: None,
105            hc_match_generator: None,
106            active_backend: MatcherBackend::Simple,
107            slice_size,
108            base_slice_size: slice_size,
109            base_window_size: max_window_size,
110            reported_window_size: max_window_size,
111            dictionary_retained_budget: 0,
112        }
113    }
114
115    fn level_config(&self, level: CompressionLevel) -> (MatcherBackend, usize, usize, usize) {
116        match level {
117            CompressionLevel::Uncompressed => (
118                MatcherBackend::Simple,
119                self.base_slice_size,
120                self.base_window_size,
121                1,
122            ),
123            CompressionLevel::Fastest => (
124                MatcherBackend::Simple,
125                self.base_slice_size,
126                self.base_window_size,
127                FAST_HASH_FILL_STEP,
128            ),
129            CompressionLevel::Default => (
130                MatcherBackend::Dfast,
131                self.base_slice_size,
132                DFAST_DEFAULT_WINDOW_SIZE,
133                1,
134            ),
135            CompressionLevel::Better => (
136                MatcherBackend::HashChain,
137                self.base_slice_size,
138                BETTER_DEFAULT_WINDOW_SIZE,
139                1,
140            ),
141            CompressionLevel::Best => (
142                MatcherBackend::HashChain,
143                self.base_slice_size,
144                BEST_DEFAULT_WINDOW_SIZE,
145                1,
146            ),
147        }
148    }
149
150    fn dfast_matcher(&self) -> &DfastMatchGenerator {
151        self.dfast_match_generator
152            .as_ref()
153            .expect("dfast backend must be initialized by reset() before use")
154    }
155
156    fn dfast_matcher_mut(&mut self) -> &mut DfastMatchGenerator {
157        self.dfast_match_generator
158            .as_mut()
159            .expect("dfast backend must be initialized by reset() before use")
160    }
161
162    fn hc_matcher(&self) -> &HcMatchGenerator {
163        self.hc_match_generator
164            .as_ref()
165            .expect("hash chain backend must be initialized by reset() before use")
166    }
167
168    fn hc_matcher_mut(&mut self) -> &mut HcMatchGenerator {
169        self.hc_match_generator
170            .as_mut()
171            .expect("hash chain backend must be initialized by reset() before use")
172    }
173
174    fn retire_dictionary_budget(&mut self, evicted_bytes: usize) {
175        let reclaimed = evicted_bytes.min(self.dictionary_retained_budget);
176        if reclaimed == 0 {
177            return;
178        }
179        self.dictionary_retained_budget -= reclaimed;
180        match self.active_backend {
181            MatcherBackend::Simple => {
182                self.match_generator.max_window_size = self
183                    .match_generator
184                    .max_window_size
185                    .saturating_sub(reclaimed);
186            }
187            MatcherBackend::Dfast => {
188                let matcher = self.dfast_matcher_mut();
189                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
190            }
191            MatcherBackend::HashChain => {
192                let matcher = self.hc_matcher_mut();
193                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
194            }
195        }
196    }
197
    /// Repeatedly evicts over-budget window slices from the active backend
    /// and recycles them into the pools. Each eviction round retires more
    /// dictionary budget (which can shrink the window further), hence the
    /// loop runs until a round evicts nothing.
    fn trim_after_budget_retire(&mut self) {
        loop {
            let mut evicted_bytes = 0usize;
            match self.active_backend {
                MatcherBackend::Simple => {
                    let vec_pool = &mut self.vec_pool;
                    let suffix_pool = &mut self.suffix_pool;
                    // reserve(0): no new space requested, so this presumably
                    // only triggers eviction — confirm against
                    // MatchGenerator::reserve (not visible in this chunk).
                    self.match_generator.reserve(0, |mut data, mut suffixes| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                        suffixes.slots.clear();
                        suffixes.slots.resize(suffixes.slots.capacity(), None);
                        suffix_pool.push(suffixes);
                    });
                }
                MatcherBackend::Dfast => {
                    // Collect evicted slices first; recycling happens outside
                    // the closure so self.vec_pool is not borrowed while the
                    // matcher is mutably borrowed.
                    let mut retired = Vec::new();
                    self.dfast_matcher_mut().trim_to_window(|data| {
                        evicted_bytes += data.len();
                        retired.push(data);
                    });
                    for mut data in retired {
                        data.resize(data.capacity(), 0);
                        self.vec_pool.push(data);
                    }
                }
                MatcherBackend::HashChain => {
                    let mut retired = Vec::new();
                    self.hc_matcher_mut().trim_to_window(|data| {
                        evicted_bytes += data.len();
                        retired.push(data);
                    });
                    for mut data in retired {
                        data.resize(data.capacity(), 0);
                        self.vec_pool.push(data);
                    }
                }
            }
            if evicted_bytes == 0 {
                break;
            }
            self.retire_dictionary_budget(evicted_bytes);
        }
    }
243}
244
245impl Matcher for MatchGeneratorDriver {
    /// This driver can ingest dictionary history up front via
    /// `prime_with_dictionary`, so priming is always supported.
    fn supports_dictionary_priming(&self) -> bool {
        true
    }
249
    /// Reconfigures the driver for `level`: selects the backend, applies its
    /// tuning parameters, and clears all match history, recycling buffers
    /// into the pools so the next backend can reuse them.
    fn reset(&mut self, level: CompressionLevel) {
        let (backend, slice_size, max_window_size, hash_fill_step) = self.level_config(level);
        self.dictionary_retained_budget = 0;
        // When switching backends, drain the outgoing backend's buffers into
        // the pools first so they do not sit idle while unused.
        if self.active_backend != backend {
            match self.active_backend {
                MatcherBackend::Simple => {
                    let vec_pool = &mut self.vec_pool;
                    let suffix_pool = &mut self.suffix_pool;
                    self.match_generator.reset(|mut data, mut suffixes| {
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                        suffixes.slots.clear();
                        suffixes.slots.resize(suffixes.slots.capacity(), None);
                        suffix_pool.push(suffixes);
                    });
                }
                MatcherBackend::Dfast => {
                    if let Some(dfast) = self.dfast_match_generator.as_mut() {
                        let vec_pool = &mut self.vec_pool;
                        dfast.reset(|mut data| {
                            data.resize(data.capacity(), 0);
                            vec_pool.push(data);
                        });
                    }
                }
                MatcherBackend::HashChain => {
                    if let Some(hc) = self.hc_match_generator.as_mut() {
                        // Release oversized tables when switching away from
                        // HashChain so Best's larger allocations don't persist.
                        hc.hash_table = Vec::new();
                        hc.chain_table = Vec::new();
                        let vec_pool = &mut self.vec_pool;
                        hc.reset(|mut data| {
                            data.resize(data.capacity(), 0);
                            vec_pool.push(data);
                        });
                    }
                }
            }
        }

        self.active_backend = backend;
        self.slice_size = slice_size;
        self.reported_window_size = max_window_size;
        // (Re)initialize the now-active backend with this level's parameters,
        // creating it on first use where necessary.
        match self.active_backend {
            MatcherBackend::Simple => {
                let vec_pool = &mut self.vec_pool;
                let suffix_pool = &mut self.suffix_pool;
                self.match_generator.max_window_size = max_window_size;
                self.match_generator.hash_fill_step = hash_fill_step;
                self.match_generator.reset(|mut data, mut suffixes| {
                    data.resize(data.capacity(), 0);
                    vec_pool.push(data);
                    suffixes.slots.clear();
                    suffixes.slots.resize(suffixes.slots.capacity(), None);
                    suffix_pool.push(suffixes);
                });
            }
            MatcherBackend::Dfast => {
                let dfast = self
                    .dfast_match_generator
                    .get_or_insert_with(|| DfastMatchGenerator::new(max_window_size));
                dfast.max_window_size = max_window_size;
                dfast.lazy_depth = 1;
                let vec_pool = &mut self.vec_pool;
                dfast.reset(|mut data| {
                    data.resize(data.capacity(), 0);
                    vec_pool.push(data);
                });
            }
            MatcherBackend::HashChain => {
                let hc = self
                    .hc_match_generator
                    .get_or_insert_with(|| HcMatchGenerator::new(max_window_size));
                hc.max_window_size = max_window_size;
                hc.lazy_depth = 2;
                // Best gets the deeper/larger configuration; every other
                // HashChain level uses the defaults.
                match level {
                    CompressionLevel::Best => hc.configure(BEST_HC_CONFIG),
                    _ => hc.configure(HC_CONFIG),
                }
                let vec_pool = &mut self.vec_pool;
                hc.reset(|mut data| {
                    data.resize(data.capacity(), 0);
                    vec_pool.push(data);
                });
            }
        }
    }
338
    /// Seeds the active backend with `dict_content` and the dictionary's
    /// repeat-offset history. The internal window budget is temporarily
    /// expanded by the committed dictionary size so dictionary bytes stay
    /// matchable until real frame output displaces them (tracked via
    /// `dictionary_retained_budget`).
    fn prime_with_dictionary(&mut self, dict_content: &[u8], offset_hist: [u32; 3]) {
        match self.active_backend {
            MatcherBackend::Simple => self.match_generator.offset_hist = offset_hist,
            MatcherBackend::Dfast => self.dfast_matcher_mut().offset_hist = offset_hist,
            MatcherBackend::HashChain => self.hc_matcher_mut().offset_hist = offset_hist,
        }

        if dict_content.is_empty() {
            return;
        }

        // Dictionary bytes should stay addressable until produced frame output
        // itself exceeds the live window size.
        let retained_dict_budget = dict_content.len();
        match self.active_backend {
            MatcherBackend::Simple => {
                self.match_generator.max_window_size = self
                    .match_generator
                    .max_window_size
                    .saturating_add(retained_dict_budget);
            }
            MatcherBackend::Dfast => {
                let matcher = self.dfast_matcher_mut();
                matcher.max_window_size =
                    matcher.max_window_size.saturating_add(retained_dict_budget);
            }
            MatcherBackend::HashChain => {
                let matcher = self.hc_matcher_mut();
                matcher.max_window_size =
                    matcher.max_window_size.saturating_add(retained_dict_budget);
            }
        }

        let mut start = 0usize;
        let mut committed_dict_budget = 0usize;
        // insert_position needs 4 bytes of lookahead for hashing;
        // backfill_boundary_positions re-visits tail positions once the
        // next slice extends history, but cannot hash <4 byte fragments.
        let min_primed_tail = match self.active_backend {
            MatcherBackend::Simple => MIN_MATCH_LEN,
            MatcherBackend::Dfast | MatcherBackend::HashChain => 4,
        };
        // Feed the dictionary through the normal commit path slice by slice,
        // skipping sequence generation (history only, no output).
        while start < dict_content.len() {
            let end = (start + self.slice_size).min(dict_content.len());
            if end - start < min_primed_tail {
                break;
            }
            let mut space = self.get_next_space();
            space.clear();
            space.extend_from_slice(&dict_content[start..end]);
            self.commit_space(space);
            self.skip_matching();
            committed_dict_budget += end - start;
            start = end;
        }

        // A trailing fragment too small to prime was never committed, so give
        // back the window budget that was reserved for it above.
        let uncommitted_tail_budget = retained_dict_budget.saturating_sub(committed_dict_budget);
        if uncommitted_tail_budget > 0 {
            match self.active_backend {
                MatcherBackend::Simple => {
                    self.match_generator.max_window_size = self
                        .match_generator
                        .max_window_size
                        .saturating_sub(uncommitted_tail_budget);
                }
                MatcherBackend::Dfast => {
                    let matcher = self.dfast_matcher_mut();
                    matcher.max_window_size = matcher
                        .max_window_size
                        .saturating_sub(uncommitted_tail_budget);
                }
                MatcherBackend::HashChain => {
                    let matcher = self.hc_matcher_mut();
                    matcher.max_window_size = matcher
                        .max_window_size
                        .saturating_sub(uncommitted_tail_budget);
                }
            }
        }
        if committed_dict_budget > 0 {
            self.dictionary_retained_budget = self
                .dictionary_retained_budget
                .saturating_add(committed_dict_budget);
        }
    }
424
    /// Window size to advertise in the frame header. Dictionary retention
    /// only grows *internal* matcher capacity, never this reported value.
    fn window_size(&self) -> u64 {
        self.reported_window_size as u64
    }
428
429    fn get_next_space(&mut self) -> Vec<u8> {
430        self.vec_pool.pop().unwrap_or_else(|| {
431            let mut space = alloc::vec![0; self.slice_size];
432            space.resize(space.capacity(), 0);
433            space
434        })
435    }
436
    /// Returns the most recently committed slice of the active backend.
    /// Panics for the Simple backend when no slice has been committed yet.
    fn get_last_space(&mut self) -> &[u8] {
        match self.active_backend {
            MatcherBackend::Simple => self.match_generator.window.last().unwrap().data.as_slice(),
            MatcherBackend::Dfast => self.dfast_matcher().get_last_space(),
            MatcherBackend::HashChain => self.hc_matcher().get_last_space(),
        }
    }
444
    /// Hands a filled slice to the active backend. Slices (and, for the
    /// Simple backend, suffix stores) evicted in the process are recycled
    /// into the pools, and dictionary-retention accounting is settled for
    /// whatever was evicted.
    fn commit_space(&mut self, space: Vec<u8>) {
        match self.active_backend {
            MatcherBackend::Simple => {
                let vec_pool = &mut self.vec_pool;
                let mut evicted_bytes = 0usize;
                // Reuse a pooled suffix store if available; otherwise size a
                // fresh one to the committed slice.
                let suffixes = self
                    .suffix_pool
                    .pop()
                    .unwrap_or_else(|| SuffixStore::with_capacity(space.len()));
                let suffix_pool = &mut self.suffix_pool;
                self.match_generator
                    .add_data(space, suffixes, |mut data, mut suffixes| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                        suffixes.slots.clear();
                        suffixes.slots.resize(suffixes.slots.capacity(), None);
                        suffix_pool.push(suffixes);
                    });
                self.retire_dictionary_budget(evicted_bytes);
                self.trim_after_budget_retire();
            }
            MatcherBackend::Dfast => {
                let vec_pool = &mut self.vec_pool;
                let mut evicted_bytes = 0usize;
                self.dfast_match_generator
                    .as_mut()
                    .expect("dfast backend must be initialized by reset() before use")
                    .add_data(space, |mut data| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                    });
                self.retire_dictionary_budget(evicted_bytes);
                self.trim_after_budget_retire();
            }
            MatcherBackend::HashChain => {
                let vec_pool = &mut self.vec_pool;
                let mut evicted_bytes = 0usize;
                self.hc_match_generator
                    .as_mut()
                    .expect("hash chain backend must be initialized by reset() before use")
                    .add_data(space, |mut data| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                    });
                self.retire_dictionary_budget(evicted_bytes);
                self.trim_after_budget_retire();
            }
        }
    }
497
    /// Drives the active backend over the committed data, forwarding every
    /// produced `Sequence` to `handle_sequence`.
    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
        match self.active_backend {
            MatcherBackend::Simple => {
                // next_sequence() emits at most one sequence per call and
                // returns false once the slice is exhausted.
                while self.match_generator.next_sequence(&mut handle_sequence) {}
            }
            MatcherBackend::Dfast => self
                .dfast_matcher_mut()
                .start_matching(&mut handle_sequence),
            MatcherBackend::HashChain => self.hc_matcher_mut().start_matching(&mut handle_sequence),
        }
    }
    /// Dispatches `skip_matching` to the active backend. Called by
    /// `prime_with_dictionary` after each committed dictionary slice so that
    /// slice enters match history without producing output sequences.
    fn skip_matching(&mut self) {
        match self.active_backend {
            MatcherBackend::Simple => self.match_generator.skip_matching(),
            MatcherBackend::Dfast => self.dfast_matcher_mut().skip_matching(),
            MatcherBackend::HashChain => self.hc_matcher_mut().skip_matching(),
        }
    }
516}
517
/// This stores the index of a suffix of a string by hashing the first few bytes of that suffix
/// This means that collisions just overwrite and that you need to check validity after a get
struct SuffixStore {
    // We use NonZeroUsize to enable niche optimization here.
    // On store we do +1 and on get -1
    // This is ok since usize::MAX is never a valid offset
    slots: Vec<Option<NonZeroUsize>>,
    len_log: u32,
}

impl SuffixStore {
    /// Creates a store with room for roughly `capacity` slots.
    ///
    /// `capacity` is clamped to at least 1: `ilog2` panics on zero, and an
    /// empty slot table would make every subsequent insert/get index out of
    /// bounds. (Previously `with_capacity(0)` panicked.)
    fn with_capacity(capacity: usize) -> Self {
        let capacity = capacity.max(1);
        Self {
            slots: alloc::vec![None; capacity],
            len_log: capacity.ilog2(),
        }
    }

    /// Records `idx` as the latest position whose suffix starts with these
    /// bytes. A colliding earlier entry is silently overwritten.
    #[inline(always)]
    fn insert(&mut self, suffix: &[u8], idx: usize) {
        let key = self.key(suffix);
        self.slots[key] = Some(NonZeroUsize::new(idx + 1).unwrap());
    }

    /// Whether *some* index was stored under this suffix's hash slot.
    /// A `true` may still be a collision — callers must verify the bytes.
    #[inline(always)]
    fn contains_key(&self, suffix: &[u8]) -> bool {
        let key = self.key(suffix);
        self.slots[key].is_some()
    }

    /// Returns the stored index for this suffix's hash slot (undoing the +1
    /// sentinel encoding), or `None` if the slot is empty. The returned
    /// index may belong to a colliding suffix.
    #[inline(always)]
    fn get(&self, suffix: &[u8]) -> Option<usize> {
        let key = self.key(suffix);
        self.slots[key].map(|x| <NonZeroUsize as Into<usize>>::into(x) - 1)
    }

    /// Hashes the first 5 bytes of `suffix` into a slot index.
    #[inline(always)]
    fn key(&self, suffix: &[u8]) -> usize {
        // Capacity=1 yields len_log=0; shifting by 64 would panic.
        if self.len_log == 0 {
            return 0;
        }

        let s0 = suffix[0] as u64;
        let s1 = suffix[1] as u64;
        let s2 = suffix[2] as u64;
        let s3 = suffix[3] as u64;
        let s4 = suffix[4] as u64;

        const POLY: u64 = 0xCF3BCCDCABu64;

        let s0 = (s0 << 24).wrapping_mul(POLY);
        let s1 = (s1 << 32).wrapping_mul(POLY);
        let s2 = (s2 << 40).wrapping_mul(POLY);
        let s3 = (s3 << 48).wrapping_mul(POLY);
        let s4 = (s4 << 56).wrapping_mul(POLY);

        let index = s0 ^ s1 ^ s2 ^ s3 ^ s4;
        // Keeping the top len_log bits already guarantees
        // index < 2^len_log <= slots.len(); the modulo is a final guard.
        let index = index >> (64 - self.len_log);
        index as usize % self.slots.len()
    }
}
580
/// We keep a window of a few of these entries
/// All of these are valid targets for a match to be generated for
struct WindowEntry {
    data: Vec<u8>,
    /// Stores indexes into data
    suffixes: SuffixStore,
    /// Makes offset calculations efficient
    // NOTE(review): consumed as `base_offset + suffix_idx - match_index` in
    // next_sequence() — appears to be this entry's start distance relative to
    // the newest slice's start; confirm against where entries are created.
    base_offset: usize,
}
590
/// Core matcher state for the `Simple` backend: finds matches for the newest
/// window slice against suffix stores kept per slice.
pub(crate) struct MatchGenerator {
    // Byte budget for the window; adjusted by dictionary priming/retirement.
    max_window_size: usize,
    /// Data window we are operating on to find matches
    /// The data we want to find matches for is in the last slice
    window: Vec<WindowEntry>,
    // Total bytes currently held across all window entries.
    window_size: usize,
    // Debug-only concatenation of the whole window, used by next_sequence()
    // to verify computed offsets.
    #[cfg(debug_assertions)]
    concat_window: Vec<u8>,
    /// Index in the last slice that we already processed
    suffix_idx: usize,
    /// Gets updated when a new sequence is returned to point right behind that sequence
    last_idx_in_sequence: usize,
    // Insert every Nth position into the suffix store (3 at Fastest, else 1).
    hash_fill_step: usize,
    // Repeat-offset history fed into encode_offset_with_history().
    offset_hist: [u32; 3],
}
606
607impl MatchGenerator {
    /// Index of the first differing byte given the nonzero XOR of two
    /// native-endian words read from memory. On little-endian the
    /// lowest-addressed byte is least significant, so trailing zeros locate
    /// the first mismatch.
    #[inline(always)]
    #[cfg(target_endian = "little")]
    fn mismatch_byte_index(diff: usize) -> usize {
        diff.trailing_zeros() as usize / 8
    }

    /// Big-endian counterpart: the lowest-addressed byte is most
    /// significant, so leading zeros locate the first mismatch.
    #[inline(always)]
    #[cfg(target_endian = "big")]
    fn mismatch_byte_index(diff: usize) -> usize {
        diff.leading_zeros() as usize / 8
    }
619
620    /// max_size defines how many bytes will be used at most in the window used for matching
621    fn new(max_size: usize) -> Self {
622        Self {
623            max_window_size: max_size,
624            window: Vec::new(),
625            window_size: 0,
626            #[cfg(debug_assertions)]
627            concat_window: Vec::new(),
628            suffix_idx: 0,
629            last_idx_in_sequence: 0,
630            hash_fill_step: 1,
631            offset_hist: [1, 4, 8],
632        }
633    }
634
635    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
636        self.window_size = 0;
637        #[cfg(debug_assertions)]
638        self.concat_window.clear();
639        self.suffix_idx = 0;
640        self.last_idx_in_sequence = 0;
641        self.offset_hist = [1, 4, 8];
642        self.window.drain(..).for_each(|entry| {
643            reuse_space(entry.data, entry.suffixes);
644        });
645    }
646
    /// Processes bytes in the current window until either a match is found or no more matches can be found
    /// * If a match is found handle_sequence is called with the Triple variant
    /// * If no more matches can be found but there are bytes still left handle_sequence is called with the Literals variant
    /// * If no more matches can be found and no more bytes are left this returns false
    fn next_sequence(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) -> bool {
        loop {
            let last_entry = self.window.last().unwrap();
            let data_slice = &last_entry.data;

            // We already reached the end of the window, check if we need to return a Literals{}
            if self.suffix_idx >= data_slice.len() {
                if self.last_idx_in_sequence != self.suffix_idx {
                    let literals = &data_slice[self.last_idx_in_sequence..];
                    self.last_idx_in_sequence = self.suffix_idx;
                    handle_sequence(Sequence::Literals { literals });
                    return true;
                } else {
                    return false;
                }
            }

            // If the remaining data is smaller than the minimum match length we can stop and return a Literals{}
            let data_slice = &data_slice[self.suffix_idx..];
            if data_slice.len() < MIN_MATCH_LEN {
                let last_idx_in_sequence = self.last_idx_in_sequence;
                self.last_idx_in_sequence = last_entry.data.len();
                self.suffix_idx = last_entry.data.len();
                handle_sequence(Sequence::Literals {
                    literals: &last_entry.data[last_idx_in_sequence..],
                });
                return true;
            }

            // This is the key we are looking to find a match for
            let key = &data_slice[..MIN_MATCH_LEN];
            let literals_len = self.suffix_idx - self.last_idx_in_sequence;

            // Look in each window entry
            // Seed the candidate with a repeat-offset match, if any
            // (repcode_candidate is defined outside this chunk).
            let mut candidate = self.repcode_candidate(data_slice, literals_len);
            for match_entry in self.window.iter() {
                if let Some(match_index) = match_entry.suffixes.get(key) {
                    let match_slice = &match_entry.data[match_index..];

                    // Check how long the common prefix actually is
                    let match_len = Self::common_prefix_len(match_slice, data_slice);

                    // Collisions in the suffix store might make this check fail
                    if match_len >= MIN_MATCH_LEN {
                        let offset = match_entry.base_offset + self.suffix_idx - match_index;

                        // If we are in debug/tests make sure the match we found is actually at the offset we calculated
                        #[cfg(debug_assertions)]
                        {
                            let unprocessed = last_entry.data.len() - self.suffix_idx;
                            let start = self.concat_window.len() - unprocessed - offset;
                            let end = start + match_len;
                            let check_slice = &self.concat_window[start..end];
                            debug_assert_eq!(check_slice, &match_slice[..match_len]);
                        }

                        // Prefer longer matches; on equal length prefer the
                        // smaller (cheaper to encode) offset.
                        if let Some((old_offset, old_match_len)) = candidate {
                            if match_len > old_match_len
                                || (match_len == old_match_len && offset < old_offset)
                            {
                                candidate = Some((offset, match_len));
                            }
                        } else {
                            candidate = Some((offset, match_len));
                        }
                    }
                }
            }

            if let Some((offset, match_len)) = candidate {
                // For each index in the match we found we do not need to look for another match
                // But we still want them registered in the suffix store
                self.add_suffixes_till(self.suffix_idx + match_len, self.hash_fill_step);

                // All literals that were not included between this match and the last are now included here
                let last_entry = self.window.last().unwrap();
                let literals = &last_entry.data[self.last_idx_in_sequence..self.suffix_idx];

                // Update the indexes, all indexes upto and including the current index have been included in a sequence now
                self.suffix_idx += match_len;
                self.last_idx_in_sequence = self.suffix_idx;
                // Keep the repeat-offset history in sync with the emitted
                // sequence; the encoded value itself is not needed here.
                let _ = encode_offset_with_history(
                    offset as u32,
                    literals.len() as u32,
                    &mut self.offset_hist,
                );
                handle_sequence(Sequence::Triple {
                    literals,
                    offset,
                    match_len,
                });

                return true;
            }

            // No match at this position: register its suffix (first writer
            // wins for this key) and advance one byte.
            let last_entry = self.window.last_mut().unwrap();
            let key = &last_entry.data[self.suffix_idx..self.suffix_idx + MIN_MATCH_LEN];
            if !last_entry.suffixes.contains_key(key) {
                last_entry.suffixes.insert(key, self.suffix_idx);
            }
            self.suffix_idx += 1;
        }
    }
754
755    /// Find the common prefix length between two byte slices
756    #[inline(always)]
757    fn common_prefix_len(a: &[u8], b: &[u8]) -> usize {
758        let max = a.len().min(b.len());
759        let chunk = core::mem::size_of::<usize>();
760        let mut off = 0usize;
761        let lhs = a.as_ptr();
762        let rhs = b.as_ptr();
763
764        while off + chunk <= max {
765            let lhs_word = unsafe { core::ptr::read_unaligned(lhs.add(off) as *const usize) };
766            let rhs_word = unsafe { core::ptr::read_unaligned(rhs.add(off) as *const usize) };
767            let diff = lhs_word ^ rhs_word;
768            if diff != 0 {
769                return off + Self::mismatch_byte_index(diff);
770            }
771            off += chunk;
772        }
773
774        off + core::iter::zip(&a[off..max], &b[off..max])
775            .take_while(|(x, y)| x == y)
776            .count()
777    }
778
    /// Process bytes and add the suffixes to the suffix store up to a specific index
    ///
    /// Registers suffix keys for positions in `[self.suffix_idx, idx)`,
    /// stepping by `fill_step`, and additionally seeds the last full suffix
    /// ending exactly at `idx` (which a strided fill may have skipped).
    /// Note: `self.suffix_idx` itself is not modified here — callers advance it.
    #[inline(always)]
    fn add_suffixes_till(&mut self, idx: usize, fill_step: usize) {
        let start = self.suffix_idx;
        let last_entry = self.window.last_mut().unwrap();
        // An entry shorter than one key can never contribute a suffix.
        if last_entry.data.len() < MIN_MATCH_LEN {
            return;
        }
        // Positions at or beyond this limit would need bytes past `idx` to
        // form a full MIN_MATCH_LEN key.
        let insert_limit = idx.saturating_sub(MIN_MATCH_LEN - 1);
        if insert_limit > start {
            let data = last_entry.data.as_slice();
            let suffixes = &mut last_entry.suffixes;
            if fill_step == FAST_HASH_FILL_STEP {
                // Unrolled fast path for the most common fill step.
                Self::add_suffixes_interleaved_fast(data, suffixes, start, insert_limit);
            } else {
                let mut pos = start;
                while pos < insert_limit {
                    Self::insert_suffix_if_absent(data, suffixes, pos);
                    pos += fill_step;
                }
            }
        }

        // Always register the final suffix ending at `idx`, so the next match
        // search can anchor right at this boundary even if the stride above
        // stepped over that position.
        if idx >= start + MIN_MATCH_LEN {
            let tail_start = idx - MIN_MATCH_LEN;
            let tail_key = &last_entry.data[tail_start..tail_start + MIN_MATCH_LEN];
            if !last_entry.suffixes.contains_key(tail_key) {
                last_entry.suffixes.insert(tail_key, tail_start);
            }
        }
    }
810
811    #[inline(always)]
812    fn insert_suffix_if_absent(data: &[u8], suffixes: &mut SuffixStore, pos: usize) {
813        debug_assert!(
814            pos + MIN_MATCH_LEN <= data.len(),
815            "insert_suffix_if_absent: pos {} + MIN_MATCH_LEN {} exceeds data.len() {}",
816            pos,
817            MIN_MATCH_LEN,
818            data.len()
819        );
820        let key = &data[pos..pos + MIN_MATCH_LEN];
821        if !suffixes.contains_key(key) {
822            suffixes.insert(key, pos);
823        }
824    }
825
826    #[inline(always)]
827    fn add_suffixes_interleaved_fast(
828        data: &[u8],
829        suffixes: &mut SuffixStore,
830        start: usize,
831        insert_limit: usize,
832    ) {
833        let lane = FAST_HASH_FILL_STEP;
834        let mut pos = start;
835
836        // Pipeline-ish fill: compute and retire several hash positions per loop
837        // so the fastest path keeps multiple independent hash lookups in flight.
838        while pos + lane * 3 < insert_limit {
839            let p0 = pos;
840            let p1 = pos + lane;
841            let p2 = pos + lane * 2;
842            let p3 = pos + lane * 3;
843
844            Self::insert_suffix_if_absent(data, suffixes, p0);
845            Self::insert_suffix_if_absent(data, suffixes, p1);
846            Self::insert_suffix_if_absent(data, suffixes, p2);
847            Self::insert_suffix_if_absent(data, suffixes, p3);
848
849            pos += lane * 4;
850        }
851
852        while pos < insert_limit {
853            Self::insert_suffix_if_absent(data, suffixes, pos);
854            pos += lane;
855        }
856    }
857
858    fn repcode_candidate(&self, data_slice: &[u8], literals_len: usize) -> Option<(usize, usize)> {
859        if literals_len != 0 {
860            return None;
861        }
862
863        let reps = [
864            Some(self.offset_hist[1] as usize),
865            Some(self.offset_hist[2] as usize),
866            (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
867        ];
868
869        let mut best: Option<(usize, usize)> = None;
870        let mut seen = [0usize; 3];
871        let mut seen_len = 0usize;
872        for offset in reps.into_iter().flatten() {
873            if offset == 0 {
874                continue;
875            }
876            if seen[..seen_len].contains(&offset) {
877                continue;
878            }
879            seen[seen_len] = offset;
880            seen_len += 1;
881
882            let Some(match_len) = self.offset_match_len(offset, data_slice) else {
883                continue;
884            };
885            if match_len < MIN_MATCH_LEN {
886                continue;
887            }
888
889            if best.is_none_or(|(old_offset, old_len)| {
890                match_len > old_len || (match_len == old_len && offset < old_offset)
891            }) {
892                best = Some((offset, match_len));
893            }
894        }
895        best
896    }
897
898    fn offset_match_len(&self, offset: usize, data_slice: &[u8]) -> Option<usize> {
899        if offset == 0 {
900            return None;
901        }
902
903        let last_idx = self.window.len().checked_sub(1)?;
904        let last_entry = &self.window[last_idx];
905        let searchable_prefix = self.window_size - (last_entry.data.len() - self.suffix_idx);
906        if offset > searchable_prefix {
907            return None;
908        }
909
910        let mut remaining = offset;
911        let (entry_idx, match_index) = if remaining <= self.suffix_idx {
912            (last_idx, self.suffix_idx - remaining)
913        } else {
914            remaining -= self.suffix_idx;
915            let mut found = None;
916            for entry_idx in (0..last_idx).rev() {
917                let len = self.window[entry_idx].data.len();
918                if remaining <= len {
919                    found = Some((entry_idx, len - remaining));
920                    break;
921                }
922                remaining -= len;
923            }
924            found?
925        };
926
927        let match_entry = &self.window[entry_idx];
928        let match_slice = &match_entry.data[match_index..];
929
930        Some(Self::common_prefix_len(match_slice, data_slice))
931    }
932
933    /// Skip matching for the whole current window entry
934    fn skip_matching(&mut self) {
935        let len = self.window.last().unwrap().data.len();
936        self.add_suffixes_till(len, 1);
937        self.suffix_idx = len;
938        self.last_idx_in_sequence = len;
939    }
940
941    /// Add a new window entry. Will panic if the last window entry hasn't been processed properly.
942    /// If any resources are released by pushing the new entry they are returned via the callback
943    fn add_data(
944        &mut self,
945        data: Vec<u8>,
946        suffixes: SuffixStore,
947        reuse_space: impl FnMut(Vec<u8>, SuffixStore),
948    ) {
949        assert!(
950            self.window.is_empty() || self.suffix_idx == self.window.last().unwrap().data.len()
951        );
952        self.reserve(data.len(), reuse_space);
953        #[cfg(debug_assertions)]
954        self.concat_window.extend_from_slice(&data);
955
956        if let Some(last_len) = self.window.last().map(|last| last.data.len()) {
957            for entry in self.window.iter_mut() {
958                entry.base_offset += last_len;
959            }
960        }
961
962        let len = data.len();
963        self.window.push(WindowEntry {
964            data,
965            suffixes,
966            base_offset: 0,
967        });
968        self.window_size += len;
969        self.suffix_idx = 0;
970        self.last_idx_in_sequence = 0;
971    }
972
973    /// Reserve space for a new window entry
974    /// If any resources are released by pushing the new entry they are returned via the callback
975    fn reserve(&mut self, amount: usize, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
976        assert!(self.max_window_size >= amount);
977        while self.window_size + amount > self.max_window_size {
978            let removed = self.window.remove(0);
979            self.window_size -= removed.data.len();
980            #[cfg(debug_assertions)]
981            self.concat_window.drain(0..removed.data.len());
982
983            let WindowEntry {
984                suffixes,
985                data: leaked_vec,
986                base_offset: _,
987            } = removed;
988            reuse_space(leaked_vec, suffixes);
989        }
990    }
991}
992
/// Match generator in the spirit of zstd's "dfast" strategy: a short (4-byte)
/// and a long (8-byte) hash table over a contiguous history buffer, plus a
/// shallow lazy-match lookahead.
struct DfastMatchGenerator {
    /// Upper bound on the total bytes retained across all window entries.
    max_window_size: usize,
    /// The individual data blocks currently inside the window.
    window: VecDeque<Vec<u8>>,
    /// Total size in bytes of all blocks in `window`.
    window_size: usize,
    // We keep a contiguous searchable history to avoid rebuilding and reseeding
    // the matcher state from disjoint block buffers on every block.
    history: Vec<u8>,
    /// Index into `history` where the live (not yet compacted-away) bytes begin.
    history_start: usize,
    /// Absolute stream position of the first live history byte.
    history_abs_start: usize,
    /// The three most recent offsets, used for repeat-offset (rep-code) probes.
    offset_hist: [u32; 3],
    /// 4-byte-hash buckets of recent absolute positions, newest in slot 0.
    short_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
    /// 8-byte-hash buckets of recent absolute positions, newest in slot 0.
    long_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
    // Lazy match lookahead depth (internal tuning parameter).
    lazy_depth: u8,
}
1008
/// A potential back-reference found while matching.
#[derive(Copy, Clone)]
struct MatchCandidate {
    /// Absolute position where the match begins (can precede the probe
    /// position after backward extension over pending literals).
    start: usize,
    /// Distance back to the earlier occurrence of the matched bytes.
    offset: usize,
    /// Number of matching bytes.
    match_len: usize,
}
1015
impl DfastMatchGenerator {
    /// Create a matcher for the given window budget. The hash tables are
    /// allocated lazily on first use (see `ensure_hash_tables`).
    fn new(max_window_size: usize) -> Self {
        Self {
            max_window_size,
            window: VecDeque::new(),
            window_size: 0,
            history: Vec::new(),
            history_start: 0,
            history_abs_start: 0,
            // Initial repeat offsets mandated by the zstd format.
            offset_hist: [1, 4, 8],
            short_hash: Vec::new(),
            long_hash: Vec::new(),
            lazy_depth: 1,
        }
    }

    /// Clear all matcher state. Window buffers are handed back through
    /// `reuse_space`; the table allocations are kept (just wiped) so the next
    /// use skips the allocation cost.
    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
        self.window_size = 0;
        self.history.clear();
        self.history_start = 0;
        self.history_abs_start = 0;
        self.offset_hist = [1, 4, 8];
        if !self.short_hash.is_empty() {
            self.short_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
            self.long_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
        }
        for mut data in self.window.drain(..) {
            // Return buffers at full capacity so callers can reuse them as-is.
            data.resize(data.capacity(), 0);
            reuse_space(data);
        }
    }

    /// Borrow the most recently added block.
    fn get_last_space(&self) -> &[u8] {
        self.window.back().unwrap().as_slice()
    }

    /// Append a block, evicting old blocks from the front until it fits the
    /// window budget. Evicted buffers are handed back via `reuse_space`.
    fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
        assert!(data.len() <= self.max_window_size);
        while self.window_size + data.len() > self.max_window_size {
            let removed = self.window.pop_front().unwrap();
            self.window_size -= removed.len();
            // Advancing both start markers keeps absolute positions stable
            // across evictions.
            self.history_start += removed.len();
            self.history_abs_start += removed.len();
            reuse_space(removed);
        }
        self.compact_history();
        self.history.extend_from_slice(&data);
        self.window_size += data.len();
        self.window.push_back(data);
    }

    /// Evict blocks from the front until the window fits its budget again.
    fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
        while self.window_size > self.max_window_size {
            let removed = self.window.pop_front().unwrap();
            self.window_size -= removed.len();
            self.history_start += removed.len();
            self.history_abs_start += removed.len();
            reuse_space(removed);
        }
    }

    /// Index all positions of the current block without emitting sequences.
    fn skip_matching(&mut self) {
        self.ensure_hash_tables();
        let current_len = self.window.back().unwrap().len();
        let current_abs_start = self.history_abs_start + self.window_size - current_len;
        self.insert_positions(current_abs_start, current_abs_start + current_len);
    }

    /// Run match finding over the most recently added block, emitting
    /// literal/match `Sequence`s through `handle_sequence`.
    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
        self.ensure_hash_tables();

        let current_len = self.window.back().unwrap().len();
        if current_len == 0 {
            return;
        }

        // Absolute stream position of the current block's first byte.
        let current_abs_start = self.history_abs_start + self.window_size - current_len;

        let mut pos = 0usize;
        let mut literals_start = 0usize;
        while pos + DFAST_MIN_MATCH_LEN <= current_len {
            let abs_pos = current_abs_start + pos;
            let lit_len = pos - literals_start;

            let best = self.best_match(abs_pos, lit_len);
            if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
                // Index every position the match covers so later data can
                // still find it.
                self.insert_positions(abs_pos, candidate.start + candidate.match_len);
                let current = self.window.back().unwrap().as_slice();
                let start = candidate.start - current_abs_start;
                let literals = &current[literals_start..start];
                handle_sequence(Sequence::Triple {
                    literals,
                    offset: candidate.offset,
                    match_len: candidate.match_len,
                });
                // The encoded offset value is irrelevant here; we only need the
                // side effect on offset history for future rep-code matching.
                let _ = encode_offset_with_history(
                    candidate.offset as u32,
                    literals.len() as u32,
                    &mut self.offset_hist,
                );
                pos = start + candidate.match_len;
                literals_start = pos;
            } else {
                // No acceptable match here: index this position, advance one byte.
                self.insert_position(abs_pos);
                pos += 1;
            }
        }

        if literals_start < current_len {
            // We stop inserting once fewer than DFAST_MIN_MATCH_LEN bytes remain.
            // The last boundary-spanning start that can seed a dfast hash is
            // still inserted by the loop above; `dfast_inserts_tail_positions_
            // for_next_block_matching()` asserts that the next block can match
            // immediately at the boundary without eagerly seeding the final
            // DFAST_MIN_MATCH_LEN - 1 bytes here.
            let current = self.window.back().unwrap().as_slice();
            handle_sequence(Sequence::Literals {
                literals: &current[literals_start..],
            });
        }
    }

    /// Allocate the two hash tables on first use.
    fn ensure_hash_tables(&mut self) {
        if self.short_hash.is_empty() {
            // This is intentionally lazy so Fastest/Uncompressed never pay the
            // ~dfast-level memory cost. The current size tracks the issue's
            // zstd level-3 style parameters rather than a generic low-memory preset.
            self.short_hash =
                alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; 1 << DFAST_HASH_BITS];
            self.long_hash =
                alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; 1 << DFAST_HASH_BITS];
        }
    }

    /// Physically drop the dead prefix of `history` once the memmove pays
    /// off: the dead prefix exceeds a full window, or makes up at least half
    /// of the buffer.
    fn compact_history(&mut self) {
        if self.history_start == 0 {
            return;
        }
        if self.history_start >= self.max_window_size
            || self.history_start * 2 >= self.history.len()
        {
            self.history.drain(..self.history_start);
            self.history_start = 0;
        }
    }

    /// The still-searchable portion of the history buffer.
    fn live_history(&self) -> &[u8] {
        &self.history[self.history_start..]
    }

    /// Absolute stream position one past the final live history byte.
    fn history_abs_end(&self) -> usize {
        self.history_abs_start + self.live_history().len()
    }

    /// Best candidate at `abs_pos`: combines repeat-offset probes and hash
    /// probes, preferring longer matches and, on ties, smaller offsets.
    fn best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        let rep = self.repcode_candidate(abs_pos, lit_len);
        let hash = self.hash_candidate(abs_pos, lit_len);
        Self::better_candidate(rep, hash)
    }

    /// Decide whether to take `best` now or defer in favor of a potentially
    /// better match starting one or two bytes later (lazy matching).
    /// Returning `None` signals "emit a literal and retry at the next byte".
    fn pick_lazy_match(
        &self,
        abs_pos: usize,
        lit_len: usize,
        best: Option<MatchCandidate>,
    ) -> Option<MatchCandidate> {
        let best = best?;
        // Long enough matches are taken immediately; likewise when there is
        // not enough lookahead left to evaluate the next position.
        if best.match_len >= DFAST_TARGET_LEN
            || abs_pos + 1 + DFAST_MIN_MATCH_LEN > self.history_abs_end()
        {
            return Some(best);
        }

        // Lazy check: evaluate pos+1
        let next = self.best_match(abs_pos + 1, lit_len + 1);
        if let Some(next) = next
            && (next.match_len > best.match_len
                || (next.match_len == best.match_len && next.offset < best.offset))
        {
            return None;
        }

        // Lazy2 check: also evaluate pos+2 (must beat `best` by more than the
        // one extra literal it costs)
        if self.lazy_depth >= 2 && abs_pos + 2 + DFAST_MIN_MATCH_LEN <= self.history_abs_end() {
            let next2 = self.best_match(abs_pos + 2, lit_len + 2);
            if let Some(next2) = next2
                && next2.match_len > best.match_len + 1
            {
                return None;
            }
        }

        Some(best)
    }

    /// Probe the repeat offsets at `abs_pos`. With no pending literals the
    /// candidates are offset_hist[1], [2] and [0] - 1; otherwise [0], [1], [2].
    fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        let reps = if lit_len == 0 {
            [
                Some(self.offset_hist[1] as usize),
                Some(self.offset_hist[2] as usize),
                (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
            ]
        } else {
            [
                Some(self.offset_hist[0] as usize),
                Some(self.offset_hist[1] as usize),
                Some(self.offset_hist[2] as usize),
            ]
        };

        let mut best = None;
        for rep in reps.into_iter().flatten() {
            if rep == 0 || rep > abs_pos {
                continue;
            }
            let candidate_pos = abs_pos - rep;
            // Reject candidates that start before the retained history.
            if candidate_pos < self.history_abs_start {
                continue;
            }
            let concat = self.live_history();
            let candidate_idx = candidate_pos - self.history_abs_start;
            let current_idx = abs_pos - self.history_abs_start;
            if current_idx + DFAST_MIN_MATCH_LEN > concat.len() {
                continue;
            }
            let match_len =
                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
            if match_len >= DFAST_MIN_MATCH_LEN {
                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
                best = Self::better_candidate(best, Some(candidate));
            }
        }
        best
    }

    /// Probe the long (8-byte) hash buckets first, then the short (4-byte)
    /// ones, stopping early once a candidate reaches DFAST_TARGET_LEN.
    fn hash_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        let concat = self.live_history();
        let current_idx = abs_pos - self.history_abs_start;
        let mut best = None;
        for candidate_pos in self.long_candidates(abs_pos) {
            // Only positions inside the live history and strictly before the
            // probe position are valid.
            if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos {
                continue;
            }
            let candidate_idx = candidate_pos - self.history_abs_start;
            let match_len =
                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
            if match_len >= DFAST_MIN_MATCH_LEN {
                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
                best = Self::better_candidate(best, Some(candidate));
                if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) {
                    return best;
                }
            }
        }

        for candidate_pos in self.short_candidates(abs_pos) {
            if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos {
                continue;
            }
            let candidate_idx = candidate_pos - self.history_abs_start;
            let match_len =
                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
            if match_len >= DFAST_MIN_MATCH_LEN {
                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
                best = Self::better_candidate(best, Some(candidate));
                if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) {
                    return best;
                }
            }
        }
        best
    }

    /// Grow a match backwards over pending literals while the preceding bytes
    /// also match; each step trades one literal for one extra match byte.
    fn extend_backwards(
        &self,
        mut candidate_pos: usize,
        mut abs_pos: usize,
        mut match_len: usize,
        lit_len: usize,
    ) -> MatchCandidate {
        let concat = self.live_history();
        // Cannot extend past the start of the pending-literal run.
        let min_abs_pos = abs_pos - lit_len;
        while abs_pos > min_abs_pos
            && candidate_pos > self.history_abs_start
            && concat[candidate_pos - self.history_abs_start - 1]
                == concat[abs_pos - self.history_abs_start - 1]
        {
            candidate_pos -= 1;
            abs_pos -= 1;
            match_len += 1;
        }
        MatchCandidate {
            start: abs_pos,
            offset: abs_pos - candidate_pos,
            match_len,
        }
    }

    /// Prefer the longer match; on equal length prefer the smaller offset.
    fn better_candidate(
        lhs: Option<MatchCandidate>,
        rhs: Option<MatchCandidate>,
    ) -> Option<MatchCandidate> {
        match (lhs, rhs) {
            (None, other) | (other, None) => other,
            (Some(lhs), Some(rhs)) => {
                if rhs.match_len > lhs.match_len
                    || (rhs.match_len == lhs.match_len && rhs.offset < lhs.offset)
                {
                    Some(rhs)
                } else {
                    Some(lhs)
                }
            }
        }
    }

    /// Insert every position in `[start, end)` into the hash tables.
    fn insert_positions(&mut self, start: usize, end: usize) {
        for pos in start..end {
            self.insert_position(pos);
        }
    }

    /// Record `pos` in both hash tables when enough lookahead bytes exist
    /// (4 for the short hash, 8 for the long hash). Buckets are MRU-ordered:
    /// older entries shift down, the newest goes into slot 0.
    fn insert_position(&mut self, pos: usize) {
        let idx = pos - self.history_abs_start;
        let short = {
            let concat = self.live_history();
            (idx + 4 <= concat.len()).then(|| Self::hash4(&concat[idx..]))
        };
        if let Some(short) = short {
            let bucket = &mut self.short_hash[short];
            // Skip the shift if this position is already at the front.
            if bucket[0] != pos {
                bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
                bucket[0] = pos;
            }
        }

        let long = {
            let concat = self.live_history();
            (idx + 8 <= concat.len()).then(|| Self::hash8(&concat[idx..]))
        };
        if let Some(long) = long {
            let bucket = &mut self.long_hash[long];
            if bucket[0] != pos {
                bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
                bucket[0] = pos;
            }
        }
    }

    /// Stored candidate positions for the 4-byte hash at `pos`; empty when
    /// there is not enough lookahead to hash.
    fn short_candidates(&self, pos: usize) -> impl Iterator<Item = usize> + '_ {
        let concat = self.live_history();
        let idx = pos - self.history_abs_start;
        (idx + 4 <= concat.len())
            .then(|| self.short_hash[Self::hash4(&concat[idx..])])
            .into_iter()
            .flatten()
            .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
    }

    /// Stored candidate positions for the 8-byte hash at `pos`; empty when
    /// there is not enough lookahead to hash.
    fn long_candidates(&self, pos: usize) -> impl Iterator<Item = usize> + '_ {
        let concat = self.live_history();
        let idx = pos - self.history_abs_start;
        (idx + 8 <= concat.len())
            .then(|| self.long_hash[Self::hash8(&concat[idx..])])
            .into_iter()
            .flatten()
            .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
    }

    /// Hash of the first 4 bytes of `data`.
    fn hash4(data: &[u8]) -> usize {
        let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64;
        Self::hash_bits(value)
    }

    /// Hash of the first 8 bytes of `data`.
    fn hash8(data: &[u8]) -> usize {
        let value = u64::from_le_bytes(data[..8].try_into().unwrap());
        Self::hash_bits(value)
    }

    /// Multiplicative (Fibonacci-style) hash reduced to DFAST_HASH_BITS bits.
    fn hash_bits(value: u64) -> usize {
        const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
        ((value.wrapping_mul(PRIME)) >> (64 - DFAST_HASH_BITS)) as usize
    }
}
1402
/// Hash-chain match generator: a hash table of chain heads plus a chain table
/// linking earlier occurrences of the same hash, with lazy-match lookahead.
struct HcMatchGenerator {
    /// Upper bound on the total bytes retained across all window entries.
    max_window_size: usize,
    /// The individual data blocks currently inside the window.
    window: VecDeque<Vec<u8>>,
    /// Total size in bytes of all blocks in `window`.
    window_size: usize,
    /// Contiguous copy of the window data for O(1) slicing during matching.
    history: Vec<u8>,
    /// Index into `history` where the live bytes begin.
    history_start: usize,
    /// Absolute stream position of the first live history byte.
    history_abs_start: usize,
    /// The three most recent offsets, used for repeat-offset probes.
    offset_hist: [u32; 3],
    /// hash -> most recent position, stored as (abs_pos + 1); see HC_EMPTY.
    hash_table: Vec<u32>,
    /// Chain links to earlier positions with the same hash (same encoding).
    chain_table: Vec<u32>,
    /// Lazy match lookahead depth (internal tuning parameter).
    lazy_depth: u8,
    /// log2 of the hash table size.
    hash_log: usize,
    /// log2 of the chain table size.
    chain_log: usize,
    /// Maximum chain entries examined per probe (clamped to MAX_HC_SEARCH_DEPTH).
    search_depth: usize,
    /// Early-exit match-length threshold (same role as DFAST_TARGET_LEN).
    target_len: usize,
}
1419
1420impl HcMatchGenerator {
    /// Create a hash-chain matcher with the default tuning constants;
    /// `configure` can override them and tables are allocated lazily.
    fn new(max_window_size: usize) -> Self {
        Self {
            max_window_size,
            window: VecDeque::new(),
            window_size: 0,
            history: Vec::new(),
            history_start: 0,
            history_abs_start: 0,
            // Initial repeat offsets mandated by the zstd format.
            offset_hist: [1, 4, 8],
            // Allocated lazily by ensure_tables().
            hash_table: Vec::new(),
            chain_table: Vec::new(),
            lazy_depth: 2,
            hash_log: HC_HASH_LOG,
            chain_log: HC_CHAIN_LOG,
            search_depth: HC_SEARCH_DEPTH,
            target_len: HC_TARGET_LEN,
        }
    }
1439
1440    fn configure(&mut self, config: HcConfig) {
1441        let resize = self.hash_log != config.hash_log || self.chain_log != config.chain_log;
1442        self.hash_log = config.hash_log;
1443        self.chain_log = config.chain_log;
1444        self.search_depth = config.search_depth.min(MAX_HC_SEARCH_DEPTH);
1445        self.target_len = config.target_len;
1446        if resize && !self.hash_table.is_empty() {
1447            // Force reallocation on next ensure_tables() call.
1448            self.hash_table.clear();
1449            self.chain_table.clear();
1450        }
1451    }
1452
1453    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
1454        self.window_size = 0;
1455        self.history.clear();
1456        self.history_start = 0;
1457        self.history_abs_start = 0;
1458        self.offset_hist = [1, 4, 8];
1459        if !self.hash_table.is_empty() {
1460            self.hash_table.fill(HC_EMPTY);
1461            self.chain_table.fill(HC_EMPTY);
1462        }
1463        for mut data in self.window.drain(..) {
1464            data.resize(data.capacity(), 0);
1465            reuse_space(data);
1466        }
1467    }
1468
1469    fn get_last_space(&self) -> &[u8] {
1470        self.window.back().unwrap().as_slice()
1471    }
1472
1473    // History duplicates window data for O(1) contiguous access during match
1474    // finding (common_prefix_len, extend_backwards). Same pattern as
1475    // DfastMatchGenerator. Peak: ~2x window size for data buffers + 6 MB tables.
1476    fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
1477        assert!(data.len() <= self.max_window_size);
1478        while self.window_size + data.len() > self.max_window_size {
1479            let removed = self.window.pop_front().unwrap();
1480            self.window_size -= removed.len();
1481            self.history_start += removed.len();
1482            self.history_abs_start += removed.len();
1483            reuse_space(removed);
1484        }
1485        self.compact_history();
1486        self.history.extend_from_slice(&data);
1487        self.window_size += data.len();
1488        self.window.push_back(data);
1489    }
1490
1491    fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
1492        while self.window_size > self.max_window_size {
1493            let removed = self.window.pop_front().unwrap();
1494            self.window_size -= removed.len();
1495            self.history_start += removed.len();
1496            self.history_abs_start += removed.len();
1497            reuse_space(removed);
1498        }
1499    }
1500
1501    /// Backfill positions from the tail of the previous slice that couldn't be
1502    /// hashed at the time (insert_position needs 4 bytes of lookahead).
1503    fn backfill_boundary_positions(&mut self, current_abs_start: usize) {
1504        let backfill_start = current_abs_start
1505            .saturating_sub(3)
1506            .max(self.history_abs_start);
1507        if backfill_start < current_abs_start {
1508            self.insert_positions(backfill_start, current_abs_start);
1509        }
1510    }
1511
1512    fn skip_matching(&mut self) {
1513        self.ensure_tables();
1514        let current_len = self.window.back().unwrap().len();
1515        let current_abs_start = self.history_abs_start + self.window_size - current_len;
1516        self.backfill_boundary_positions(current_abs_start);
1517        self.insert_positions(current_abs_start, current_abs_start + current_len);
1518    }
1519
    /// Run match finding over the most recently added block, emitting
    /// literal/match `Sequence`s through `handle_sequence`.
    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
        self.ensure_tables();

        let current_len = self.window.back().unwrap().len();
        if current_len == 0 {
            return;
        }

        // Absolute stream position of the current block's first byte.
        let current_abs_start = self.history_abs_start + self.window_size - current_len;
        self.backfill_boundary_positions(current_abs_start);

        let mut pos = 0usize;
        let mut literals_start = 0usize;
        while pos + HC_MIN_MATCH_LEN <= current_len {
            let abs_pos = current_abs_start + pos;
            let lit_len = pos - literals_start;

            let best = self.find_best_match(abs_pos, lit_len);
            if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
                // Index every position the match covers so later data can
                // still find it.
                self.insert_positions(abs_pos, candidate.start + candidate.match_len);
                let current = self.window.back().unwrap().as_slice();
                let start = candidate.start - current_abs_start;
                let literals = &current[literals_start..start];
                handle_sequence(Sequence::Triple {
                    literals,
                    offset: candidate.offset,
                    match_len: candidate.match_len,
                });
                // Only the side effect on the repeat-offset history matters
                // here; the encoded value itself is discarded.
                let _ = encode_offset_with_history(
                    candidate.offset as u32,
                    literals.len() as u32,
                    &mut self.offset_hist,
                );
                pos = start + candidate.match_len;
                literals_start = pos;
            } else {
                // No acceptable match: index this position, advance one byte.
                self.insert_position(abs_pos);
                pos += 1;
            }
        }

        // Insert remaining hashable positions in the tail (the matching loop
        // stops at HC_MIN_MATCH_LEN but insert_position only needs 4 bytes).
        while pos + 4 <= current_len {
            self.insert_position(current_abs_start + pos);
            pos += 1;
        }

        // Flush any trailing bytes that never became part of a match.
        if literals_start < current_len {
            let current = self.window.back().unwrap().as_slice();
            handle_sequence(Sequence::Literals {
                literals: &current[literals_start..],
            });
        }
    }
1575
1576    fn ensure_tables(&mut self) {
1577        if self.hash_table.is_empty() {
1578            self.hash_table = alloc::vec![HC_EMPTY; 1 << self.hash_log];
1579            self.chain_table = alloc::vec![HC_EMPTY; 1 << self.chain_log];
1580        }
1581    }
1582
1583    fn compact_history(&mut self) {
1584        if self.history_start == 0 {
1585            return;
1586        }
1587        if self.history_start >= self.max_window_size
1588            || self.history_start * 2 >= self.history.len()
1589        {
1590            self.history.drain(..self.history_start);
1591            self.history_start = 0;
1592        }
1593    }
1594
1595    fn live_history(&self) -> &[u8] {
1596        &self.history[self.history_start..]
1597    }
1598
1599    fn history_abs_end(&self) -> usize {
1600        self.history_abs_start + self.live_history().len()
1601    }
1602
1603    fn hash_position(&self, data: &[u8]) -> usize {
1604        let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64;
1605        const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
1606        ((value.wrapping_mul(PRIME)) >> (64 - self.hash_log)) as usize
1607    }
1608
1609    fn insert_position(&mut self, abs_pos: usize) {
1610        let idx = abs_pos - self.history_abs_start;
1611        let concat = self.live_history();
1612        if idx + 4 > concat.len() {
1613            return;
1614        }
1615        let hash = self.hash_position(&concat[idx..]);
1616        // Store as (abs_pos + 1) so HC_EMPTY (0) never collides with a valid
1617        // position. Guard on usize before cast to avoid silent u32 truncation.
1618        // Streams >4 GiB stop inserting; matches degrade to repcodes-only.
1619        // TODO(#51): rebase table positions to avoid 4 GiB cutoff
1620        if abs_pos >= u32::MAX as usize {
1621            return;
1622        }
1623        let pos_u32 = abs_pos as u32;
1624        let stored = pos_u32 + 1;
1625        let chain_idx = pos_u32 as usize & ((1 << self.chain_log) - 1);
1626        let prev = self.hash_table[hash];
1627        self.chain_table[chain_idx] = prev;
1628        self.hash_table[hash] = stored;
1629    }
1630
1631    fn insert_positions(&mut self, start: usize, end: usize) {
1632        for pos in start..end {
1633            self.insert_position(pos);
1634        }
1635    }
1636
    // Fixed-size stack array is intentional: it avoids heap allocation on
    // the hot path and the sentinel loop exits at self.search_depth.
    /// Collect up to `search_depth` earlier occurrences of the 4-byte hash at
    /// `abs_pos` by walking the chain table, newest first.
    ///
    /// Returns a fixed array padded with `usize::MAX` sentinels; callers stop
    /// at the first sentinel. Only positions inside live history and strictly
    /// before `abs_pos` are reported.
    fn chain_candidates(&self, abs_pos: usize) -> [usize; MAX_HC_SEARCH_DEPTH] {
        let mut buf = [usize::MAX; MAX_HC_SEARCH_DEPTH];
        let idx = abs_pos - self.history_abs_start;
        let concat = self.live_history();
        if idx + 4 > concat.len() {
            return buf;
        }
        let hash = self.hash_position(&concat[idx..]);
        let chain_mask = (1 << self.chain_log) - 1;

        let mut cur = self.hash_table[hash];
        let mut filled = 0;
        // Follow chain up to search_depth valid candidates, skipping stale
        // entries (evicted from window) instead of stopping at them.
        // Stored values are (abs_pos + 1); decode with wrapping_sub(1).
        // Break on self-loops (masked chain_idx collision at periodicity).
        // Cap total steps at 4x search depth to bound time spent skipping
        // stale entries while still finding valid candidates deeper in chain.
        let mut steps = 0;
        let max_chain_steps = self.search_depth * 4;
        while filled < self.search_depth && steps < max_chain_steps {
            if cur == HC_EMPTY {
                break;
            }
            let candidate_abs = cur.wrapping_sub(1) as usize;
            let next = self.chain_table[candidate_abs & chain_mask];
            steps += 1;
            if next == cur {
                // Self-loop: two positions share chain_idx, stop to avoid
                // spinning on the same candidate forever. The looping entry
                // itself is still reported when it is a valid candidate.
                if candidate_abs >= self.history_abs_start && candidate_abs < abs_pos {
                    buf[filled] = candidate_abs;
                }
                break;
            }
            cur = next;
            // Skip entries outside live history or not strictly before us.
            if candidate_abs < self.history_abs_start || candidate_abs >= abs_pos {
                continue;
            }
            buf[filled] = candidate_abs;
            filled += 1;
        }
        buf
    }
1683
1684    fn find_best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1685        let rep = self.repcode_candidate(abs_pos, lit_len);
1686        let hash = self.hash_chain_candidate(abs_pos, lit_len);
1687        Self::better_candidate(rep, hash)
1688    }
1689
1690    fn hash_chain_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1691        let concat = self.live_history();
1692        let current_idx = abs_pos - self.history_abs_start;
1693        if current_idx + HC_MIN_MATCH_LEN > concat.len() {
1694            return None;
1695        }
1696
1697        let mut best: Option<MatchCandidate> = None;
1698        for candidate_abs in self.chain_candidates(abs_pos) {
1699            if candidate_abs == usize::MAX {
1700                break;
1701            }
1702            let candidate_idx = candidate_abs - self.history_abs_start;
1703            let match_len =
1704                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
1705            if match_len >= HC_MIN_MATCH_LEN {
1706                let candidate = self.extend_backwards(candidate_abs, abs_pos, match_len, lit_len);
1707                best = Self::better_candidate(best, Some(candidate));
1708                if best.is_some_and(|b| b.match_len >= self.target_len) {
1709                    return best;
1710                }
1711            }
1712        }
1713        best
1714    }
1715
1716    fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1717        let reps = if lit_len == 0 {
1718            [
1719                Some(self.offset_hist[1] as usize),
1720                Some(self.offset_hist[2] as usize),
1721                (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
1722            ]
1723        } else {
1724            [
1725                Some(self.offset_hist[0] as usize),
1726                Some(self.offset_hist[1] as usize),
1727                Some(self.offset_hist[2] as usize),
1728            ]
1729        };
1730
1731        let concat = self.live_history();
1732        let current_idx = abs_pos - self.history_abs_start;
1733        if current_idx + HC_MIN_MATCH_LEN > concat.len() {
1734            return None;
1735        }
1736
1737        let mut best = None;
1738        for rep in reps.into_iter().flatten() {
1739            if rep == 0 || rep > abs_pos {
1740                continue;
1741            }
1742            let candidate_pos = abs_pos - rep;
1743            if candidate_pos < self.history_abs_start {
1744                continue;
1745            }
1746            let candidate_idx = candidate_pos - self.history_abs_start;
1747            let match_len =
1748                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
1749            if match_len >= HC_MIN_MATCH_LEN {
1750                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
1751                best = Self::better_candidate(best, Some(candidate));
1752            }
1753        }
1754        best
1755    }
1756
1757    fn extend_backwards(
1758        &self,
1759        mut candidate_pos: usize,
1760        mut abs_pos: usize,
1761        mut match_len: usize,
1762        lit_len: usize,
1763    ) -> MatchCandidate {
1764        let concat = self.live_history();
1765        let min_abs_pos = abs_pos - lit_len;
1766        while abs_pos > min_abs_pos
1767            && candidate_pos > self.history_abs_start
1768            && concat[candidate_pos - self.history_abs_start - 1]
1769                == concat[abs_pos - self.history_abs_start - 1]
1770        {
1771            candidate_pos -= 1;
1772            abs_pos -= 1;
1773            match_len += 1;
1774        }
1775        MatchCandidate {
1776            start: abs_pos,
1777            offset: abs_pos - candidate_pos,
1778            match_len,
1779        }
1780    }
1781
1782    fn better_candidate(
1783        lhs: Option<MatchCandidate>,
1784        rhs: Option<MatchCandidate>,
1785    ) -> Option<MatchCandidate> {
1786        match (lhs, rhs) {
1787            (None, other) | (other, None) => other,
1788            (Some(lhs), Some(rhs)) => {
1789                let lhs_gain = Self::match_gain(lhs.match_len, lhs.offset);
1790                let rhs_gain = Self::match_gain(rhs.match_len, rhs.offset);
1791                if rhs_gain > lhs_gain {
1792                    Some(rhs)
1793                } else {
1794                    Some(lhs)
1795                }
1796            }
1797        }
1798    }
1799
1800    fn match_gain(match_len: usize, offset: usize) -> i32 {
1801        debug_assert!(
1802            offset > 0,
1803            "zstd offsets are 1-indexed, offset=0 is invalid"
1804        );
1805        let offset_bits = 32 - (offset as u32).leading_zeros() as i32;
1806        (match_len as i32) * 4 - offset_bits
1807    }
1808
    // Lazy lookahead queries pos+1/pos+2 before they are inserted into hash
    // tables — matching C zstd behavior. Seeding before comparing would let a
    // position match against itself, changing semantics.
    /// Accept `best` at `abs_pos`, or return `None` to defer by one literal
    /// because a strictly better match begins at pos+1 (or pos+2 when
    /// `lazy_depth >= 2`).
    ///
    /// The `+4` terms handicap the deferred match by the approximate cost of
    /// each extra literal byte that deferral introduces.
    fn pick_lazy_match(
        &self,
        abs_pos: usize,
        lit_len: usize,
        best: Option<MatchCandidate>,
    ) -> Option<MatchCandidate> {
        let best = best?;
        // Accept immediately when the match is already "good enough" or when
        // there is no room left to look one position ahead.
        if best.match_len >= self.target_len
            || abs_pos + 1 + HC_MIN_MATCH_LEN > self.history_abs_end()
        {
            return Some(best);
        }

        let current_gain = Self::match_gain(best.match_len, best.offset) + 4;

        // Lazy check: evaluate pos+1
        let next = self.find_best_match(abs_pos + 1, lit_len + 1);
        if let Some(next) = next {
            let next_gain = Self::match_gain(next.match_len, next.offset);
            if next_gain > current_gain {
                return None;
            }
        }

        // Lazy2 check: also evaluate pos+2
        if self.lazy_depth >= 2 && abs_pos + 2 + HC_MIN_MATCH_LEN <= self.history_abs_end() {
            let next2 = self.find_best_match(abs_pos + 2, lit_len + 2);
            if let Some(next2) = next2 {
                let next2_gain = Self::match_gain(next2.match_len, next2.offset);
                // Must beat current gain + extra literal cost
                if next2_gain > current_gain + 4 {
                    return None;
                }
            }
        }

        Some(best)
    }
1850}
1851
// End-to-end check of the simple matcher: feed blocks, replay the emitted
// sequences with a scratch decoder, and require the reconstruction to equal
// the original input byte-for-byte.
#[test]
fn matches() {
    let mut matcher = MatchGenerator::new(1000);
    let mut original_data = Vec::new();
    let mut reconstructed = Vec::new();

    // Mini-decoder: literals are appended verbatim; a triple copies
    // `match_len` bytes starting `offset` bytes back from the current end.
    let replay_sequence = |seq: Sequence<'_>, reconstructed: &mut Vec<u8>| match seq {
        Sequence::Literals { literals } => {
            assert!(!literals.is_empty());
            reconstructed.extend_from_slice(literals);
        }
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            assert!(offset > 0);
            assert!(match_len >= MIN_MATCH_LEN);
            reconstructed.extend_from_slice(literals);
            assert!(offset <= reconstructed.len());
            let start = reconstructed.len() - offset;
            // Byte-at-a-time copy so overlapping matches (offset < match_len)
            // replay correctly.
            for i in 0..match_len {
                let byte = reconstructed[start + i];
                reconstructed.push(byte);
            }
        }
    };

    // Block 1: a run of zeros.
    matcher.add_data(
        alloc::vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));

    assert!(!matcher.next_sequence(|_| {}));

    // Block 2: repeating 1..6 pattern followed by zeros.
    matcher.add_data(
        alloc::vec![
            1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
        ],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[
        1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
    ]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Block 3: fresh literal bytes followed by zeros.
    matcher.add_data(
        alloc::vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Block 4: zeros, same bytes as seen earlier in history.
    matcher.add_data(
        alloc::vec![0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Block 5: same bytes as the tail of block 3.
    matcher.add_data(
        alloc::vec![7, 8, 9, 10, 11],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[7, 8, 9, 10, 11]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Block 6: committed via skip_matching — no sequences are produced, so
    // the decoder side is updated by hand.
    matcher.add_data(
        alloc::vec![1, 3, 5, 7, 9],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    matcher.skip_matching();
    original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
    reconstructed.extend_from_slice(&[1, 3, 5, 7, 9]);
    assert!(!matcher.next_sequence(|_| {}));

    // Block 7: same bytes as the skipped block.
    matcher.add_data(
        alloc::vec![1, 3, 5, 7, 9],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[1, 3, 5, 7, 9]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Block 8: intra-block repetition (the 11..20 run repeats immediately).
    matcher.add_data(
        alloc::vec![0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    assert_eq!(reconstructed, original_data);
}
1971
// Round-trip the dfast matcher across two 128 KiB blocks of the same
// repeating pattern: replaying the emitted sequences must reproduce both
// blocks exactly.
#[test]
fn dfast_matches_roundtrip_multi_block_pattern() {
    let pattern = [9, 21, 44, 184, 19, 96, 171, 109, 141, 251];
    let first_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
    let second_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();

    let mut matcher = DfastMatchGenerator::new(DFAST_DEFAULT_WINDOW_SIZE);
    // Scratch decoder: literals appended verbatim, triples copied
    // byte-at-a-time so overlapping matches replay correctly.
    let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            decoded.extend_from_slice(literals);
            let start = decoded.len() - offset;
            for i in 0..match_len {
                let byte = decoded[start + i];
                decoded.push(byte);
            }
        }
    };

    matcher.add_data(first_block.clone(), |_| {});
    let mut history = Vec::new();
    matcher.start_matching(|seq| replay_sequence(&mut history, seq));
    assert_eq!(history, first_block);

    // Second block: sequences may now reference the first block's bytes.
    matcher.add_data(second_block.clone(), |_| {});
    let prefix_len = history.len();
    matcher.start_matching(|seq| replay_sequence(&mut history, seq));

    assert_eq!(&history[prefix_len..], second_block.as_slice());
}
2006
// Reset to Default must select the dfast backend with its default window and
// still round-trip committed data; a later reset to Fastest must restore the
// small window.
#[test]
fn driver_switches_backends_and_initializes_dfast_via_reset() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    driver.reset(CompressionLevel::Default);
    assert_eq!(driver.window_size(), DFAST_DEFAULT_WINDOW_SIZE as u64);

    // First block enters history without emitting sequences.
    let mut first = driver.get_next_space();
    first[..12].copy_from_slice(b"abcabcabcabc");
    first.truncate(12);
    driver.commit_space(first);
    assert_eq!(driver.get_last_space(), b"abcabcabcabc");
    driver.skip_matching();

    // Second, identical block must be reconstructible from its sequences
    // plus the already-known first block.
    let mut second = driver.get_next_space();
    second[..12].copy_from_slice(b"abcabcabcabc");
    second.truncate(12);
    driver.commit_space(second);

    let mut reconstructed = b"abcabcabcabc".to_vec();
    driver.start_matching(|seq| match seq {
        Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            reconstructed.extend_from_slice(literals);
            let start = reconstructed.len() - offset;
            for i in 0..match_len {
                let byte = reconstructed[start + i];
                reconstructed.push(byte);
            }
        }
    });
    assert_eq!(reconstructed, b"abcabcabcabcabcabcabcabc");

    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.window_size(), 64);
}
2047
// Switching from Best (large hash-chain tables) down to Fastest must free
// the HC tables rather than keeping the allocation around.
#[test]
fn driver_best_to_fastest_releases_oversized_hc_tables() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Initialize at Best — allocates large HC tables (2M hash, 1M chain).
    driver.reset(CompressionLevel::Best);
    assert_eq!(driver.window_size(), BEST_DEFAULT_WINDOW_SIZE as u64);

    // Feed data so tables are actually allocated via ensure_tables().
    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"abcabcabcabc");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching();

    // Switch to Fastest — must release HC tables.
    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.window_size(), 64);

    // HC matcher should have empty tables after backend switch.
    let hc = driver.hc_match_generator.as_ref().unwrap();
    assert!(
        hc.hash_table.is_empty(),
        "HC hash_table should be released after switching away from Best"
    );
    assert!(
        hc.chain_table.is_empty(),
        "HC chain_table should be released after switching away from Best"
    );
}
2078
// Upgrading Better -> Best must reallocate the hash-chain tables at the
// larger Best-level sizes instead of reusing the smaller Better tables.
#[test]
fn driver_better_to_best_resizes_hc_tables() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Initialize at Better — allocates small HC tables (1M hash, 512K chain).
    driver.reset(CompressionLevel::Better);
    assert_eq!(driver.window_size(), BETTER_DEFAULT_WINDOW_SIZE as u64);

    // Commit data so ensure_tables() actually allocates at Better sizes.
    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"abcabcabcabc");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching();

    let hc = driver.hc_match_generator.as_ref().unwrap();
    let better_hash_len = hc.hash_table.len();
    let better_chain_len = hc.chain_table.len();

    // Switch to Best — must resize to larger tables.
    driver.reset(CompressionLevel::Best);
    assert_eq!(driver.window_size(), BEST_DEFAULT_WINDOW_SIZE as u64);

    // Feed data to trigger ensure_tables with new sizes.
    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"xyzxyzxyzxyz");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching();

    let hc = driver.hc_match_generator.as_ref().unwrap();
    assert!(
        hc.hash_table.len() > better_hash_len,
        "Best hash_table ({}) should be larger than Better ({})",
        hc.hash_table.len(),
        better_hash_len
    );
    assert!(
        hc.chain_table.len() > better_chain_len,
        "Best chain_table ({}) should be larger than Better ({})",
        hc.chain_table.len(),
        better_chain_len
    );
}
2122
// After priming with an 8-byte dictionary, the first committed block
// (identical content) must produce a match at offset 8 into the dictionary.
#[test]
fn prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    // Commit a block identical to the dictionary content.
    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    let mut saw_match = false;
    driver.start_matching(|seq| {
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 8
            && match_len >= MIN_MATCH_LEN
        {
            saw_match = true;
        }
    });

    assert!(
        saw_match,
        "first full block should still match dictionary-primed history"
    );
}
2155
// With a 24-byte dictionary (three window-sized chunks), the first committed
// block must still match the earliest dictionary bytes 24 positions back.
#[test]
fn prime_with_large_dictionary_preserves_early_history_until_first_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);

    // Commit a block equal to the dictionary's first 8 bytes.
    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    let mut saw_match = false;
    driver.start_matching(|seq| {
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 24
            && match_len >= MIN_MATCH_LEN
        {
            saw_match = true;
        }
    });

    assert!(
        saw_match,
        "dictionary bytes should remain addressable until frame output exceeds the live window"
    );
}
2188
// Even a zero-length dictionary must still seed the repcode history.
#[test]
fn prime_with_dictionary_applies_offset_history_even_when_content_is_empty() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    let empty_content: &[u8] = &[];
    driver.prime_with_dictionary(empty_content, [11, 7, 3]);

    assert_eq!(driver.match_generator.offset_hist, [11, 7, 3]);
}
2198
// Dictionary priming must also work on the dfast backend (selected by
// CompressionLevel::Default): the first committed block matches at offset 8.
#[test]
fn dfast_prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Default);

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    // Commit a block identical to the dictionary content.
    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    let mut saw_match = false;
    driver.start_matching(|seq| {
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 8
            && match_len >= DFAST_MIN_MATCH_LEN
        {
            saw_match = true;
        }
    });

    assert!(
        saw_match,
        "dfast backend should match dictionary-primed history in first full block"
    );
}
2231
// The frame header advertises the window size; dictionary bytes are only a
// retention budget and must not leak into that advertisement.
#[test]
fn prime_with_dictionary_does_not_inflate_reported_window_size() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    let reported_before = driver.window_size();
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);

    assert_eq!(
        driver.window_size(),
        reported_before,
        "dictionary retention budget must not change reported frame window size"
    );
}
2246
// A dictionary whose final chunk is a single byte yields a capacity-1 suffix
// table; such a tail must never be committed to the matcher window.
#[test]
fn prime_with_dictionary_does_not_reuse_tiny_suffix_store() {
    let mut driver = MatchGeneratorDriver::new(8, 2);
    driver.reset(CompressionLevel::Fastest);

    // 9 bytes = one full 8-byte slice + a 1-byte tail chunk.
    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);

    let no_short_tails = driver
        .match_generator
        .window
        .iter()
        .all(|entry| entry.data.len() >= MIN_MATCH_LEN);
    assert!(
        no_short_tails,
        "dictionary priming must not commit tails shorter than MIN_MATCH_LEN"
    );
}
2265
// Only dictionary bytes actually committed to history may grow the
// retention budget; an uncommittable 1-byte tail must not be counted.
#[test]
fn prime_with_dictionary_counts_only_committed_tail_budget() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    let budget_before = driver.match_generator.max_window_size;
    // One full slice plus a 1-byte tail that cannot be committed.
    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);

    // Only the 8 committed bytes count toward the budget.
    assert_eq!(
        driver.match_generator.max_window_size,
        budget_before + 8,
        "retention budget must account only for dictionary bytes actually committed to history"
    );
}
2281
// Unlike the simple matcher, dfast can exploit a 4-byte dictionary tail via
// short-hash overlap into the next block, so that tail stays in the budget.
#[test]
fn dfast_prime_with_dictionary_counts_four_byte_tail_budget() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Default);

    let budget_before = driver.dfast_matcher().max_window_size;
    // One full 8-byte slice plus a retained 4-byte tail.
    driver.prime_with_dictionary(b"abcdefghijkl", [1, 4, 8]);

    // All 12 bytes count toward the budget.
    assert_eq!(
        driver.dfast_matcher().max_window_size,
        budget_before + 12,
        "dfast retention budget should include 4-byte dictionary tails"
    );
}
2298
// Dictionary bytes add a temporary retention budget to the window; once the
// primed slices are evicted by live data, the budget must be fully retired.
#[test]
fn prime_with_dictionary_budget_shrinks_after_simple_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    let base_window = driver.match_generator.max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(driver.match_generator.max_window_size, base_window + 24);

    // Commit live blocks until the 24 dictionary bytes are evicted.
    for block in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut space = driver.get_next_space();
        space.clear();
        space.extend_from_slice(block);
        driver.commit_space(space);
        driver.skip_matching();
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.match_generator.max_window_size, base_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
2325
// Dictionary retention budget must be retired after eviction on the dfast
// backend as well, exercised here with an artificially small live window.
#[test]
fn prime_with_dictionary_budget_shrinks_after_dfast_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Default);
    // Use a small live window in this regression so dictionary-primed slices are
    // evicted quickly and budget retirement can be asserted deterministically.
    driver.dfast_matcher_mut().max_window_size = 8;
    driver.reported_window_size = 8;

    let base_window = driver.dfast_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(driver.dfast_matcher().max_window_size, base_window + 24);

    // Commit live blocks until the 24 dictionary bytes are evicted.
    for block in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut space = driver.get_next_space();
        space.clear();
        space.extend_from_slice(block);
        driver.commit_space(space);
        driver.skip_matching();
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.dfast_matcher().max_window_size,
        base_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
2357
// The hash-chain backend (selected by CompressionLevel::Better) must also
// find matches against dictionary-primed history in the first block.
#[test]
fn hc_prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Better);

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    let mut space = driver.get_next_space();
    space.clear();
    // Repeat the dictionary content so the HC matcher can find it.
    // HC_MIN_MATCH_LEN is 5, so an 8-byte match is well above threshold.
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    let mut saw_match = false;
    driver.start_matching(|seq| {
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 8
            && match_len >= HC_MIN_MATCH_LEN
        {
            saw_match = true;
        }
    });

    assert!(
        saw_match,
        "hash-chain backend should match dictionary-primed history in first full block"
    );
}
2392
// Dictionary retention budget must be retired after eviction on the
// hash-chain backend as well.
#[test]
fn prime_with_dictionary_budget_shrinks_after_hc_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Better);
    // Use a small live window so dictionary-primed slices are evicted quickly.
    driver.hc_matcher_mut().max_window_size = 8;
    driver.reported_window_size = 8;

    let base_window = driver.hc_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(driver.hc_matcher().max_window_size, base_window + 24);

    // Commit live blocks until the 24 dictionary bytes are evicted.
    for block in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut space = driver.get_next_space();
        space.clear();
        space.extend_from_slice(block);
        driver.commit_space(space);
        driver.skip_matching();
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.hc_matcher().max_window_size,
        base_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
2423
#[test]
fn suffix_store_with_single_slot_does_not_panic_on_keying() {
    // A capacity of 1 forces every key through the same slot; insertion and
    // lookup must still succeed without panicking on the degenerate table.
    let mut store = SuffixStore::with_capacity(1);
    store.insert(b"abcde", 0);
    assert!(store.contains_key(b"abcde"));
    assert_eq!(store.get(b"abcde"), Some(0));
}
2431
#[test]
fn fastest_reset_uses_interleaved_hash_fill_step() {
    let mut gen_driver = MatchGeneratorDriver::new(32, 2);

    // Uncompressed seeds every position (step 1).
    gen_driver.reset(CompressionLevel::Uncompressed);
    assert_eq!(gen_driver.match_generator.hash_fill_step, 1);

    // Fastest interleaves hash insertions for throughput.
    gen_driver.reset(CompressionLevel::Fastest);
    assert_eq!(gen_driver.match_generator.hash_fill_step, FAST_HASH_FILL_STEP);

    // Better switches to the HashChain backend with lazy2 parsing and the
    // wider default window; verify the backend swap and lazy_depth.
    gen_driver.reset(CompressionLevel::Better);
    assert_eq!(gen_driver.active_backend, MatcherBackend::HashChain);
    assert_eq!(gen_driver.window_size(), BETTER_DEFAULT_WINDOW_SIZE as u64);
    assert_eq!(gen_driver.hc_matcher().lazy_depth, 2);
}
2449
#[test]
fn simple_matcher_updates_offset_history_after_emitting_match() {
    // "abcde" three times: the first copy becomes literals and the remaining
    // ten bytes match at offset 5.
    let mut generator = MatchGenerator::new(64);
    generator.add_data(
        b"abcdeabcdeabcde".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    let produced = generator.next_sequence(|seq| {
        let expected = Sequence::Triple {
            literals: b"abcde",
            offset: 5,
            match_len: 10,
        };
        assert_eq!(seq, expected);
    });
    assert!(produced);
    // Emitting the match must rotate offset 5 to the front of the repcode
    // history.
    assert_eq!(generator.offset_hist, [5, 1, 4]);
}
2471
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep1_before_hash_lookup() {
    // Two identical 10-byte halves, so an offset of 10 matches at index 10.
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghijabcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    // Pretend the first 10 bytes were already emitted so the search anchor
    // sits at the start of the repeated half.
    matcher.suffix_idx = 10;
    matcher.last_idx_in_sequence = 10;
    // rep0 = 99 cannot match; rep1 = 10 does. With zero pending literals the
    // rep1 slot is expected to be probed (and win) per the test name.
    matcher.offset_hist = [99, 10, 4];

    let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
    // (offset, match_len) — the rep1 offset of 10 matches all 10 tail bytes.
    assert_eq!(candidate, Some((10, 10)));
}
2488
#[test]
fn simple_matcher_repcode_can_target_previous_window_entry() {
    let mut generator = MatchGenerator::new(64);
    // First block is committed and skipped, leaving it as pure history in
    // its own window entry.
    generator.add_data(
        b"abcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    generator.skip_matching();
    // Identical second block: a repcode hit must reach back across the
    // window-entry boundary into the previous block.
    generator.add_data(
        b"abcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    // Seed rep1 = 10 (the cross-entry distance); rep0 = 99 cannot match.
    generator.offset_hist = [99, 10, 4];

    let candidate = generator.repcode_candidate(&generator.window.last().unwrap().data, 0);
    assert_eq!(candidate, Some((10, 10)));
}
2509
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep2() {
    // Two identical 10-byte halves, so an offset of 10 matches at index 10.
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghijabcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    // Anchor the search at the start of the repeated half.
    matcher.suffix_idx = 10;
    matcher.last_idx_in_sequence = 10;
    // rep1=4 does not match at idx 10, rep2=10 does.
    matcher.offset_hist = [99, 4, 10];

    let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
    // (offset, match_len) — the rep2 offset of 10 matches all 10 tail bytes.
    assert_eq!(candidate, Some((10, 10)));
}
2526
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep0_minus1() {
    // Two identical 10-byte halves: only an offset of 10 matches at idx 10.
    let mut generator = MatchGenerator::new(64);
    generator.add_data(
        b"abcdefghijabcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    // Anchor the search at the start of the repeated half.
    generator.suffix_idx = 10;
    generator.last_idx_in_sequence = 10;
    // Neither rep1=4 nor rep2=99 matches, but rep0-1 == 11-1 == 10 does —
    // exercising the zero-literal rep0-minus-one candidate.
    generator.offset_hist = [11, 4, 99];

    let candidate = generator.repcode_candidate(&generator.window.last().unwrap().data[10..], 0);
    assert_eq!(candidate, Some((10, 10)));
}
2543
#[test]
fn simple_matcher_repcode_rejects_offsets_beyond_searchable_prefix() {
    let mut matcher = MatchGenerator::new(64);
    // First 10-byte block becomes skipped history in its own window entry.
    matcher.add_data(
        b"abcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.skip_matching();
    matcher.add_data(
        b"klmnopqrst".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    // Searchable prefix is the 10 history bytes plus 3 bytes of the current
    // block — 13 total.
    matcher.suffix_idx = 3;

    // Offset 14 would reach one byte past the searchable prefix, so no
    // match length may be reported for it.
    let candidate = matcher.offset_match_len(14, &matcher.window.last().unwrap().data[3..]);
    assert_eq!(candidate, None);
}
2563
#[test]
fn simple_matcher_skip_matching_seeds_every_position_even_with_fast_step() {
    let mut mg = MatchGenerator::new(64);
    mg.hash_fill_step = FAST_HASH_FILL_STEP;
    mg.add_data(
        b"abcdefghijklmnop".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    // Skipping the block must still seed every suffix position, not just
    // every FAST_HASH_FILL_STEP-th one, so later blocks can match anywhere.
    mg.skip_matching();
    mg.add_data(b"bcdef".to_vec(), SuffixStore::with_capacity(64), |_, _| {});

    let produced = mg.next_sequence(|seq| {
        // "bcdef" starts at position 1 of the 16-byte history: offset 15.
        let expected = Sequence::Triple {
            literals: b"",
            offset: 15,
            match_len: 5,
        };
        assert_eq!(seq, expected);
    });
    assert!(produced);
    // Everything was consumed by the single match.
    assert!(!mg.next_sequence(|_| {}));
}
2588
#[test]
fn simple_matcher_add_suffixes_till_backfills_last_searchable_anchor() {
    let mut mg = MatchGenerator::new(64);
    mg.hash_fill_step = FAST_HASH_FILL_STEP;
    mg.add_data(
        b"01234abcde".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    mg.add_suffixes_till(10, FAST_HASH_FILL_STEP);

    // Position 5 is the last index that still leaves a full MIN_MATCH_LEN
    // suffix; even with the interleaved fast step it must be registered.
    let entry = mg.window.last().unwrap();
    assert_eq!(entry.suffixes.get(&entry.data[5..10]), Some(5));
}
2604
#[test]
fn simple_matcher_add_suffixes_till_skips_when_idx_below_min_match_len() {
    let mut matcher = MatchGenerator::new(128);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;
    matcher.add_data(
        b"abcdefghijklmnopqrstuvwxyz".to_vec(),
        SuffixStore::with_capacity(1 << 16),
        |_, _| {},
    );

    // Requesting suffixes up to an index below MIN_MATCH_LEN cannot form
    // even one full-length key, so nothing should be registered.
    matcher.add_suffixes_till(MIN_MATCH_LEN - 1, FAST_HASH_FILL_STEP);

    let last = matcher.window.last().unwrap();
    // Not even the key at position 0 may have been inserted.
    let first_key = &last.data[..MIN_MATCH_LEN];
    assert_eq!(last.suffixes.get(first_key), None);
}
2621
#[test]
fn simple_matcher_add_suffixes_till_fast_step_registers_interleaved_positions() {
    let mut mg = MatchGenerator::new(128);
    mg.hash_fill_step = FAST_HASH_FILL_STEP;
    mg.add_data(
        b"abcdefghijklmnopqrstuvwxyz".to_vec(),
        SuffixStore::with_capacity(1 << 16),
        |_, _| {},
    );

    mg.add_suffixes_till(17, FAST_HASH_FILL_STEP);

    // With a fill step of 3, suffixes are seeded at 0, 3, 6, 9, 12 —
    // every step-th anchor that still fits a full MIN_MATCH_LEN key
    // below the requested bound.
    let entry = mg.window.last().unwrap();
    let mut pos = 0usize;
    while pos <= 12 {
        let key = &entry.data[pos..pos + MIN_MATCH_LEN];
        assert_eq!(
            entry.suffixes.get(key),
            Some(pos),
            "expected interleaved suffix registration at pos {pos}"
        );
        pos += FAST_HASH_FILL_STEP;
    }
}
2644
#[test]
fn dfast_skip_matching_handles_window_eviction() {
    // A 16-byte window with three 6-byte blocks forces the oldest block out.
    let mut generator = DfastMatchGenerator::new(16);

    generator.add_data(alloc::vec![1, 2, 3, 4, 5, 6], |_| {});
    generator.skip_matching();
    generator.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});
    generator.skip_matching();
    generator.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});

    // Decode the emitted sequences against the skipped history so that any
    // offset pointing into evicted data would surface as wrong output.
    let mut decoded = alloc::vec![7, 8, 9, 10, 11, 12];
    generator.start_matching(|seq| match seq {
        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            decoded.extend_from_slice(literals);
            let base = decoded.len() - offset;
            // Byte-by-byte copy so overlapping matches resolve correctly.
            for step in 0..match_len {
                let byte = decoded[base + step];
                decoded.push(byte);
            }
        }
    });

    assert_eq!(decoded, [7, 8, 9, 10, 11, 12, 7, 8, 9, 10, 11, 12]);
}
2674
#[test]
fn dfast_add_data_callback_reports_evicted_len_not_capacity() {
    let mut generator = DfastMatchGenerator::new(8);

    // Both blocks deliberately carry excess backing capacity (64 vs 8 bytes
    // of content) so an implementation reporting capacity would be caught.
    let mut block_a = Vec::with_capacity(64);
    block_a.extend_from_slice(b"abcdefgh");
    generator.add_data(block_a, |_| {});

    let mut block_b = Vec::with_capacity(64);
    block_b.extend_from_slice(b"ijklmnop");

    // Adding the second 8-byte block to an 8-byte window evicts the first.
    let mut evicted_len = None;
    generator.add_data(block_b, |data| {
        evicted_len = Some(data.len());
    });

    assert_eq!(
        evicted_len,
        Some(8),
        "eviction callback must report evicted byte length, not backing capacity"
    );
}
2697
#[test]
fn dfast_trim_to_window_callback_reports_evicted_len_not_capacity() {
    let mut generator = DfastMatchGenerator::new(16);

    // Both blocks carry excess backing capacity (64 vs 8 bytes of content)
    // so an implementation reporting capacity would be caught.
    let mut block_a = Vec::with_capacity(64);
    block_a.extend_from_slice(b"abcdefgh");
    generator.add_data(block_a, |_| {});

    let mut block_b = Vec::with_capacity(64);
    block_b.extend_from_slice(b"ijklmnop");
    generator.add_data(block_b, |_| {});

    // Shrinking the window to 8 bytes makes the first block excess history.
    generator.max_window_size = 8;

    let mut evicted_len = None;
    generator.trim_to_window(|data| {
        evicted_len = Some(data.len());
    });

    assert_eq!(
        evicted_len,
        Some(8),
        "trim callback must report evicted byte length, not backing capacity"
    );
}
2723
#[test]
fn dfast_inserts_tail_positions_for_next_block_matching() {
    let mut matcher = DfastMatchGenerator::new(DFAST_DEFAULT_WINDOW_SIZE);

    // First block ends in "bcdea" — the tail positions must be hashed so the
    // next block can match against them from its very first byte.
    matcher.add_data(b"012345bcdea".to_vec(), |_| {});
    let mut history = Vec::new();
    matcher.start_matching(|seq| match seq {
        Sequence::Literals { literals } => history.extend_from_slice(literals),
        Sequence::Triple { .. } => unreachable!("first block should not match history"),
    });
    assert_eq!(history, b"012345bcdea");

    // Second block repeats the 5-byte period "bcdea" starting exactly where
    // the first block's tail left off.
    matcher.add_data(b"bcdeabcdeab".to_vec(), |_| {});
    let mut saw_first_sequence = false;
    matcher.start_matching(|seq| {
        // Exactly one sequence is expected: a single match covering the
        // whole second block, anchored at its first byte.
        assert!(!saw_first_sequence, "expected a single cross-block match");
        saw_first_sequence = true;
        match seq {
            Sequence::Literals { .. } => {
                panic!("expected tail-anchored cross-block match before any literals")
            }
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => {
                // Zero literals: the match starts at the block boundary.
                assert_eq!(literals, b"");
                assert_eq!(offset, 5);
                assert_eq!(match_len, 11);
                // Overlapping copy (offset 5 < match_len 11) must be decoded
                // byte by byte, exactly like a zstd decoder would.
                let start = history.len() - offset;
                for i in 0..match_len {
                    let byte = history[start + i];
                    history.push(byte);
                }
            }
        }
    });

    assert!(
        saw_first_sequence,
        "expected tail-anchored cross-block match"
    );
    // Full round-trip: history now holds both blocks decoded in order.
    assert_eq!(history, b"012345bcdeabcdeabcdeab");
}