// structured_zstd/encoding/match_generator.rs
//! Matching algorithm used to find repeated parts in the original data
//!
//! The Zstd format relies on finding repeated sequences of data and compressing these sequences as instructions to the decoder.
//! A sequence basically tells the decoder "Go back X bytes and copy Y bytes to the end of your decode buffer".
//!
//! The task here is to efficiently find matches in the already encoded data for the current suffix of the not yet encoded data.

8use alloc::collections::VecDeque;
9use alloc::vec::Vec;
10use core::convert::TryInto;
11use core::num::NonZeroUsize;
12
13use super::CompressionLevel;
14use super::Matcher;
15use super::Sequence;
16use super::blocks::encode_offset_with_history;
17
// --- Simple (greedy) backend tuning ---------------------------------------
/// Shortest match the Simple backend works with; also the minimum primed
/// dictionary tail it will hash (the suffix hash reads 5 bytes).
const MIN_MATCH_LEN: usize = 5;
/// Hash-fill stride for the fastest configurations (level 1 uses step 3).
/// NOTE(review): no use of this const is visible in this chunk — confirm it
/// is still referenced before removing.
const FAST_HASH_FILL_STEP: usize = 3;
// --- Dfast (double-fast) backend tuning ------------------------------------
const DFAST_MIN_MATCH_LEN: usize = 6;
const DFAST_TARGET_LEN: usize = 48;
// Keep these aligned with the issue's zstd level-3/dfast target unless ratio
// measurements show we can shrink them without regressing acceptance tests.
const DFAST_HASH_BITS: usize = 20;
const DFAST_SEARCH_DEPTH: usize = 4;
/// Sentinel for an unused dfast table slot; `usize::MAX` can never be a
/// valid stored position.
const DFAST_EMPTY_SLOT: usize = usize::MAX;

// --- Hash-chain backend defaults (these seed HC_CONFIG below) ---------------
const HC_HASH_LOG: usize = 20;
const HC_CHAIN_LOG: usize = 19;
const HC_SEARCH_DEPTH: usize = 16;
const HC_MIN_MATCH_LEN: usize = 5;
const HC_TARGET_LEN: usize = 48;
// Positions are stored as (abs_pos + 1) so that 0 is a safe empty sentinel
// that can never collide with any valid position, even at the 4 GiB boundary.
const HC_EMPTY: u32 = 0;

// Maximum search depth across all HC-based levels. Used to size the
// fixed-length candidate array returned by chain_candidates().
// Keep >= every `search_depth` in LEVEL_TABLE (currently the deepest is 32).
const MAX_HC_SEARCH_DEPTH: usize = 32;
40
/// Bundled tuning knobs for the hash-chain matcher. Using a typed config
/// instead of positional `usize` args eliminates parameter-order hazards.
///
/// The fields mirror zstd's hashLog/chainLog/searchLog/targetLength
/// parameter family.
#[derive(Copy, Clone)]
struct HcConfig {
    /// log2 of the hash (head) table size.
    hash_log: usize,
    /// log2 of the chain table size.
    chain_log: usize,
    /// Maximum number of chain candidates examined per position.
    search_depth: usize,
    /// Match length considered "good enough" to stop searching deeper
    /// (zstd `targetLength` analogue — exact semantics live in the HC
    /// matcher, outside this view).
    target_len: usize,
}
50
/// Default hash-chain configuration (matches the level-7 table entry).
/// Also used as an inert placeholder `hc` value for Simple/Dfast rows in
/// [`LEVEL_TABLE`], where the HC knobs are never read.
const HC_CONFIG: HcConfig = HcConfig {
    hash_log: HC_HASH_LOG,
    chain_log: HC_CHAIN_LOG,
    search_depth: HC_SEARCH_DEPTH,
    target_len: HC_TARGET_LEN,
};
57
/// Best-level: deeper search, larger tables, higher target length.
/// Used by level 11 in [`LEVEL_TABLE`] (which `CompressionLevel::Best`
/// resolves to).
const BEST_HC_CONFIG: HcConfig = HcConfig {
    hash_log: 21,
    chain_log: 20,
    search_depth: 32,
    target_len: 128,
};
65
/// Resolved tuning parameters for a compression level.
#[derive(Copy, Clone)]
struct LevelParams {
    /// Which matcher implementation services this level.
    backend: MatcherBackend,
    /// log2 of the live match window in bytes (window = 1 << window_log).
    window_log: u8,
    /// Stride between indexed positions; larger values index fewer
    /// positions for speed (used by the Simple backend).
    hash_fill_step: usize,
    /// Lazy-matching depth (0 = greedy); only assigned to the dfast and
    /// hash-chain matchers in `reset()`.
    lazy_depth: u8,
    /// Hash-chain knobs; ignored unless `backend == HashChain`.
    hc: HcConfig,
}
75
76fn dfast_hash_bits_for_window(max_window_size: usize) -> usize {
77    let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
78    window_log.clamp(MIN_WINDOW_LOG as usize, DFAST_HASH_BITS)
79}
80
/// Parameter table for numeric compression levels 1–22.
///
/// Each entry maps a zstd compression level to the best-available matcher
/// backend and tuning knobs.  Levels that require strategies this crate does
/// not implement (greedy, btopt, btultra) are approximated with the closest
/// available backend.
///
/// Index 0 = level 1, index 21 = level 22.
///
/// Note: levels 17–22 currently share identical hash-chain parameters; they
/// exist so every numeric level resolves, and can diverge once stronger
/// strategies are available.
#[rustfmt::skip]
const LEVEL_TABLE: [LevelParams; 22] = [
    // Lvl  Strategy       wlog  step  lazy  HC config
    // ---  -------------- ----  ----  ----  ------------------------------------------
    /* 1 */ LevelParams { backend: MatcherBackend::Simple,    window_log: 17, hash_fill_step: 3, lazy_depth: 0, hc: HC_CONFIG },
    /* 2 */ LevelParams { backend: MatcherBackend::Dfast,     window_log: 19, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG },
    /* 3 */ LevelParams { backend: MatcherBackend::Dfast,     window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG },
    /* 4 */ LevelParams { backend: MatcherBackend::Dfast,     window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG },
    /* 5 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HcConfig { hash_log: 18, chain_log: 17, search_depth: 4,  target_len: 32  } },
    /* 6 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 1, hc: HcConfig { hash_log: 19, chain_log: 18, search_depth: 8,  target_len: 48  } },
    /* 7 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 16, target_len: 48  } },
    /* 8 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 24, target_len: 64  } },
    /* 9 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 24, target_len: 64  } },
    /*10 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 24, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 28, target_len: 96  } },
    /*11 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 24, hash_fill_step: 1, lazy_depth: 2, hc: BEST_HC_CONFIG },
    /*12 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 128 } },
    /*13 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 160 } },
    /*14 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 22, search_depth: 32, target_len: 192 } },
    /*15 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 22, search_depth: 32, target_len: 192 } },
    /*16 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 22, search_depth: 32, target_len: 256 } },
    /*17 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 } },
    /*18 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 } },
    /*19 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 } },
    /*20 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 } },
    /*21 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 } },
    /*22 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 } },
];
116
/// Smallest window_log the encoder will use regardless of source size.
/// (1 << 10 = 1 KiB; `adjust_params_for_source_size` floors its cap here.)
const MIN_WINDOW_LOG: u8 = 10;
119
120/// Adjust level parameters for a known source size.
121///
122/// This derives a cap from `ceil(log2(src_size))`, then clamps it to
123/// [`MIN_WINDOW_LOG`]. A zero-byte size hint is treated as
124/// [`MIN_WINDOW_LOG`]. This keeps tables bounded for
125/// small inputs while preserving the encoder's minimum supported window.
126/// For the HC backend, `hash_log` and `chain_log` are reduced
127/// proportionally.
128fn adjust_params_for_source_size(mut params: LevelParams, src_size: u64) -> LevelParams {
129    // Derive a source-size-based cap from ceil(log2(src_size)), then
130    // clamp to MIN_WINDOW_LOG. For inputs smaller than 1 KiB (or zero) we keep the
131    // 1 KiB minimum window instead of shrinking below that floor.
132    let src_log = if src_size == 0 {
133        MIN_WINDOW_LOG
134    } else {
135        (64 - (src_size - 1).leading_zeros()) as u8 // ceil_log2
136    };
137    let src_log = src_log.max(MIN_WINDOW_LOG);
138    if src_log < params.window_log {
139        params.window_log = src_log;
140    }
141    // For HC backend: also cap hash_log and chain_log so tables are
142    // proportional to the source, avoiding multi-MB allocations for
143    // tiny inputs.
144    if params.backend == MatcherBackend::HashChain {
145        if (src_log + 2) < params.hc.hash_log as u8 {
146            params.hc.hash_log = (src_log + 2) as usize;
147        }
148        if (src_log + 1) < params.hc.chain_log as u8 {
149            params.hc.chain_log = (src_log + 1) as usize;
150        }
151    }
152    params
153}
154
155/// Resolve a [`CompressionLevel`] to internal tuning parameters,
156/// optionally adjusted for a known source size.
157fn resolve_level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
158    let params = match level {
159        CompressionLevel::Uncompressed => LevelParams {
160            backend: MatcherBackend::Simple,
161            window_log: 17,
162            hash_fill_step: 1,
163            lazy_depth: 0,
164            hc: HC_CONFIG,
165        },
166        CompressionLevel::Fastest => LEVEL_TABLE[0],
167        CompressionLevel::Default => LEVEL_TABLE[2],
168        CompressionLevel::Better => LEVEL_TABLE[6],
169        CompressionLevel::Best => LEVEL_TABLE[10],
170        CompressionLevel::Level(n) => {
171            if n > 0 {
172                let idx = (n as usize).min(CompressionLevel::MAX_LEVEL as usize) - 1;
173                LEVEL_TABLE[idx]
174            } else if n == 0 {
175                // Level 0 = default, matching C zstd semantics.
176                LEVEL_TABLE[CompressionLevel::DEFAULT_LEVEL as usize - 1]
177            } else {
178                // Negative levels: ultra-fast with the Simple backend.
179                // Acceleration grows with magnitude, expressed as larger
180                // hash_fill_step (fewer positions indexed).
181                let acceleration =
182                    (n.saturating_abs() as usize).min((-CompressionLevel::MIN_LEVEL) as usize);
183                let step = (acceleration + 3).min(128);
184                LevelParams {
185                    backend: MatcherBackend::Simple,
186                    window_log: 17,
187                    hash_fill_step: step,
188                    lazy_depth: 0,
189                    hc: HC_CONFIG,
190                }
191            }
192        }
193    };
194    if let Some(size) = source_size {
195        adjust_params_for_source_size(params, size)
196    } else {
197        params
198    }
199}
200
/// Which match-finding implementation is currently active.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum MatcherBackend {
    /// Single-probe suffix-hash matcher (level 1 and all negative levels).
    Simple,
    /// Double-fast style matcher (levels 2–4 in [`LEVEL_TABLE`]).
    Dfast,
    /// Hash-chain lazy matcher (levels 5 and above).
    HashChain,
}
207
/// This is the default implementation of the `Matcher` trait. It allocates and reuses the buffers when possible.
pub struct MatchGeneratorDriver {
    // Recycled data buffers, shared across all backends.
    vec_pool: Vec<Vec<u8>>,
    // Recycled suffix stores (used only by the Simple backend).
    suffix_pool: Vec<SuffixStore>,
    // Simple backend; always constructed since it is the default at creation.
    match_generator: MatchGenerator,
    // Dfast backend; created lazily by the first reset() that selects it.
    dfast_match_generator: Option<DfastMatchGenerator>,
    // Hash-chain backend; created lazily by the first reset() that selects it.
    hc_match_generator: Option<HcMatchGenerator>,
    // Which of the three backends the trait methods dispatch to.
    active_backend: MatcherBackend,
    // Current per-chunk allocation size (base size capped by the live window).
    slice_size: usize,
    // Slice size as passed to new(); reset() re-derives slice_size from it.
    base_slice_size: usize,
    // Frame header window size must stay at the configured live-window budget.
    // Dictionary retention expands internal matcher capacity only.
    reported_window_size: usize,
    // Tracks currently retained bytes that originated from primed dictionary
    // history and have not been evicted yet.
    dictionary_retained_budget: usize,
    // Source size hint for next frame (set via set_source_size_hint, cleared on reset).
    source_size_hint: Option<u64>,
}
227
impl MatchGeneratorDriver {
    /// `slice_size` sets the base block allocation size used for matcher input chunks.
    /// `max_slices_in_window` determines the initial window capacity at construction
    /// time. Effective window sizing is recalculated on every [`reset`](Self::reset)
    /// from the resolved compression level and optional source-size hint.
    pub(crate) fn new(slice_size: usize, max_slices_in_window: usize) -> Self {
        let max_window_size = max_slices_in_window * slice_size;
        Self {
            vec_pool: Vec::new(),
            suffix_pool: Vec::new(),
            match_generator: MatchGenerator::new(max_window_size),
            dfast_match_generator: None,
            hc_match_generator: None,
            active_backend: MatcherBackend::Simple,
            slice_size,
            base_slice_size: slice_size,
            reported_window_size: max_window_size,
            dictionary_retained_budget: 0,
            source_size_hint: None,
        }
    }

    /// Resolve tuning parameters for `level` (see [`resolve_level_params`]).
    fn level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
        resolve_level_params(level, source_size)
    }

    /// Shared accessor for the dfast backend.
    ///
    /// # Panics
    /// Panics if `reset()` has not initialized the dfast backend.
    fn dfast_matcher(&self) -> &DfastMatchGenerator {
        self.dfast_match_generator
            .as_ref()
            .expect("dfast backend must be initialized by reset() before use")
    }

    /// Mutable counterpart of [`Self::dfast_matcher`]; same panic contract.
    fn dfast_matcher_mut(&mut self) -> &mut DfastMatchGenerator {
        self.dfast_match_generator
            .as_mut()
            .expect("dfast backend must be initialized by reset() before use")
    }

    /// Shared accessor for the hash-chain backend.
    ///
    /// # Panics
    /// Panics if `reset()` has not initialized the hash-chain backend.
    fn hc_matcher(&self) -> &HcMatchGenerator {
        self.hc_match_generator
            .as_ref()
            .expect("hash chain backend must be initialized by reset() before use")
    }

    /// Mutable counterpart of [`Self::hc_matcher`]; same panic contract.
    fn hc_matcher_mut(&mut self) -> &mut HcMatchGenerator {
        self.hc_match_generator
            .as_mut()
            .expect("hash chain backend must be initialized by reset() before use")
    }

    /// Account for `evicted_bytes` leaving the window: any portion that was
    /// primed dictionary history shrinks both the retained-budget counter
    /// and the active backend's (dictionary-enlarged) `max_window_size`,
    /// reversing the expansion done in `prime_with_dictionary`.
    fn retire_dictionary_budget(&mut self, evicted_bytes: usize) {
        let reclaimed = evicted_bytes.min(self.dictionary_retained_budget);
        if reclaimed == 0 {
            return;
        }
        self.dictionary_retained_budget -= reclaimed;
        match self.active_backend {
            MatcherBackend::Simple => {
                self.match_generator.max_window_size = self
                    .match_generator
                    .max_window_size
                    .saturating_sub(reclaimed);
            }
            MatcherBackend::Dfast => {
                let matcher = self.dfast_matcher_mut();
                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
            }
            MatcherBackend::HashChain => {
                let matcher = self.hc_matcher_mut();
                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
            }
        }
    }

    /// After retiring dictionary budget the effective window may have
    /// shrunk; keep evicting oldest window data (recycling the buffers into
    /// the pools) and retiring the freed budget until an iteration evicts
    /// nothing, i.e. the matcher content fits `max_window_size` again.
    fn trim_after_budget_retire(&mut self) {
        loop {
            let mut evicted_bytes = 0usize;
            match self.active_backend {
                MatcherBackend::Simple => {
                    let vec_pool = &mut self.vec_pool;
                    let suffix_pool = &mut self.suffix_pool;
                    // reserve(0) evicts whatever no longer fits the window
                    // without claiming new space.
                    self.match_generator.reserve(0, |mut data, mut suffixes| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                        suffixes.slots.clear();
                        suffixes.slots.resize(suffixes.slots.capacity(), None);
                        suffix_pool.push(suffixes);
                    });
                }
                MatcherBackend::Dfast => {
                    let mut retired = Vec::new();
                    self.dfast_matcher_mut().trim_to_window(|data| {
                        evicted_bytes += data.len();
                        retired.push(data);
                    });
                    for mut data in retired {
                        data.resize(data.capacity(), 0);
                        self.vec_pool.push(data);
                    }
                }
                MatcherBackend::HashChain => {
                    let mut retired = Vec::new();
                    self.hc_matcher_mut().trim_to_window(|data| {
                        evicted_bytes += data.len();
                        retired.push(data);
                    });
                    for mut data in retired {
                        data.resize(data.capacity(), 0);
                        self.vec_pool.push(data);
                    }
                }
            }
            if evicted_bytes == 0 {
                break;
            }
            self.retire_dictionary_budget(evicted_bytes);
        }
    }
}
348
349impl Matcher for MatchGeneratorDriver {
350    fn supports_dictionary_priming(&self) -> bool {
351        true
352    }
353
354    fn set_source_size_hint(&mut self, size: u64) {
355        self.source_size_hint = Some(size);
356    }
357
358    fn reset(&mut self, level: CompressionLevel) {
359        let hint = self.source_size_hint.take();
360        let hinted = hint.is_some();
361        let params = Self::level_params(level, hint);
362        let max_window_size = 1usize << params.window_log;
363        self.dictionary_retained_budget = 0;
364        if self.active_backend != params.backend {
365            match self.active_backend {
366                MatcherBackend::Simple => {
367                    let vec_pool = &mut self.vec_pool;
368                    let suffix_pool = &mut self.suffix_pool;
369                    self.match_generator.reset(|mut data, mut suffixes| {
370                        data.resize(data.capacity(), 0);
371                        vec_pool.push(data);
372                        suffixes.slots.clear();
373                        suffixes.slots.resize(suffixes.slots.capacity(), None);
374                        suffix_pool.push(suffixes);
375                    });
376                }
377                MatcherBackend::Dfast => {
378                    if let Some(dfast) = self.dfast_match_generator.as_mut() {
379                        let vec_pool = &mut self.vec_pool;
380                        dfast.reset(|mut data| {
381                            data.resize(data.capacity(), 0);
382                            vec_pool.push(data);
383                        });
384                    }
385                }
386                MatcherBackend::HashChain => {
387                    if let Some(hc) = self.hc_match_generator.as_mut() {
388                        // Release oversized tables when switching away from
389                        // HashChain so Best's larger allocations don't persist.
390                        hc.hash_table = Vec::new();
391                        hc.chain_table = Vec::new();
392                        let vec_pool = &mut self.vec_pool;
393                        hc.reset(|mut data| {
394                            data.resize(data.capacity(), 0);
395                            vec_pool.push(data);
396                        });
397                    }
398                }
399            }
400        }
401
402        self.active_backend = params.backend;
403        self.slice_size = self.base_slice_size.min(max_window_size);
404        self.reported_window_size = max_window_size;
405        match self.active_backend {
406            MatcherBackend::Simple => {
407                let vec_pool = &mut self.vec_pool;
408                let suffix_pool = &mut self.suffix_pool;
409                self.match_generator.max_window_size = max_window_size;
410                self.match_generator.hash_fill_step = params.hash_fill_step;
411                self.match_generator.reset(|mut data, mut suffixes| {
412                    data.resize(data.capacity(), 0);
413                    vec_pool.push(data);
414                    suffixes.slots.clear();
415                    suffixes.slots.resize(suffixes.slots.capacity(), None);
416                    suffix_pool.push(suffixes);
417                });
418            }
419            MatcherBackend::Dfast => {
420                let dfast = self
421                    .dfast_match_generator
422                    .get_or_insert_with(|| DfastMatchGenerator::new(max_window_size));
423                dfast.max_window_size = max_window_size;
424                dfast.lazy_depth = params.lazy_depth;
425                dfast.set_hash_bits(if hinted {
426                    dfast_hash_bits_for_window(max_window_size)
427                } else {
428                    DFAST_HASH_BITS
429                });
430                let vec_pool = &mut self.vec_pool;
431                dfast.reset(|mut data| {
432                    data.resize(data.capacity(), 0);
433                    vec_pool.push(data);
434                });
435            }
436            MatcherBackend::HashChain => {
437                let hc = self
438                    .hc_match_generator
439                    .get_or_insert_with(|| HcMatchGenerator::new(max_window_size));
440                hc.max_window_size = max_window_size;
441                hc.lazy_depth = params.lazy_depth;
442                hc.configure(params.hc);
443                let vec_pool = &mut self.vec_pool;
444                hc.reset(|mut data| {
445                    data.resize(data.capacity(), 0);
446                    vec_pool.push(data);
447                });
448            }
449        }
450    }
451
452    fn prime_with_dictionary(&mut self, dict_content: &[u8], offset_hist: [u32; 3]) {
453        match self.active_backend {
454            MatcherBackend::Simple => self.match_generator.offset_hist = offset_hist,
455            MatcherBackend::Dfast => self.dfast_matcher_mut().offset_hist = offset_hist,
456            MatcherBackend::HashChain => self.hc_matcher_mut().offset_hist = offset_hist,
457        }
458
459        if dict_content.is_empty() {
460            return;
461        }
462
463        // Dictionary bytes should stay addressable until produced frame output
464        // itself exceeds the live window size.
465        let retained_dict_budget = dict_content.len();
466        match self.active_backend {
467            MatcherBackend::Simple => {
468                self.match_generator.max_window_size = self
469                    .match_generator
470                    .max_window_size
471                    .saturating_add(retained_dict_budget);
472            }
473            MatcherBackend::Dfast => {
474                let matcher = self.dfast_matcher_mut();
475                matcher.max_window_size =
476                    matcher.max_window_size.saturating_add(retained_dict_budget);
477            }
478            MatcherBackend::HashChain => {
479                let matcher = self.hc_matcher_mut();
480                matcher.max_window_size =
481                    matcher.max_window_size.saturating_add(retained_dict_budget);
482            }
483        }
484
485        let mut start = 0usize;
486        let mut committed_dict_budget = 0usize;
487        // insert_position needs 4 bytes of lookahead for hashing;
488        // backfill_boundary_positions re-visits tail positions once the
489        // next slice extends history, but cannot hash <4 byte fragments.
490        let min_primed_tail = match self.active_backend {
491            MatcherBackend::Simple => MIN_MATCH_LEN,
492            MatcherBackend::Dfast | MatcherBackend::HashChain => 4,
493        };
494        while start < dict_content.len() {
495            let end = (start + self.slice_size).min(dict_content.len());
496            if end - start < min_primed_tail {
497                break;
498            }
499            let mut space = self.get_next_space();
500            space.clear();
501            space.extend_from_slice(&dict_content[start..end]);
502            self.commit_space(space);
503            self.skip_matching();
504            committed_dict_budget += end - start;
505            start = end;
506        }
507
508        let uncommitted_tail_budget = retained_dict_budget.saturating_sub(committed_dict_budget);
509        if uncommitted_tail_budget > 0 {
510            match self.active_backend {
511                MatcherBackend::Simple => {
512                    self.match_generator.max_window_size = self
513                        .match_generator
514                        .max_window_size
515                        .saturating_sub(uncommitted_tail_budget);
516                }
517                MatcherBackend::Dfast => {
518                    let matcher = self.dfast_matcher_mut();
519                    matcher.max_window_size = matcher
520                        .max_window_size
521                        .saturating_sub(uncommitted_tail_budget);
522                }
523                MatcherBackend::HashChain => {
524                    let matcher = self.hc_matcher_mut();
525                    matcher.max_window_size = matcher
526                        .max_window_size
527                        .saturating_sub(uncommitted_tail_budget);
528                }
529            }
530        }
531        if committed_dict_budget > 0 {
532            self.dictionary_retained_budget = self
533                .dictionary_retained_budget
534                .saturating_add(committed_dict_budget);
535        }
536    }
537
538    fn window_size(&self) -> u64 {
539        self.reported_window_size as u64
540    }
541
542    fn get_next_space(&mut self) -> Vec<u8> {
543        if let Some(mut space) = self.vec_pool.pop() {
544            if space.len() > self.slice_size {
545                space.truncate(self.slice_size);
546            }
547            if space.len() < self.slice_size {
548                space.resize(self.slice_size, 0);
549            }
550            return space;
551        }
552        alloc::vec![0; self.slice_size]
553    }
554
555    fn get_last_space(&mut self) -> &[u8] {
556        match self.active_backend {
557            MatcherBackend::Simple => self.match_generator.window.last().unwrap().data.as_slice(),
558            MatcherBackend::Dfast => self.dfast_matcher().get_last_space(),
559            MatcherBackend::HashChain => self.hc_matcher().get_last_space(),
560        }
561    }
562
563    fn commit_space(&mut self, space: Vec<u8>) {
564        match self.active_backend {
565            MatcherBackend::Simple => {
566                let vec_pool = &mut self.vec_pool;
567                let mut evicted_bytes = 0usize;
568                let suffixes = match self.suffix_pool.pop() {
569                    Some(store) if store.slots.len() >= space.len() => store,
570                    _ => SuffixStore::with_capacity(space.len()),
571                };
572                let suffix_pool = &mut self.suffix_pool;
573                self.match_generator
574                    .add_data(space, suffixes, |mut data, mut suffixes| {
575                        evicted_bytes += data.len();
576                        data.resize(data.capacity(), 0);
577                        vec_pool.push(data);
578                        suffixes.slots.clear();
579                        suffixes.slots.resize(suffixes.slots.capacity(), None);
580                        suffix_pool.push(suffixes);
581                    });
582                self.retire_dictionary_budget(evicted_bytes);
583                self.trim_after_budget_retire();
584            }
585            MatcherBackend::Dfast => {
586                let vec_pool = &mut self.vec_pool;
587                let mut evicted_bytes = 0usize;
588                self.dfast_match_generator
589                    .as_mut()
590                    .expect("dfast backend must be initialized by reset() before use")
591                    .add_data(space, |mut data| {
592                        evicted_bytes += data.len();
593                        data.resize(data.capacity(), 0);
594                        vec_pool.push(data);
595                    });
596                self.retire_dictionary_budget(evicted_bytes);
597                self.trim_after_budget_retire();
598            }
599            MatcherBackend::HashChain => {
600                let vec_pool = &mut self.vec_pool;
601                let mut evicted_bytes = 0usize;
602                self.hc_match_generator
603                    .as_mut()
604                    .expect("hash chain backend must be initialized by reset() before use")
605                    .add_data(space, |mut data| {
606                        evicted_bytes += data.len();
607                        data.resize(data.capacity(), 0);
608                        vec_pool.push(data);
609                    });
610                self.retire_dictionary_budget(evicted_bytes);
611                self.trim_after_budget_retire();
612            }
613        }
614    }
615
616    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
617        match self.active_backend {
618            MatcherBackend::Simple => {
619                while self.match_generator.next_sequence(&mut handle_sequence) {}
620            }
621            MatcherBackend::Dfast => self
622                .dfast_matcher_mut()
623                .start_matching(&mut handle_sequence),
624            MatcherBackend::HashChain => self.hc_matcher_mut().start_matching(&mut handle_sequence),
625        }
626    }
627    fn skip_matching(&mut self) {
628        match self.active_backend {
629            MatcherBackend::Simple => self.match_generator.skip_matching(),
630            MatcherBackend::Dfast => self.dfast_matcher_mut().skip_matching(),
631            MatcherBackend::HashChain => self.hc_matcher_mut().skip_matching(),
632        }
633    }
634}
635
/// This stores the index of a suffix of a string by hashing the first few bytes of that suffix
/// This means that collisions just overwrite and that you need to check validity after a get
struct SuffixStore {
    // We use NonZeroUsize to enable niche optimization here.
    // On store we do +1 and on get -1
    // This is ok since usize::MAX is never a valid offset
    slots: Vec<Option<NonZeroUsize>>,
    len_log: u32,
}

impl SuffixStore {
    /// Create a store with `capacity` hash slots.
    ///
    /// A `capacity` of 0 yields an inert store (all lookups miss, inserts
    /// are dropped) instead of panicking: `ilog2(0)` would panic, so we
    /// substitute 0 via `checked_ilog2`.
    fn with_capacity(capacity: usize) -> Self {
        Self {
            slots: alloc::vec![None; capacity],
            len_log: capacity.checked_ilog2().unwrap_or(0),
        }
    }

    /// Record `idx` for the 5-byte prefix of `suffix`, overwriting any
    /// previous entry that hashed to the same slot.
    #[inline(always)]
    fn insert(&mut self, suffix: &[u8], idx: usize) {
        let key = self.key(suffix);
        // A zero-capacity store has no slots; silently drop the entry
        // instead of indexing out of bounds.
        if let Some(slot) = self.slots.get_mut(key) {
            *slot = Some(NonZeroUsize::new(idx + 1).unwrap());
        }
    }

    /// True if some entry occupies the slot `suffix` hashes to. Because
    /// collisions overwrite, a `true` here may belong to a different suffix.
    #[inline(always)]
    fn contains_key(&self, suffix: &[u8]) -> bool {
        self.get(suffix).is_some()
    }

    /// Look up the slot for `suffix`; callers must still verify the data at
    /// the returned index actually matches (collisions overwrite).
    #[inline(always)]
    fn get(&self, suffix: &[u8]) -> Option<usize> {
        let key = self.key(suffix);
        // Checked access keeps the zero-capacity store panic-free.
        self.slots.get(key)?.map(|x| usize::from(x) - 1)
    }

    /// Hash the first 5 bytes of `suffix` into a slot index.
    #[inline(always)]
    fn key(&self, suffix: &[u8]) -> usize {
        // Capacity<=1 yields len_log=0; shifting by 64 would panic.
        if self.len_log == 0 {
            return 0;
        }

        let s0 = suffix[0] as u64;
        let s1 = suffix[1] as u64;
        let s2 = suffix[2] as u64;
        let s3 = suffix[3] as u64;
        let s4 = suffix[4] as u64;

        const POLY: u64 = 0xCF3BCCDCABu64;

        let s0 = (s0 << 24).wrapping_mul(POLY);
        let s1 = (s1 << 32).wrapping_mul(POLY);
        let s2 = (s2 << 40).wrapping_mul(POLY);
        let s3 = (s3 << 48).wrapping_mul(POLY);
        let s4 = (s4 << 56).wrapping_mul(POLY);

        let index = s0 ^ s1 ^ s2 ^ s3 ^ s4;
        // Keep the top len_log bits; the modulo guards non-power-of-two
        // capacities where slots.len() may exceed 2^len_log's floor.
        let index = index >> (64 - self.len_log);
        index as usize % self.slots.len()
    }
}
698
/// One block of historic data inside the match window.
/// We keep a window of a few of these entries
/// All of these are valid targets for a match to be generated for
struct WindowEntry {
    /// Raw bytes of this block.
    data: Vec<u8>,
    /// Stores indexes into data
    suffixes: SuffixStore,
    /// Byte distance from the start of this entry to the start of the newest
    /// entry (maintained in `add_data`).
    /// Makes offset calculations efficient
    base_offset: usize,
}
708
pub(crate) struct MatchGenerator {
    /// Upper bound (in bytes) on the sum of all window entries.
    max_window_size: usize,
    /// Data window we are operating on to find matches
    /// The data we want to find matches for is in the last slice
    window: Vec<WindowEntry>,
    /// Current total size of all entries in `window`, in bytes.
    window_size: usize,
    /// Debug-only concatenation of the whole window, used to cross-check
    /// that computed offsets really point at the matched bytes.
    #[cfg(debug_assertions)]
    concat_window: Vec<u8>,
    /// Index in the last slice that we already processed
    suffix_idx: usize,
    /// Gets updated when a new sequence is returned to point right behind that sequence
    last_idx_in_sequence: usize,
    /// Step between positions inserted into the suffix store (1 = every position).
    hash_fill_step: usize,
    /// Recent match offsets, consulted for repeat-offset (rep code) candidates.
    offset_hist: [u32; 3],
}
724
impl MatchGenerator {
    /// Index of the first differing byte inside a non-zero XOR of two words.
    /// On little-endian machines the lowest-addressed byte is the least
    /// significant one, so count trailing zero bits.
    #[inline(always)]
    #[cfg(target_endian = "little")]
    fn mismatch_byte_index(diff: usize) -> usize {
        diff.trailing_zeros() as usize / 8
    }

    /// Index of the first differing byte inside a non-zero XOR of two words.
    /// On big-endian machines the lowest-addressed byte is the most
    /// significant one, so count leading zero bits.
    #[inline(always)]
    #[cfg(target_endian = "big")]
    fn mismatch_byte_index(diff: usize) -> usize {
        diff.leading_zeros() as usize / 8
    }

    /// max_size defines how many bytes will be used at most in the window used for matching
    fn new(max_size: usize) -> Self {
        Self {
            max_window_size: max_size,
            window: Vec::new(),
            window_size: 0,
            #[cfg(debug_assertions)]
            concat_window: Vec::new(),
            suffix_idx: 0,
            last_idx_in_sequence: 0,
            hash_fill_step: 1,
            // Initial repeat offsets; the values 1/4/8 match zstd's default rep codes.
            offset_hist: [1, 4, 8],
        }
    }

    /// Clear all matching state and hand every window entry's allocations
    /// back to the caller via `reuse_space` so they can be recycled.
    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
        self.window_size = 0;
        #[cfg(debug_assertions)]
        self.concat_window.clear();
        self.suffix_idx = 0;
        self.last_idx_in_sequence = 0;
        self.offset_hist = [1, 4, 8];
        self.window.drain(..).for_each(|entry| {
            reuse_space(entry.data, entry.suffixes);
        });
    }

    /// Processes bytes in the current window until either a match is found or no more matches can be found
    /// * If a match is found handle_sequence is called with the Triple variant
    /// * If no more matches can be found but there are bytes still left handle_sequence is called with the Literals variant
    /// * If no more matches can be found and no more bytes are left this returns false
    fn next_sequence(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) -> bool {
        loop {
            let last_entry = self.window.last().unwrap();
            let data_slice = &last_entry.data;

            // We already reached the end of the window, check if we need to return a Literals{}
            if self.suffix_idx >= data_slice.len() {
                if self.last_idx_in_sequence != self.suffix_idx {
                    let literals = &data_slice[self.last_idx_in_sequence..];
                    self.last_idx_in_sequence = self.suffix_idx;
                    handle_sequence(Sequence::Literals { literals });
                    return true;
                } else {
                    return false;
                }
            }

            // If the remaining data is smaller than the minimum match length we can stop and return a Literals{}
            let data_slice = &data_slice[self.suffix_idx..];
            if data_slice.len() < MIN_MATCH_LEN {
                let last_idx_in_sequence = self.last_idx_in_sequence;
                self.last_idx_in_sequence = last_entry.data.len();
                self.suffix_idx = last_entry.data.len();
                handle_sequence(Sequence::Literals {
                    literals: &last_entry.data[last_idx_in_sequence..],
                });
                return true;
            }

            // This is the key we are looking to find a match for
            let key = &data_slice[..MIN_MATCH_LEN];
            let literals_len = self.suffix_idx - self.last_idx_in_sequence;

            // Look in each window entry
            // Repeat offsets are checked first; hash-store hits may replace them below.
            let mut candidate = self.repcode_candidate(data_slice, literals_len);
            for match_entry in self.window.iter() {
                if let Some(match_index) = match_entry.suffixes.get(key) {
                    let match_slice = &match_entry.data[match_index..];

                    // Check how long the common prefix actually is
                    let match_len = Self::common_prefix_len(match_slice, data_slice);

                    // Collisions in the suffix store might make this check fail
                    if match_len >= MIN_MATCH_LEN {
                        let offset = match_entry.base_offset + self.suffix_idx - match_index;

                        // If we are in debug/tests make sure the match we found is actually at the offset we calculated
                        #[cfg(debug_assertions)]
                        {
                            let unprocessed = last_entry.data.len() - self.suffix_idx;
                            let start = self.concat_window.len() - unprocessed - offset;
                            let end = start + match_len;
                            let check_slice = &self.concat_window[start..end];
                            debug_assert_eq!(check_slice, &match_slice[..match_len]);
                        }

                        // Prefer the longer match; break ties toward the smaller offset.
                        if let Some((old_offset, old_match_len)) = candidate {
                            if match_len > old_match_len
                                || (match_len == old_match_len && offset < old_offset)
                            {
                                candidate = Some((offset, match_len));
                            }
                        } else {
                            candidate = Some((offset, match_len));
                        }
                    }
                }
            }

            if let Some((offset, match_len)) = candidate {
                // For each index in the match we found we do not need to look for another match
                // But we still want them registered in the suffix store
                self.add_suffixes_till(self.suffix_idx + match_len, self.hash_fill_step);

                // All literals that were not included between this match and the last are now included here
                let last_entry = self.window.last().unwrap();
                let literals = &last_entry.data[self.last_idx_in_sequence..self.suffix_idx];

                // Update the indexes, all indexes upto and including the current index have been included in a sequence now
                self.suffix_idx += match_len;
                self.last_idx_in_sequence = self.suffix_idx;
                // The encoded value is discarded; this call is only for its side
                // effect of updating the repeat-offset history.
                let _ = encode_offset_with_history(
                    offset as u32,
                    literals.len() as u32,
                    &mut self.offset_hist,
                );
                handle_sequence(Sequence::Triple {
                    literals,
                    offset,
                    match_len,
                });

                return true;
            }

            // No match at this position: remember this suffix and advance one byte.
            let last_entry = self.window.last_mut().unwrap();
            let key = &last_entry.data[self.suffix_idx..self.suffix_idx + MIN_MATCH_LEN];
            if !last_entry.suffixes.contains_key(key) {
                last_entry.suffixes.insert(key, self.suffix_idx);
            }
            self.suffix_idx += 1;
        }
    }

    /// Find the common prefix length between two byte slices
    #[inline(always)]
    fn common_prefix_len(a: &[u8], b: &[u8]) -> usize {
        let max = a.len().min(b.len());
        let chunk = core::mem::size_of::<usize>();
        let mut off = 0usize;
        let lhs = a.as_ptr();
        let rhs = b.as_ptr();

        // Compare one machine word at a time while a full word is in bounds.
        while off + chunk <= max {
            // SAFETY: off + chunk <= max <= a.len(), so this unaligned read of
            // `chunk` bytes stays inside `a`.
            let lhs_word = unsafe { core::ptr::read_unaligned(lhs.add(off) as *const usize) };
            // SAFETY: off + chunk <= max <= b.len(), so this unaligned read of
            // `chunk` bytes stays inside `b`.
            let rhs_word = unsafe { core::ptr::read_unaligned(rhs.add(off) as *const usize) };
            let diff = lhs_word ^ rhs_word;
            if diff != 0 {
                return off + Self::mismatch_byte_index(diff);
            }
            off += chunk;
        }

        // Byte-wise compare for the sub-word tail.
        off + core::iter::zip(&a[off..max], &b[off..max])
            .take_while(|(x, y)| x == y)
            .count()
    }

    /// Process bytes and add the suffixes to the suffix store up to a specific index
    #[inline(always)]
    fn add_suffixes_till(&mut self, idx: usize, fill_step: usize) {
        let start = self.suffix_idx;
        let last_entry = self.window.last_mut().unwrap();
        // Entries shorter than one key can never seed the store.
        if last_entry.data.len() < MIN_MATCH_LEN {
            return;
        }
        // Last position (exclusive) whose MIN_MATCH_LEN-byte key still ends at or before `idx`.
        let insert_limit = idx.saturating_sub(MIN_MATCH_LEN - 1);
        if insert_limit > start {
            let data = last_entry.data.as_slice();
            let suffixes = &mut last_entry.suffixes;
            if fill_step == FAST_HASH_FILL_STEP {
                Self::add_suffixes_interleaved_fast(data, suffixes, start, insert_limit);
            } else {
                let mut pos = start;
                while pos < insert_limit {
                    Self::insert_suffix_if_absent(data, suffixes, pos);
                    pos += fill_step;
                }
            }
        }

        // Always seed the suffix that ends exactly at `idx` so the position
        // right at the boundary stays findable regardless of the fill step.
        if idx >= start + MIN_MATCH_LEN {
            let tail_start = idx - MIN_MATCH_LEN;
            let tail_key = &last_entry.data[tail_start..tail_start + MIN_MATCH_LEN];
            if !last_entry.suffixes.contains_key(tail_key) {
                last_entry.suffixes.insert(tail_key, tail_start);
            }
        }
    }

    /// Insert the MIN_MATCH_LEN-byte key starting at `pos`, keeping any entry
    /// already occupying that slot.
    #[inline(always)]
    fn insert_suffix_if_absent(data: &[u8], suffixes: &mut SuffixStore, pos: usize) {
        debug_assert!(
            pos + MIN_MATCH_LEN <= data.len(),
            "insert_suffix_if_absent: pos {} + MIN_MATCH_LEN {} exceeds data.len() {}",
            pos,
            MIN_MATCH_LEN,
            data.len()
        );
        let key = &data[pos..pos + MIN_MATCH_LEN];
        if !suffixes.contains_key(key) {
            suffixes.insert(key, pos);
        }
    }

    /// Fill variant specialized for FAST_HASH_FILL_STEP: retires four
    /// insertions per loop iteration, then handles the remainder.
    #[inline(always)]
    fn add_suffixes_interleaved_fast(
        data: &[u8],
        suffixes: &mut SuffixStore,
        start: usize,
        insert_limit: usize,
    ) {
        let lane = FAST_HASH_FILL_STEP;
        let mut pos = start;

        // Pipeline-ish fill: compute and retire several hash positions per loop
        // so the fastest path keeps multiple independent hash lookups in flight.
        while pos + lane * 3 < insert_limit {
            let p0 = pos;
            let p1 = pos + lane;
            let p2 = pos + lane * 2;
            let p3 = pos + lane * 3;

            Self::insert_suffix_if_absent(data, suffixes, p0);
            Self::insert_suffix_if_absent(data, suffixes, p1);
            Self::insert_suffix_if_absent(data, suffixes, p2);
            Self::insert_suffix_if_absent(data, suffixes, p3);

            pos += lane * 4;
        }

        while pos < insert_limit {
            Self::insert_suffix_if_absent(data, suffixes, pos);
            pos += lane;
        }
    }

    /// Check the repeat-offset candidates for the current position. Only
    /// applies directly after a previous sequence (literals_len == 0); the
    /// offsets tried then are rep1, rep2 and rep0-1, matching zstd's
    /// rep-code substitution rules. Returns the best `(offset, match_len)`
    /// of at least MIN_MATCH_LEN, or None.
    fn repcode_candidate(&self, data_slice: &[u8], literals_len: usize) -> Option<(usize, usize)> {
        if literals_len != 0 {
            return None;
        }

        let reps = [
            Some(self.offset_hist[1] as usize),
            Some(self.offset_hist[2] as usize),
            (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
        ];

        let mut best: Option<(usize, usize)> = None;
        // Deduplicate the rep offsets so identical ones aren't measured twice.
        let mut seen = [0usize; 3];
        let mut seen_len = 0usize;
        for offset in reps.into_iter().flatten() {
            if offset == 0 {
                continue;
            }
            if seen[..seen_len].contains(&offset) {
                continue;
            }
            seen[seen_len] = offset;
            seen_len += 1;

            let Some(match_len) = self.offset_match_len(offset, data_slice) else {
                continue;
            };
            if match_len < MIN_MATCH_LEN {
                continue;
            }

            // Prefer the longer match; break ties toward the smaller offset.
            if best.is_none_or(|(old_offset, old_len)| {
                match_len > old_len || (match_len == old_len && offset < old_offset)
            }) {
                best = Some((offset, match_len));
            }
        }
        best
    }

    /// Measure how many bytes at `offset` positions back match `data_slice`.
    /// Returns None when the offset reaches outside the searchable history.
    fn offset_match_len(&self, offset: usize, data_slice: &[u8]) -> Option<usize> {
        if offset == 0 {
            return None;
        }

        let last_idx = self.window.len().checked_sub(1)?;
        let last_entry = &self.window[last_idx];
        // All window bytes strictly before the current position are searchable.
        let searchable_prefix = self.window_size - (last_entry.data.len() - self.suffix_idx);
        if offset > searchable_prefix {
            return None;
        }

        // Resolve the backwards offset to (window entry, index inside it),
        // walking older entries when it reaches past the newest one.
        let mut remaining = offset;
        let (entry_idx, match_index) = if remaining <= self.suffix_idx {
            (last_idx, self.suffix_idx - remaining)
        } else {
            remaining -= self.suffix_idx;
            let mut found = None;
            for entry_idx in (0..last_idx).rev() {
                let len = self.window[entry_idx].data.len();
                if remaining <= len {
                    found = Some((entry_idx, len - remaining));
                    break;
                }
                remaining -= len;
            }
            found?
        };

        let match_entry = &self.window[entry_idx];
        let match_slice = &match_entry.data[match_index..];

        Some(Self::common_prefix_len(match_slice, data_slice))
    }

    /// Skip matching for the whole current window entry
    fn skip_matching(&mut self) {
        let len = self.window.last().unwrap().data.len();
        // Fill step 1 seeds every position so later blocks can still match here.
        self.add_suffixes_till(len, 1);
        self.suffix_idx = len;
        self.last_idx_in_sequence = len;
    }

    /// Add a new window entry. Will panic if the last window entry hasn't been processed properly.
    /// If any resources are released by pushing the new entry they are returned via the callback
    fn add_data(
        &mut self,
        data: Vec<u8>,
        suffixes: SuffixStore,
        reuse_space: impl FnMut(Vec<u8>, SuffixStore),
    ) {
        assert!(
            self.window.is_empty() || self.suffix_idx == self.window.last().unwrap().data.len()
        );
        self.reserve(data.len(), reuse_space);
        #[cfg(debug_assertions)]
        self.concat_window.extend_from_slice(&data);

        // Every existing entry moves `last_len` bytes further away from the
        // start of the entry being pushed, keeping base_offset = distance to
        // the newest entry's start.
        if let Some(last_len) = self.window.last().map(|last| last.data.len()) {
            for entry in self.window.iter_mut() {
                entry.base_offset += last_len;
            }
        }

        let len = data.len();
        self.window.push(WindowEntry {
            data,
            suffixes,
            base_offset: 0,
        });
        self.window_size += len;
        self.suffix_idx = 0;
        self.last_idx_in_sequence = 0;
    }

    /// Reserve space for a new window entry
    /// If any resources are released by pushing the new entry they are returned via the callback
    fn reserve(&mut self, amount: usize, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
        assert!(self.max_window_size >= amount);
        while self.window_size + amount > self.max_window_size {
            // NOTE(review): Vec::remove(0) is O(len) — presumably the window
            // only ever holds a handful of entries; confirm before enlarging it.
            let removed = self.window.remove(0);
            self.window_size -= removed.data.len();
            #[cfg(debug_assertions)]
            self.concat_window.drain(0..removed.data.len());

            let WindowEntry {
                suffixes,
                data: leaked_vec,
                base_offset: _,
            } = removed;
            reuse_space(leaked_vec, suffixes);
        }
    }
}
1110
struct DfastMatchGenerator {
    /// Upper bound (in bytes) on the retained history window.
    max_window_size: usize,
    /// The individual data blocks currently inside the window.
    window: VecDeque<Vec<u8>>,
    /// Total size in bytes of all blocks in `window`.
    window_size: usize,
    // We keep a contiguous searchable history to avoid rebuilding and reseeding
    // the matcher state from disjoint block buffers on every block.
    history: Vec<u8>,
    /// Index into `history` where live (not yet compacted-away) bytes begin.
    history_start: usize,
    /// Absolute stream position of the first live history byte.
    history_abs_start: usize,
    /// Recent match offsets, consulted for rep-code candidates.
    offset_hist: [u32; 3],
    /// Candidate buckets keyed by a 4-byte hash, storing absolute positions.
    short_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
    /// Candidate buckets keyed by an 8-byte hash, storing absolute positions.
    long_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
    /// log2 of the hash-table length; tables are rebuilt lazily when it changes.
    hash_bits: usize,
    // Lazy match lookahead depth (internal tuning parameter).
    lazy_depth: u8,
}
1127
/// A potential match: `match_len` bytes at absolute position `start` equal
/// the bytes `offset` positions earlier in the history.
#[derive(Copy, Clone)]
struct MatchCandidate {
    /// Absolute position where the match begins (after backwards extension).
    start: usize,
    /// Distance back to the matched bytes.
    offset: usize,
    /// Length of the match in bytes.
    match_len: usize,
}
1134
1135impl DfastMatchGenerator {
    /// Create an empty dfast matcher for a window of at most `max_window_size`
    /// bytes. The hash tables are left empty here and allocated lazily in
    /// `ensure_hash_tables`.
    fn new(max_window_size: usize) -> Self {
        Self {
            max_window_size,
            window: VecDeque::new(),
            window_size: 0,
            history: Vec::new(),
            history_start: 0,
            history_abs_start: 0,
            // Initial repeat offsets; the values 1/4/8 match zstd's default rep codes.
            offset_hist: [1, 4, 8],
            short_hash: Vec::new(),
            long_hash: Vec::new(),
            hash_bits: DFAST_HASH_BITS,
            lazy_depth: 1,
        }
    }
1151
1152    fn set_hash_bits(&mut self, bits: usize) {
1153        let clamped = bits.clamp(MIN_WINDOW_LOG as usize, DFAST_HASH_BITS);
1154        if self.hash_bits != clamped {
1155            self.hash_bits = clamped;
1156            self.short_hash = Vec::new();
1157            self.long_hash = Vec::new();
1158        }
1159    }
1160
    /// Clear all matcher state and hand every window buffer back through
    /// `reuse_space`. The allocated hash tables are kept and wiped in place.
    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
        self.window_size = 0;
        self.history.clear();
        self.history_start = 0;
        self.history_abs_start = 0;
        self.offset_hist = [1, 4, 8];
        // Wiping the existing tables is cheaper than reallocating them.
        if !self.short_hash.is_empty() {
            self.short_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
            self.long_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
        }
        for mut data in self.window.drain(..) {
            // NOTE(review): buffers are grown back to full capacity (zero-filled)
            // before being handed out — presumably recipients expect fully-sized
            // scratch space; confirm against the reuse_space callers.
            data.resize(data.capacity(), 0);
            reuse_space(data);
        }
    }
1176
1177    fn get_last_space(&self) -> &[u8] {
1178        self.window.back().unwrap().as_slice()
1179    }
1180
    /// Append a new block to the window, evicting the oldest blocks first if
    /// needed to stay within `max_window_size`. Evicted buffers are returned
    /// through `reuse_space`. Panics if `data` alone exceeds the window size.
    fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
        assert!(data.len() <= self.max_window_size);
        while self.window_size + data.len() > self.max_window_size {
            let removed = self.window.pop_front().unwrap();
            self.window_size -= removed.len();
            // Evicted bytes stay physically in `history` until compact_history()
            // trims them; only the logical start markers advance here.
            self.history_start += removed.len();
            self.history_abs_start += removed.len();
            reuse_space(removed);
        }
        self.compact_history();
        self.history.extend_from_slice(&data);
        self.window_size += data.len();
        self.window.push_back(data);
    }
1195
    /// Evict the oldest blocks until the window fits `max_window_size` again,
    /// returning each evicted buffer through `reuse_space`.
    fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
        while self.window_size > self.max_window_size {
            let removed = self.window.pop_front().unwrap();
            self.window_size -= removed.len();
            // Evicted bytes stay in `history` until compact_history() runs;
            // only the logical start markers advance here.
            self.history_start += removed.len();
            self.history_abs_start += removed.len();
            reuse_space(removed);
        }
    }
1205
1206    fn skip_matching(&mut self) {
1207        self.ensure_hash_tables();
1208        let current_len = self.window.back().unwrap().len();
1209        let current_abs_start = self.history_abs_start + self.window_size - current_len;
1210        self.insert_positions(current_abs_start, current_abs_start + current_len);
1211    }
1212
    /// Scan the newest window block greedily (with lazy lookahead) and emit the
    /// resulting Triple/Literals sequences through `handle_sequence`.
    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
        self.ensure_hash_tables();

        let current_len = self.window.back().unwrap().len();
        if current_len == 0 {
            return;
        }

        // Absolute stream position where the newest block begins.
        let current_abs_start = self.history_abs_start + self.window_size - current_len;

        let mut pos = 0usize;
        let mut literals_start = 0usize;
        while pos + DFAST_MIN_MATCH_LEN <= current_len {
            let abs_pos = current_abs_start + pos;
            let lit_len = pos - literals_start;

            let best = self.best_match(abs_pos, lit_len);
            if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
                // Seed the tables for every position the match covers before skipping them.
                self.insert_positions(abs_pos, candidate.start + candidate.match_len);
                let current = self.window.back().unwrap().as_slice();
                // candidate.start may lie before abs_pos due to backwards extension.
                let start = candidate.start - current_abs_start;
                let literals = &current[literals_start..start];
                handle_sequence(Sequence::Triple {
                    literals,
                    offset: candidate.offset,
                    match_len: candidate.match_len,
                });
                // The encoded offset value is irrelevant here; we only need the
                // side effect on offset history for future rep-code matching.
                let _ = encode_offset_with_history(
                    candidate.offset as u32,
                    literals.len() as u32,
                    &mut self.offset_hist,
                );
                pos = start + candidate.match_len;
                literals_start = pos;
            } else {
                self.insert_position(abs_pos);
                pos += 1;
            }
        }

        if literals_start < current_len {
            // We stop inserting once fewer than DFAST_MIN_MATCH_LEN bytes remain.
            // The last boundary-spanning start that can seed a dfast hash is
            // still inserted by the loop above; `dfast_inserts_tail_positions_
            // for_next_block_matching()` asserts that the next block can match
            // immediately at the boundary without eagerly seeding the final
            // DFAST_MIN_MATCH_LEN - 1 bytes here.
            let current = self.window.back().unwrap().as_slice();
            handle_sequence(Sequence::Literals {
                literals: &current[literals_start..],
            });
        }
    }
1268
    /// Allocate (or re-allocate after a `set_hash_bits` change) the short and
    /// long candidate tables, with all buckets marked empty.
    fn ensure_hash_tables(&mut self) {
        let table_len = 1usize << self.hash_bits;
        if self.short_hash.len() != table_len {
            // This is intentionally lazy so Fastest/Uncompressed never pay the
            // ~dfast-level memory cost. The current size tracks the issue's
            // zstd level-3 style parameters rather than a generic low-memory preset.
            self.short_hash = alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; table_len];
            self.long_hash = alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; table_len];
        }
    }
1279
1280    fn compact_history(&mut self) {
1281        if self.history_start == 0 {
1282            return;
1283        }
1284        if self.history_start >= self.max_window_size
1285            || self.history_start * 2 >= self.history.len()
1286        {
1287            self.history.drain(..self.history_start);
1288            self.history_start = 0;
1289        }
1290    }
1291
    /// The still-searchable suffix of the concatenated history buffer.
    fn live_history(&self) -> &[u8] {
        &self.history[self.history_start..]
    }
1295
    /// Absolute stream position one past the last live history byte.
    fn history_abs_end(&self) -> usize {
        self.history_abs_start + self.live_history().len()
    }
1299
1300    fn best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1301        let rep = self.repcode_candidate(abs_pos, lit_len);
1302        let hash = self.hash_candidate(abs_pos, lit_len);
1303        Self::better_candidate(rep, hash)
1304    }
1305
    /// Decide whether to accept `best` at `abs_pos` or defer in favor of a
    /// possibly better match one (or, at lazy_depth >= 2, two) positions
    /// later. Returning None means "emit nothing here, re-evaluate next byte";
    /// matches at or above DFAST_TARGET_LEN are always taken immediately.
    fn pick_lazy_match(
        &self,
        abs_pos: usize,
        lit_len: usize,
        best: Option<MatchCandidate>,
    ) -> Option<MatchCandidate> {
        let best = best?;
        if best.match_len >= DFAST_TARGET_LEN
            || abs_pos + 1 + DFAST_MIN_MATCH_LEN > self.history_abs_end()
        {
            return Some(best);
        }

        // Lazy check: evaluate pos+1
        let next = self.best_match(abs_pos + 1, lit_len + 1);
        if let Some(next) = next
            && (next.match_len > best.match_len
                || (next.match_len == best.match_len && next.offset < best.offset))
        {
            return None;
        }

        // Lazy2 check: also evaluate pos+2
        if self.lazy_depth >= 2 && abs_pos + 2 + DFAST_MIN_MATCH_LEN <= self.history_abs_end() {
            let next2 = self.best_match(abs_pos + 2, lit_len + 2);
            if let Some(next2) = next2
                && next2.match_len > best.match_len + 1
            {
                return None;
            }
        }

        Some(best)
    }
1340
    /// Repeat-offset candidates for `abs_pos`, following zstd's rep-code rules:
    /// with pending literals (lit_len != 0) the history is tried as-is
    /// (rep0..rep2); directly after a sequence, rep0 is replaced by rep1,
    /// rep2 and rep0-1. Returns the best candidate of at least
    /// DFAST_MIN_MATCH_LEN, extended backwards through the pending literals.
    fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        let reps = if lit_len == 0 {
            [
                Some(self.offset_hist[1] as usize),
                Some(self.offset_hist[2] as usize),
                (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
            ]
        } else {
            [
                Some(self.offset_hist[0] as usize),
                Some(self.offset_hist[1] as usize),
                Some(self.offset_hist[2] as usize),
            ]
        };

        let mut best = None;
        for rep in reps.into_iter().flatten() {
            // The rep target must lie inside the live history.
            if rep == 0 || rep > abs_pos {
                continue;
            }
            let candidate_pos = abs_pos - rep;
            if candidate_pos < self.history_abs_start {
                continue;
            }
            let concat = self.live_history();
            let candidate_idx = candidate_pos - self.history_abs_start;
            let current_idx = abs_pos - self.history_abs_start;
            if current_idx + DFAST_MIN_MATCH_LEN > concat.len() {
                continue;
            }
            let match_len =
                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
            if match_len >= DFAST_MIN_MATCH_LEN {
                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
                best = Self::better_candidate(best, Some(candidate));
            }
        }
        best
    }
1380
1381    fn hash_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1382        let concat = self.live_history();
1383        let current_idx = abs_pos - self.history_abs_start;
1384        let mut best = None;
1385        for candidate_pos in self.long_candidates(abs_pos) {
1386            if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos {
1387                continue;
1388            }
1389            let candidate_idx = candidate_pos - self.history_abs_start;
1390            let match_len =
1391                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
1392            if match_len >= DFAST_MIN_MATCH_LEN {
1393                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
1394                best = Self::better_candidate(best, Some(candidate));
1395                if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) {
1396                    return best;
1397                }
1398            }
1399        }
1400
1401        for candidate_pos in self.short_candidates(abs_pos) {
1402            if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos {
1403                continue;
1404            }
1405            let candidate_idx = candidate_pos - self.history_abs_start;
1406            let match_len =
1407                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
1408            if match_len >= DFAST_MIN_MATCH_LEN {
1409                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
1410                best = Self::better_candidate(best, Some(candidate));
1411                if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) {
1412                    return best;
1413                }
1414            }
1415        }
1416        best
1417    }
1418
    /// Grow a found match backwards while the bytes immediately before both
    /// positions are equal, turning pending literals into match bytes.
    /// Extension stops at the previous sequence boundary (`abs_pos - lit_len`)
    /// and at the start of live history. Returns the final candidate; the
    /// offset is unchanged since both ends move back in lockstep.
    fn extend_backwards(
        &self,
        mut candidate_pos: usize,
        mut abs_pos: usize,
        mut match_len: usize,
        lit_len: usize,
    ) -> MatchCandidate {
        let concat = self.live_history();
        let min_abs_pos = abs_pos - lit_len;
        while abs_pos > min_abs_pos
            && candidate_pos > self.history_abs_start
            && concat[candidate_pos - self.history_abs_start - 1]
                == concat[abs_pos - self.history_abs_start - 1]
        {
            candidate_pos -= 1;
            abs_pos -= 1;
            match_len += 1;
        }
        MatchCandidate {
            start: abs_pos,
            offset: abs_pos - candidate_pos,
            match_len,
        }
    }
1443
1444    fn better_candidate(
1445        lhs: Option<MatchCandidate>,
1446        rhs: Option<MatchCandidate>,
1447    ) -> Option<MatchCandidate> {
1448        match (lhs, rhs) {
1449            (None, other) | (other, None) => other,
1450            (Some(lhs), Some(rhs)) => {
1451                if rhs.match_len > lhs.match_len
1452                    || (rhs.match_len == lhs.match_len && rhs.offset < lhs.offset)
1453                {
1454                    Some(rhs)
1455                } else {
1456                    Some(lhs)
1457                }
1458            }
1459        }
1460    }
1461
1462    fn insert_positions(&mut self, start: usize, end: usize) {
1463        for pos in start..end {
1464            self.insert_position(pos);
1465        }
1466    }
1467
1468    fn insert_position(&mut self, pos: usize) {
1469        let idx = pos - self.history_abs_start;
1470        let short = {
1471            let concat = self.live_history();
1472            (idx + 4 <= concat.len()).then(|| self.hash4(&concat[idx..]))
1473        };
1474        if let Some(short) = short {
1475            let bucket = &mut self.short_hash[short];
1476            if bucket[0] != pos {
1477                bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
1478                bucket[0] = pos;
1479            }
1480        }
1481
1482        let long = {
1483            let concat = self.live_history();
1484            (idx + 8 <= concat.len()).then(|| self.hash8(&concat[idx..]))
1485        };
1486        if let Some(long) = long {
1487            let bucket = &mut self.long_hash[long];
1488            if bucket[0] != pos {
1489                bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
1490                bucket[0] = pos;
1491            }
1492        }
1493    }
1494
1495    fn short_candidates(&self, pos: usize) -> impl Iterator<Item = usize> + '_ {
1496        let concat = self.live_history();
1497        let idx = pos - self.history_abs_start;
1498        (idx + 4 <= concat.len())
1499            .then(|| self.short_hash[self.hash4(&concat[idx..])])
1500            .into_iter()
1501            .flatten()
1502            .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
1503    }
1504
1505    fn long_candidates(&self, pos: usize) -> impl Iterator<Item = usize> + '_ {
1506        let concat = self.live_history();
1507        let idx = pos - self.history_abs_start;
1508        (idx + 8 <= concat.len())
1509            .then(|| self.long_hash[self.hash8(&concat[idx..])])
1510            .into_iter()
1511            .flatten()
1512            .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
1513    }
1514
1515    fn hash4(&self, data: &[u8]) -> usize {
1516        let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64;
1517        self.hash_index(value)
1518    }
1519
1520    fn hash8(&self, data: &[u8]) -> usize {
1521        let value = u64::from_le_bytes(data[..8].try_into().unwrap());
1522        self.hash_index(value)
1523    }
1524
1525    fn hash_index(&self, value: u64) -> usize {
1526        const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
1527        ((value.wrapping_mul(PRIME)) >> (64 - self.hash_bits)) as usize
1528    }
1529}
1530
/// Hash-chain based match generator used by the higher compression levels.
struct HcMatchGenerator {
    /// Upper bound on the total number of bytes kept in `window`.
    max_window_size: usize,
    /// Committed input slices, oldest first; together they form the window.
    window: VecDeque<Vec<u8>>,
    /// Sum of the lengths of all slices currently in `window`.
    window_size: usize,
    /// Contiguous copy of the window bytes for O(1) slice access in matching.
    history: Vec<u8>,
    /// Bytes at the front of `history` already evicted from the window but
    /// not yet drained (see compact_history()).
    history_start: usize,
    /// Absolute stream position of the first live history byte.
    history_abs_start: usize,
    /// Repeat-offset history (repcodes), updated via encode_offset_with_history.
    offset_hist: [u32; 3],
    /// hash -> most recent (abs_pos + 1) hashed there; HC_EMPTY (0) = unused.
    hash_table: Vec<u32>,
    /// Chain links: masked position -> previous (abs_pos + 1) with same hash.
    chain_table: Vec<u32>,
    /// Lazy lookahead depth; pos+2 is only evaluated when this is >= 2.
    lazy_depth: u8,
    /// log2 of hash_table size.
    hash_log: usize,
    /// log2 of chain_table size.
    chain_log: usize,
    /// Max valid candidates collected per chain walk.
    search_depth: usize,
    /// Match length considered "good enough" to stop searching.
    target_len: usize,
}
1547
1548impl HcMatchGenerator {
1549    fn new(max_window_size: usize) -> Self {
1550        Self {
1551            max_window_size,
1552            window: VecDeque::new(),
1553            window_size: 0,
1554            history: Vec::new(),
1555            history_start: 0,
1556            history_abs_start: 0,
1557            offset_hist: [1, 4, 8],
1558            hash_table: Vec::new(),
1559            chain_table: Vec::new(),
1560            lazy_depth: 2,
1561            hash_log: HC_HASH_LOG,
1562            chain_log: HC_CHAIN_LOG,
1563            search_depth: HC_SEARCH_DEPTH,
1564            target_len: HC_TARGET_LEN,
1565        }
1566    }
1567
1568    fn configure(&mut self, config: HcConfig) {
1569        let resize = self.hash_log != config.hash_log || self.chain_log != config.chain_log;
1570        self.hash_log = config.hash_log;
1571        self.chain_log = config.chain_log;
1572        self.search_depth = config.search_depth.min(MAX_HC_SEARCH_DEPTH);
1573        self.target_len = config.target_len;
1574        if resize && !self.hash_table.is_empty() {
1575            // Force reallocation on next ensure_tables() call.
1576            self.hash_table.clear();
1577            self.chain_table.clear();
1578        }
1579    }
1580
1581    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
1582        self.window_size = 0;
1583        self.history.clear();
1584        self.history_start = 0;
1585        self.history_abs_start = 0;
1586        self.offset_hist = [1, 4, 8];
1587        if !self.hash_table.is_empty() {
1588            self.hash_table.fill(HC_EMPTY);
1589            self.chain_table.fill(HC_EMPTY);
1590        }
1591        for mut data in self.window.drain(..) {
1592            data.resize(data.capacity(), 0);
1593            reuse_space(data);
1594        }
1595    }
1596
1597    fn get_last_space(&self) -> &[u8] {
1598        self.window.back().unwrap().as_slice()
1599    }
1600
    // History duplicates window data for O(1) contiguous access during match
    // finding (common_prefix_len, extend_backwards). Same pattern as
    // DfastMatchGenerator. Peak: ~2x window size for data buffers + 6 MB tables.
    /// Append a committed slice: evict oldest slices until the new total fits
    /// `max_window_size`, then mirror the bytes into the contiguous history.
    /// Evicted buffers are handed back through `reuse_space`.
    fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
        assert!(data.len() <= self.max_window_size);
        while self.window_size + data.len() > self.max_window_size {
            let removed = self.window.pop_front().unwrap();
            self.window_size -= removed.len();
            // Advancing both start markers keeps history aligned with the
            // absolute stream position of the surviving window bytes.
            self.history_start += removed.len();
            self.history_abs_start += removed.len();
            reuse_space(removed);
        }
        // Drop dead history bytes (amortized) before appending new ones.
        self.compact_history();
        self.history.extend_from_slice(&data);
        self.window_size += data.len();
        self.window.push_back(data);
    }
1618
1619    fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
1620        while self.window_size > self.max_window_size {
1621            let removed = self.window.pop_front().unwrap();
1622            self.window_size -= removed.len();
1623            self.history_start += removed.len();
1624            self.history_abs_start += removed.len();
1625            reuse_space(removed);
1626        }
1627    }
1628
1629    /// Backfill positions from the tail of the previous slice that couldn't be
1630    /// hashed at the time (insert_position needs 4 bytes of lookahead).
1631    fn backfill_boundary_positions(&mut self, current_abs_start: usize) {
1632        let backfill_start = current_abs_start
1633            .saturating_sub(3)
1634            .max(self.history_abs_start);
1635        if backfill_start < current_abs_start {
1636            self.insert_positions(backfill_start, current_abs_start);
1637        }
1638    }
1639
1640    fn skip_matching(&mut self) {
1641        self.ensure_tables();
1642        let current_len = self.window.back().unwrap().len();
1643        let current_abs_start = self.history_abs_start + self.window_size - current_len;
1644        self.backfill_boundary_positions(current_abs_start);
1645        self.insert_positions(current_abs_start, current_abs_start + current_len);
1646    }
1647
    /// Run greedy/lazy matching over the most recently committed slice,
    /// emitting `Sequence`s through `handle_sequence`.
    ///
    /// `pos` / `literals_start` index into the current slice; absolute stream
    /// positions are `current_abs_start + pos`. When a match is accepted, all
    /// positions it covers are inserted into the tables and the repcode
    /// history is updated to mirror what the decoder will see.
    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
        self.ensure_tables();

        let current_len = self.window.back().unwrap().len();
        if current_len == 0 {
            return;
        }

        let current_abs_start = self.history_abs_start + self.window_size - current_len;
        self.backfill_boundary_positions(current_abs_start);

        let mut pos = 0usize;
        let mut literals_start = 0usize;
        while pos + HC_MIN_MATCH_LEN <= current_len {
            let abs_pos = current_abs_start + pos;
            // Literals accumulated since the last emitted sequence.
            let lit_len = pos - literals_start;

            let best = self.find_best_match(abs_pos, lit_len);
            if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
                // candidate.start may be < abs_pos after backward extension;
                // index everything the match covers so later positions see it.
                self.insert_positions(abs_pos, candidate.start + candidate.match_len);
                let current = self.window.back().unwrap().as_slice();
                let start = candidate.start - current_abs_start;
                let literals = &current[literals_start..start];
                handle_sequence(Sequence::Triple {
                    literals,
                    offset: candidate.offset,
                    match_len: candidate.match_len,
                });
                // Update repcode state only; the encoded form is recomputed
                // later, hence the ignored return value.
                let _ = encode_offset_with_history(
                    candidate.offset as u32,
                    literals.len() as u32,
                    &mut self.offset_hist,
                );
                pos = start + candidate.match_len;
                literals_start = pos;
            } else {
                // No acceptable match: index this position and move on.
                self.insert_position(abs_pos);
                pos += 1;
            }
        }

        // Insert remaining hashable positions in the tail (the matching loop
        // stops at HC_MIN_MATCH_LEN but insert_position only needs 4 bytes).
        while pos + 4 <= current_len {
            self.insert_position(current_abs_start + pos);
            pos += 1;
        }

        if literals_start < current_len {
            let current = self.window.back().unwrap().as_slice();
            handle_sequence(Sequence::Literals {
                literals: &current[literals_start..],
            });
        }
    }
1703
1704    fn ensure_tables(&mut self) {
1705        if self.hash_table.is_empty() {
1706            self.hash_table = alloc::vec![HC_EMPTY; 1 << self.hash_log];
1707            self.chain_table = alloc::vec![HC_EMPTY; 1 << self.chain_log];
1708        }
1709    }
1710
1711    fn compact_history(&mut self) {
1712        if self.history_start == 0 {
1713            return;
1714        }
1715        if self.history_start >= self.max_window_size
1716            || self.history_start * 2 >= self.history.len()
1717        {
1718            self.history.drain(..self.history_start);
1719            self.history_start = 0;
1720        }
1721    }
1722
1723    fn live_history(&self) -> &[u8] {
1724        &self.history[self.history_start..]
1725    }
1726
1727    fn history_abs_end(&self) -> usize {
1728        self.history_abs_start + self.live_history().len()
1729    }
1730
1731    fn hash_position(&self, data: &[u8]) -> usize {
1732        let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64;
1733        const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
1734        ((value.wrapping_mul(PRIME)) >> (64 - self.hash_log)) as usize
1735    }
1736
    /// Link `abs_pos` into the hash table and chain so that later positions
    /// sharing its 4-byte hash can reach it via chain_candidates().
    fn insert_position(&mut self, abs_pos: usize) {
        let idx = abs_pos - self.history_abs_start;
        let concat = self.live_history();
        // Hashing needs 4 bytes of lookahead; tail positions are skipped.
        if idx + 4 > concat.len() {
            return;
        }
        let hash = self.hash_position(&concat[idx..]);
        // Store as (abs_pos + 1) so HC_EMPTY (0) never collides with a valid
        // position. Guard on usize before cast to avoid silent u32 truncation.
        // Streams >4 GiB stop inserting; matches degrade to repcodes-only.
        // TODO(#51): rebase table positions to avoid 4 GiB cutoff
        if abs_pos >= u32::MAX as usize {
            return;
        }
        let pos_u32 = abs_pos as u32;
        let stored = pos_u32 + 1;
        let chain_idx = pos_u32 as usize & ((1 << self.chain_log) - 1);
        // Thread the previous head of this hash bucket onto our chain slot,
        // then make this position the new head.
        let prev = self.hash_table[hash];
        self.chain_table[chain_idx] = prev;
        self.hash_table[hash] = stored;
    }
1758
1759    fn insert_positions(&mut self, start: usize, end: usize) {
1760        for pos in start..end {
1761            self.insert_position(pos);
1762        }
1763    }
1764
    // Fixed-size stack array is intentional: it avoids heap allocation on
    // the hot path and the sentinel loop exits at self.search_depth.
    /// Collect up to `search_depth` earlier positions sharing `abs_pos`'s
    /// 4-byte hash, newest first. Unused slots stay `usize::MAX`, which
    /// callers treat as the end-of-list sentinel.
    fn chain_candidates(&self, abs_pos: usize) -> [usize; MAX_HC_SEARCH_DEPTH] {
        let mut buf = [usize::MAX; MAX_HC_SEARCH_DEPTH];
        let idx = abs_pos - self.history_abs_start;
        let concat = self.live_history();
        // Too close to the end to hash: return the all-sentinel buffer.
        if idx + 4 > concat.len() {
            return buf;
        }
        let hash = self.hash_position(&concat[idx..]);
        let chain_mask = (1 << self.chain_log) - 1;

        let mut cur = self.hash_table[hash];
        let mut filled = 0;
        // Follow chain up to search_depth valid candidates, skipping stale
        // entries (evicted from window) instead of stopping at them.
        // Stored values are (abs_pos + 1); decode with wrapping_sub(1).
        // Break on self-loops (masked chain_idx collision at periodicity).
        // Cap total steps at 4x search depth to bound time spent skipping
        // stale entries while still finding valid candidates deeper in chain.
        let mut steps = 0;
        let max_chain_steps = self.search_depth * 4;
        while filled < self.search_depth && steps < max_chain_steps {
            if cur == HC_EMPTY {
                break;
            }
            let candidate_abs = cur.wrapping_sub(1) as usize;
            let next = self.chain_table[candidate_abs & chain_mask];
            steps += 1;
            if next == cur {
                // Self-loop: two positions share chain_idx, stop to avoid
                // spinning on the same candidate forever.
                if candidate_abs >= self.history_abs_start && candidate_abs < abs_pos {
                    buf[filled] = candidate_abs;
                }
                break;
            }
            cur = next;
            // Reject stale (evicted) or not-yet-valid (>= abs_pos) entries.
            if candidate_abs < self.history_abs_start || candidate_abs >= abs_pos {
                continue;
            }
            buf[filled] = candidate_abs;
            filled += 1;
        }
        buf
    }
1811
1812    fn find_best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1813        let rep = self.repcode_candidate(abs_pos, lit_len);
1814        let hash = self.hash_chain_candidate(abs_pos, lit_len);
1815        Self::better_candidate(rep, hash)
1816    }
1817
    /// Best match reachable through the hash chain at `abs_pos`.
    ///
    /// Walks up to `search_depth` chain candidates, extends each forward
    /// (common prefix) and backward (into pending literals), and keeps the
    /// highest-gain one. Bails out early once a match reaches `target_len`.
    fn hash_chain_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        let concat = self.live_history();
        let current_idx = abs_pos - self.history_abs_start;
        // Need at least HC_MIN_MATCH_LEN bytes of lookahead to form a match.
        if current_idx + HC_MIN_MATCH_LEN > concat.len() {
            return None;
        }

        let mut best: Option<MatchCandidate> = None;
        for candidate_abs in self.chain_candidates(abs_pos) {
            // usize::MAX marks the end of the filled portion of the buffer.
            if candidate_abs == usize::MAX {
                break;
            }
            let candidate_idx = candidate_abs - self.history_abs_start;
            let match_len =
                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
            if match_len >= HC_MIN_MATCH_LEN {
                let candidate = self.extend_backwards(candidate_abs, abs_pos, match_len, lit_len);
                best = Self::better_candidate(best, Some(candidate));
                if best.is_some_and(|b| b.match_len >= self.target_len) {
                    return best;
                }
            }
        }
        best
    }
1843
    /// Try the repeat offsets ("repcodes") before paying for a fresh offset.
    ///
    /// When the match starts immediately after the previous one (lit_len == 0)
    /// the first repcode is skipped and `offset_hist[0] - 1` is tried instead.
    /// NOTE(review): this appears to mirror zstd's repeat-offset convention
    /// for the lit_len == 0 case — confirm against the zstd format spec.
    fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        let reps = if lit_len == 0 {
            [
                Some(self.offset_hist[1] as usize),
                Some(self.offset_hist[2] as usize),
                // Offset 0 would be invalid, hence the > 1 guard.
                (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
            ]
        } else {
            [
                Some(self.offset_hist[0] as usize),
                Some(self.offset_hist[1] as usize),
                Some(self.offset_hist[2] as usize),
            ]
        };

        let concat = self.live_history();
        let current_idx = abs_pos - self.history_abs_start;
        // Need at least HC_MIN_MATCH_LEN bytes of lookahead to form a match.
        if current_idx + HC_MIN_MATCH_LEN > concat.len() {
            return None;
        }

        let mut best = None;
        for rep in reps.into_iter().flatten() {
            // Skip offsets reaching before the stream start...
            if rep == 0 || rep > abs_pos {
                continue;
            }
            let candidate_pos = abs_pos - rep;
            // ...or before the retained window.
            if candidate_pos < self.history_abs_start {
                continue;
            }
            let candidate_idx = candidate_pos - self.history_abs_start;
            let match_len =
                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
            if match_len >= HC_MIN_MATCH_LEN {
                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
                best = Self::better_candidate(best, Some(candidate));
            }
        }
        best
    }
1884
    /// Grow a match backwards into the pending literals (identical logic to
    /// DfastMatchGenerator::extend_backwards): while the byte just before the
    /// match equals the byte just before the candidate, pull both starts back
    /// by one. Bounded by `lit_len` and by the start of retained history.
    fn extend_backwards(
        &self,
        mut candidate_pos: usize,
        mut abs_pos: usize,
        mut match_len: usize,
        lit_len: usize,
    ) -> MatchCandidate {
        let concat = self.live_history();
        // Never extend past the first pending literal.
        let min_abs_pos = abs_pos - lit_len;
        while abs_pos > min_abs_pos
            && candidate_pos > self.history_abs_start
            && concat[candidate_pos - self.history_abs_start - 1]
                == concat[abs_pos - self.history_abs_start - 1]
        {
            candidate_pos -= 1;
            abs_pos -= 1;
            match_len += 1;
        }
        MatchCandidate {
            start: abs_pos,
            offset: abs_pos - candidate_pos,
            match_len,
        }
    }
1909
1910    fn better_candidate(
1911        lhs: Option<MatchCandidate>,
1912        rhs: Option<MatchCandidate>,
1913    ) -> Option<MatchCandidate> {
1914        match (lhs, rhs) {
1915            (None, other) | (other, None) => other,
1916            (Some(lhs), Some(rhs)) => {
1917                let lhs_gain = Self::match_gain(lhs.match_len, lhs.offset);
1918                let rhs_gain = Self::match_gain(rhs.match_len, rhs.offset);
1919                if rhs_gain > lhs_gain {
1920                    Some(rhs)
1921                } else {
1922                    Some(lhs)
1923                }
1924            }
1925        }
1926    }
1927
1928    fn match_gain(match_len: usize, offset: usize) -> i32 {
1929        debug_assert!(
1930            offset > 0,
1931            "zstd offsets are 1-indexed, offset=0 is invalid"
1932        );
1933        let offset_bits = 32 - (offset as u32).leading_zeros() as i32;
1934        (match_len as i32) * 4 - offset_bits
1935    }
1936
    // Lazy lookahead queries pos+1/pos+2 before they are inserted into hash
    // tables — matching C zstd behavior. Seeding before comparing would let a
    // position match against itself, changing semantics.
    /// Decide whether to emit `best` at `abs_pos` now, or return `None` so
    /// the caller advances one literal and re-searches (lazy matching).
    ///
    /// A deferred match must beat the current one by more than the cost of
    /// the extra literal(s) it forces — hence the +4 bias on `current_gain`
    /// and the additional +4 in the pos+2 check.
    fn pick_lazy_match(
        &self,
        abs_pos: usize,
        lit_len: usize,
        best: Option<MatchCandidate>,
    ) -> Option<MatchCandidate> {
        let best = best?;
        // Emit immediately when already "good enough" or when there is not
        // enough lookahead to evaluate pos+1.
        if best.match_len >= self.target_len
            || abs_pos + 1 + HC_MIN_MATCH_LEN > self.history_abs_end()
        {
            return Some(best);
        }

        let current_gain = Self::match_gain(best.match_len, best.offset) + 4;

        // Lazy check: evaluate pos+1
        let next = self.find_best_match(abs_pos + 1, lit_len + 1);
        if let Some(next) = next {
            let next_gain = Self::match_gain(next.match_len, next.offset);
            if next_gain > current_gain {
                return None;
            }
        }

        // Lazy2 check: also evaluate pos+2
        if self.lazy_depth >= 2 && abs_pos + 2 + HC_MIN_MATCH_LEN <= self.history_abs_end() {
            let next2 = self.find_best_match(abs_pos + 2, lit_len + 2);
            if let Some(next2) = next2 {
                let next2_gain = Self::match_gain(next2.match_len, next2.offset);
                // Must beat current gain + extra literal cost
                if next2_gain > current_gain + 4 {
                    return None;
                }
            }
        }

        Some(best)
    }
1978}
1979
#[test]
fn matches() {
    // End-to-end check of the simple MatchGenerator backend: feed several
    // slices, replay every emitted Sequence like a decoder would, and verify
    // the reconstruction equals the original input byte-for-byte.
    let mut matcher = MatchGenerator::new(1000);
    let mut original_data = Vec::new();
    let mut reconstructed = Vec::new();

    // Decoder model: literals are copied verbatim; a Triple copies match_len
    // bytes starting `offset` back from the current end of the output.
    let replay_sequence = |seq: Sequence<'_>, reconstructed: &mut Vec<u8>| match seq {
        Sequence::Literals { literals } => {
            assert!(!literals.is_empty());
            reconstructed.extend_from_slice(literals);
        }
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            assert!(offset > 0);
            assert!(match_len >= MIN_MATCH_LEN);
            reconstructed.extend_from_slice(literals);
            assert!(offset <= reconstructed.len());
            let start = reconstructed.len() - offset;
            // Byte-by-byte copy so overlapping matches (offset < match_len)
            // behave exactly like a zstd decoder.
            for i in 0..match_len {
                let byte = reconstructed[start + i];
                reconstructed.push(byte);
            }
        }
    };

    // Run of zeros: self-overlapping match within a single slice.
    matcher.add_data(
        alloc::vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));

    assert!(!matcher.next_sequence(|_| {}));

    // Repeating 6-byte pattern plus zeros matching the previous slice.
    matcher.add_data(
        alloc::vec![
            1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
        ],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[
        1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
    ]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Partially novel data with a repeated prefix and zero tail.
    matcher.add_data(
        alloc::vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Slice fully matching earlier history.
    matcher.add_data(
        alloc::vec![0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    matcher.add_data(
        alloc::vec![7, 8, 9, 10, 11],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[7, 8, 9, 10, 11]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // skip_matching(): the slice is indexed but no sequences are produced,
    // so the "decoder" side appends the literals manually.
    matcher.add_data(
        alloc::vec![1, 3, 5, 7, 9],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    matcher.skip_matching();
    original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
    reconstructed.extend_from_slice(&[1, 3, 5, 7, 9]);
    assert!(!matcher.next_sequence(|_| {}));

    // The skipped slice must still be matchable from history.
    matcher.add_data(
        alloc::vec![1, 3, 5, 7, 9],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[1, 3, 5, 7, 9]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Intra-slice repeat with a literal tail.
    matcher.add_data(
        alloc::vec![0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    assert_eq!(reconstructed, original_data);
}
2099
#[test]
fn dfast_matches_roundtrip_multi_block_pattern() {
    // Two 128 KiB blocks of the same repeating 10-byte pattern must round-trip
    // through the dfast backend, including matches that reach back across the
    // block boundary into the first block's history.
    let pattern = [9, 21, 44, 184, 19, 96, 171, 109, 141, 251];
    let first_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
    let second_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();

    let mut matcher = DfastMatchGenerator::new(1 << 22);
    // Decoder model: replay sequences into `decoded`.
    let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            decoded.extend_from_slice(literals);
            let start = decoded.len() - offset;
            for i in 0..match_len {
                let byte = decoded[start + i];
                decoded.push(byte);
            }
        }
    };

    matcher.add_data(first_block.clone(), |_| {});
    let mut history = Vec::new();
    matcher.start_matching(|seq| replay_sequence(&mut history, seq));
    assert_eq!(history, first_block);

    // Second block: sequences may reference the first block's bytes.
    matcher.add_data(second_block.clone(), |_| {});
    let prefix_len = history.len();
    matcher.start_matching(|seq| replay_sequence(&mut history, seq));

    assert_eq!(&history[prefix_len..], second_block.as_slice());
}
2134
#[test]
fn driver_switches_backends_and_initializes_dfast_via_reset() {
    // reset() selects the backend per compression level; Default must use the
    // dfast backend (4 MiB window) and still match across committed slices,
    // while a later switch to Fastest shrinks the window.
    let mut driver = MatchGeneratorDriver::new(32, 2);

    driver.reset(CompressionLevel::Default);
    assert_eq!(driver.window_size(), (1u64 << 22));

    let mut first = driver.get_next_space();
    first[..12].copy_from_slice(b"abcabcabcabc");
    first.truncate(12);
    driver.commit_space(first);
    assert_eq!(driver.get_last_space(), b"abcabcabcabc");
    driver.skip_matching();

    // Identical second slice: should be matched entirely against the first.
    let mut second = driver.get_next_space();
    second[..12].copy_from_slice(b"abcabcabcabc");
    second.truncate(12);
    driver.commit_space(second);

    // Seed the decoder with the skipped slice, then replay the sequences.
    let mut reconstructed = b"abcabcabcabc".to_vec();
    driver.start_matching(|seq| match seq {
        Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            reconstructed.extend_from_slice(literals);
            let start = reconstructed.len() - offset;
            for i in 0..match_len {
                let byte = reconstructed[start + i];
                reconstructed.push(byte);
            }
        }
    });
    assert_eq!(reconstructed, b"abcabcabcabcabcabcabcabc");

    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.window_size(), (1u64 << 17));
}
2175
#[test]
fn driver_small_source_hint_shrinks_dfast_hash_tables() {
    // A small source-size hint should shrink both the window and the dfast
    // hash tables; without a hint the tables use the full DFAST_HASH_BITS size.
    let mut driver = MatchGeneratorDriver::new(32, 2);

    driver.reset(CompressionLevel::Default);
    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"abcabcabcabc");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching();
    let full_tables = driver.dfast_matcher().short_hash.len();
    assert_eq!(full_tables, 1 << DFAST_HASH_BITS);

    // Hint a tiny (1 KiB) source and re-init.
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Default);
    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"xyzxyzxyzxyz");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching();
    let hinted_tables = driver.dfast_matcher().short_hash.len();

    assert_eq!(driver.window_size(), 1 << MIN_WINDOW_LOG);
    assert_eq!(hinted_tables, 1 << MIN_WINDOW_LOG);
    assert!(
        hinted_tables < full_tables,
        "tiny source hint should reduce dfast table footprint"
    );
}
2205
#[test]
fn driver_unhinted_level2_keeps_default_dfast_hash_table_size() {
    // Without a source-size hint, Level(2) must not shrink the dfast tables.
    let mut driver = MatchGeneratorDriver::new(32, 2);

    driver.reset(CompressionLevel::Level(2));
    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"abcabcabcabc");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching();

    let table_len = driver.dfast_matcher().short_hash.len();
    assert_eq!(
        table_len,
        1 << DFAST_HASH_BITS,
        "unhinted Level(2) should keep default dfast table size"
    );
}
2224
#[test]
fn simple_backend_rejects_undersized_pooled_suffix_store() {
    // A pooled SuffixStore that is too small for the committed block must not
    // be reused — commit_space() has to provision an adequately sized one.
    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
    driver.reset(CompressionLevel::Fastest);

    // Poison the pool with an undersized store (1024 slots vs 4096-byte block).
    driver.suffix_pool.push(SuffixStore::with_capacity(1024));

    let mut space = driver.get_next_space();
    space.clear();
    space.resize(4096, 0xAB);
    driver.commit_space(space);

    let last_suffix_slots = driver
        .match_generator
        .window
        .last()
        .expect("window entry must exist after commit")
        .suffixes
        .slots
        .len();
    assert!(
        last_suffix_slots >= 4096,
        "undersized pooled suffix store must not be reused for larger blocks"
    );
}
2250
#[test]
fn source_hint_clamps_driver_slice_size_to_window() {
    // When the hinted source is smaller than the configured slice size, the
    // driver must clamp both window and slice size to the hint.
    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Default);

    let window = driver.window_size() as usize;
    assert_eq!(window, 1024);
    assert_eq!(driver.slice_size, window);

    // Freshly handed-out space reflects the clamped slice size.
    let space = driver.get_next_space();
    assert_eq!(space.len(), window);
    driver.commit_space(space);
}
2265
#[test]
fn pooled_space_keeps_capacity_when_slice_size_shrinks() {
    // Shrinking the slice size (via a small hint) must not shrink pooled
    // buffers: the old large capacity should survive reuse.
    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
    driver.reset(CompressionLevel::Default);

    let large = driver.get_next_space();
    let large_capacity = large.capacity();
    assert!(large_capacity >= 128 * 1024);
    driver.commit_space(large);

    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Default);

    let small = driver.get_next_space();
    assert_eq!(small.len(), 1024);
    assert!(
        small.capacity() >= large_capacity,
        "pooled buffer capacity should be preserved to avoid shrink/grow churn"
    );
}
2286
#[test]
fn driver_best_to_fastest_releases_oversized_hc_tables() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Best uses the hash-chain backend with the largest table sizes.
    driver.reset(CompressionLevel::Best);
    assert_eq!(driver.window_size(), 1u64 << 24);

    // Commit a small block so ensure_tables() actually allocates the tables.
    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"abcabcabcabc");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching();

    // Dropping back to Fastest must free the HC tables instead of keeping
    // megabytes of dead allocation around.
    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.window_size(), 1u64 << 17);

    let hc = driver.hc_match_generator.as_ref().unwrap();
    assert!(
        hc.hash_table.is_empty(),
        "HC hash_table should be released after switching away from Best"
    );
    assert!(
        hc.chain_table.is_empty(),
        "HC chain_table should be released after switching away from Best"
    );
}
2317
#[test]
fn driver_better_to_best_resizes_hc_tables() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Better allocates the smaller hash-chain tables.
    driver.reset(CompressionLevel::Better);
    assert_eq!(driver.window_size(), 1u64 << 23);

    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"abcabcabcabc");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching();

    // Snapshot the Better-sized tables before switching levels.
    let (hash_at_better, chain_at_better) = {
        let hc = driver.hc_match_generator.as_ref().unwrap();
        (hc.hash_table.len(), hc.chain_table.len())
    };

    // Best must grow both tables; feed data again so ensure_tables() runs
    // with the new configuration.
    driver.reset(CompressionLevel::Best);
    assert_eq!(driver.window_size(), 1u64 << 24);

    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"xyzxyzxyzxyz");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching();

    let hc = driver.hc_match_generator.as_ref().unwrap();
    assert!(
        hc.hash_table.len() > hash_at_better,
        "Best hash_table ({}) should be larger than Better ({})",
        hc.hash_table.len(),
        hash_at_better
    );
    assert!(
        hc.chain_table.len() > chain_at_better,
        "Best chain_table ({}) should be larger than Better ({})",
        hc.chain_table.len(),
        chain_at_better
    );
}
2361
#[test]
fn prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    // Commit a block that repeats the dictionary content exactly.
    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    // Expect a zero-literal match at offset 8 back into primed history.
    let mut saw_match = false;
    driver.start_matching(|seq| {
        saw_match |= matches!(
            seq,
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } if literals.is_empty() && offset == 8 && match_len >= MIN_MATCH_LEN
        );
    });

    assert!(
        saw_match,
        "first full block should still match dictionary-primed history"
    );
}
2394
#[test]
fn prime_with_large_dictionary_preserves_early_history_until_first_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    // 24-byte dictionary: "abcdefgh" sits 24 bytes back from the block end.
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);

    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    // The earliest dictionary bytes (offset 24) must still be reachable.
    let mut saw_match = false;
    driver.start_matching(|seq| {
        saw_match |= matches!(
            seq,
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } if literals.is_empty() && offset == 24 && match_len >= MIN_MATCH_LEN
        );
    });

    assert!(
        saw_match,
        "dictionary bytes should remain addressable until frame output exceeds the live window"
    );
}
2427
#[test]
fn prime_with_dictionary_applies_offset_history_even_when_content_is_empty() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    // Even a zero-length dictionary must install its repeat-offset history.
    driver.prime_with_dictionary(&[], [11, 7, 3]);
    assert_eq!(driver.match_generator.offset_hist, [11, 7, 3]);
}
2437
#[test]
fn dfast_prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Default);

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    // Commit a block that repeats the dictionary content exactly.
    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    // The dfast backend must find a zero-literal match at offset 8.
    let mut saw_match = false;
    driver.start_matching(|seq| {
        saw_match |= matches!(
            seq,
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } if literals.is_empty() && offset == 8 && match_len >= DFAST_MIN_MATCH_LEN
        );
    });

    assert!(
        saw_match,
        "dfast backend should match dictionary-primed history in first full block"
    );
}
2470
#[test]
fn prime_with_dictionary_does_not_inflate_reported_window_size() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    // Priming grows the internal retention budget, but the frame-header
    // window size must stay exactly what it was before priming.
    let reported_before = driver.window_size();
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);

    assert_eq!(
        driver.window_size(),
        reported_before,
        "dictionary retention budget must not change reported frame window size"
    );
}
2485
#[test]
fn prime_with_dictionary_does_not_reuse_tiny_suffix_store() {
    let mut driver = MatchGeneratorDriver::new(8, 2);
    driver.reset(CompressionLevel::Fastest);

    // A 9-byte dictionary with 8-byte slices leaves a 1-byte tail; such a
    // tail is too short to ever match and must not reach the window.
    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);

    let window = &driver.match_generator.window;
    assert!(
        window.iter().all(|entry| entry.data.len() >= MIN_MATCH_LEN),
        "dictionary priming must not commit tails shorter than MIN_MATCH_LEN"
    );
}
2504
#[test]
fn prime_with_dictionary_counts_only_committed_tail_budget() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    let budget_before = driver.match_generator.max_window_size;
    // 9 bytes = one committable 8-byte slice plus a 1-byte tail that the
    // simple backend drops; only the committed 8 bytes may grow the budget.
    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);

    assert_eq!(
        driver.match_generator.max_window_size,
        budget_before + 8,
        "retention budget must account only for dictionary bytes actually committed to history"
    );
}
2520
#[test]
fn dfast_prime_with_dictionary_counts_four_byte_tail_budget() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Default);

    let budget_before = driver.dfast_matcher().max_window_size;
    // 12 bytes = one full 8-byte slice plus a 4-byte tail. Dfast can still
    // exploit that tail through short-hash overlap into the next block, so
    // all 12 bytes count toward the retention budget.
    driver.prime_with_dictionary(b"abcdefghijkl", [1, 4, 8]);

    assert_eq!(
        driver.dfast_matcher().max_window_size,
        budget_before + 12,
        "dfast retention budget should include 4-byte dictionary tails"
    );
}
2537
#[test]
fn prime_with_dictionary_budget_shrinks_after_simple_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);
    // Shrink the live window so the primed dictionary slices are evicted
    // after just two committed blocks and retirement is deterministic.
    driver.match_generator.max_window_size = 8;
    driver.reported_window_size = 8;

    let base_window = driver.match_generator.max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(driver.match_generator.max_window_size, base_window + 24);

    // Push the dictionary slices out of the live window.
    for filler in [b"AAAAAAAA".as_slice(), b"BBBBBBBB".as_slice()] {
        let mut space = driver.get_next_space();
        space.clear();
        space.extend_from_slice(filler);
        driver.commit_space(space);
        driver.skip_matching();
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.match_generator.max_window_size, base_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
2568
#[test]
fn prime_with_dictionary_budget_shrinks_after_dfast_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Default);
    // Shrink the live window so the primed dictionary slices are evicted
    // after just two committed blocks and retirement is deterministic.
    driver.dfast_matcher_mut().max_window_size = 8;
    driver.reported_window_size = 8;

    let base_window = driver.dfast_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(driver.dfast_matcher().max_window_size, base_window + 24);

    // Push the dictionary slices out of the live window.
    for filler in [b"AAAAAAAA".as_slice(), b"BBBBBBBB".as_slice()] {
        let mut space = driver.get_next_space();
        space.clear();
        space.extend_from_slice(filler);
        driver.commit_space(space);
        driver.skip_matching();
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.dfast_matcher().max_window_size,
        base_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
2600
#[test]
fn hc_prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Better);

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    // Repeat the dictionary content so the HC matcher can find it; the
    // 8-byte repeat is comfortably above HC_MIN_MATCH_LEN (5).
    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    let mut saw_match = false;
    driver.start_matching(|seq| {
        saw_match |= matches!(
            seq,
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } if literals.is_empty() && offset == 8 && match_len >= HC_MIN_MATCH_LEN
        );
    });

    assert!(
        saw_match,
        "hash-chain backend should match dictionary-primed history in first full block"
    );
}
2635
#[test]
fn prime_with_dictionary_budget_shrinks_after_hc_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Better);
    // Shrink the live window so the primed dictionary slices are evicted
    // after just two committed blocks.
    driver.hc_matcher_mut().max_window_size = 8;
    driver.reported_window_size = 8;

    let base_window = driver.hc_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(driver.hc_matcher().max_window_size, base_window + 24);

    // Push the dictionary slices out of the live window.
    for filler in [b"AAAAAAAA".as_slice(), b"BBBBBBBB".as_slice()] {
        let mut space = driver.get_next_space();
        space.clear();
        space.extend_from_slice(filler);
        driver.commit_space(space);
        driver.skip_matching();
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.hc_matcher().max_window_size,
        base_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
2666
#[test]
fn suffix_store_with_single_slot_does_not_panic_on_keying() {
    // A capacity-1 store exercises the degenerate single-slot keying path.
    let mut store = SuffixStore::with_capacity(1);
    store.insert(b"abcde", 0);
    assert!(store.contains_key(b"abcde"));
    assert_eq!(store.get(b"abcde"), Some(0));
}
2674
#[test]
fn fastest_reset_uses_interleaved_hash_fill_step() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Uncompressed seeds every position; Fastest interleaves.
    driver.reset(CompressionLevel::Uncompressed);
    assert_eq!(driver.match_generator.hash_fill_step, 1);

    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.match_generator.hash_fill_step, FAST_HASH_FILL_STEP);

    // Better switches to the HashChain backend with lazy2; verify that the
    // backend switch happened and lazy_depth is configured correctly.
    driver.reset(CompressionLevel::Better);
    assert_eq!(driver.active_backend, MatcherBackend::HashChain);
    assert_eq!(driver.window_size(), 1u64 << 23);
    assert_eq!(driver.hc_matcher().lazy_depth, 2);
}
2692
#[test]
fn simple_matcher_updates_offset_history_after_emitting_match() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdeabcdeabcde".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    // "abcde" three times: 5 literals, then a 10-byte overlapping match
    // at offset 5.
    let produced = matcher.next_sequence(|seq| {
        assert_eq!(
            seq,
            Sequence::Triple {
                literals: b"abcde",
                offset: 5,
                match_len: 10,
            }
        );
    });
    assert!(produced);
    // The emitted offset must be pushed to the front of the repeat history.
    assert_eq!(matcher.offset_hist, [5, 1, 4]);
}
2714
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep1_before_hash_lookup() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghijabcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    // Position the matcher at the repeated half and make rep1 (offset 10)
    // the only history entry that matches there.
    matcher.suffix_idx = 10;
    matcher.last_idx_in_sequence = 10;
    matcher.offset_hist = [99, 10, 4];

    let tail = &matcher.window.last().unwrap().data[10..];
    assert_eq!(matcher.repcode_candidate(tail, 0), Some((10, 10)));
}
2731
#[test]
fn simple_matcher_repcode_can_target_previous_window_entry() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.skip_matching();
    matcher.add_data(
        b"abcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    // rep1 = 10 reaches back into the previous window entry.
    matcher.offset_hist = [99, 10, 4];

    let current = &matcher.window.last().unwrap().data;
    assert_eq!(matcher.repcode_candidate(current, 0), Some((10, 10)));
}
2752
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep2() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghijabcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.suffix_idx = 10;
    matcher.last_idx_in_sequence = 10;
    // rep1 = 4 cannot match at index 10; rep2 = 10 can and must be tried.
    matcher.offset_hist = [99, 4, 10];

    let tail = &matcher.window.last().unwrap().data[10..];
    assert_eq!(matcher.repcode_candidate(tail, 0), Some((10, 10)));
}
2769
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep0_minus1() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghijabcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.suffix_idx = 10;
    matcher.last_idx_in_sequence = 10;
    // Neither rep1 = 4 nor rep2 = 99 matches, but rep0 - 1 == 10 does.
    matcher.offset_hist = [11, 4, 99];

    let tail = &matcher.window.last().unwrap().data[10..];
    assert_eq!(matcher.repcode_candidate(tail, 0), Some((10, 10)));
}
2786
#[test]
fn simple_matcher_repcode_rejects_offsets_beyond_searchable_prefix() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.skip_matching();
    matcher.add_data(
        b"klmnopqrst".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.suffix_idx = 3;

    // Offset 14 would start before the searchable history at this position,
    // so no match length may be reported.
    let rest = &matcher.window.last().unwrap().data[3..];
    assert_eq!(matcher.offset_match_len(14, rest), None);
}
2806
#[test]
fn simple_matcher_skip_matching_seeds_every_position_even_with_fast_step() {
    let mut matcher = MatchGenerator::new(64);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;
    matcher.add_data(
        b"abcdefghijklmnop".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    // skip_matching() must index every position regardless of the fast
    // interleaved fill step, or "bcdef" (offset 15) would be missed below.
    matcher.skip_matching();
    matcher.add_data(b"bcdef".to_vec(), SuffixStore::with_capacity(64), |_, _| {});

    let found = matcher.next_sequence(|seq| {
        assert_eq!(
            seq,
            Sequence::Triple {
                literals: b"",
                offset: 15,
                match_len: 5,
            }
        );
    });
    assert!(found);
    assert!(!matcher.next_sequence(|_| {}));
}
2831
#[test]
fn simple_matcher_add_suffixes_till_backfills_last_searchable_anchor() {
    let mut matcher = MatchGenerator::new(64);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;
    matcher.add_data(
        b"01234abcde".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.add_suffixes_till(10, FAST_HASH_FILL_STEP);

    // Position 5 is the last anchor from which a full key fits before
    // index 10; it must be registered despite the interleaved step.
    let entry = matcher.window.last().unwrap();
    assert_eq!(entry.suffixes.get(&entry.data[5..10]), Some(5));
}
2847
#[test]
fn simple_matcher_add_suffixes_till_skips_when_idx_below_min_match_len() {
    let mut matcher = MatchGenerator::new(128);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;
    matcher.add_data(
        b"abcdefghijklmnopqrstuvwxyz".to_vec(),
        SuffixStore::with_capacity(1 << 16),
        |_, _| {},
    );

    // No full-length key fits before MIN_MATCH_LEN, so nothing is added.
    matcher.add_suffixes_till(MIN_MATCH_LEN - 1, FAST_HASH_FILL_STEP);

    let entry = matcher.window.last().unwrap();
    assert_eq!(entry.suffixes.get(&entry.data[..MIN_MATCH_LEN]), None);
}
2864
#[test]
fn simple_matcher_add_suffixes_till_fast_step_registers_interleaved_positions() {
    let mut matcher = MatchGenerator::new(128);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;
    matcher.add_data(
        b"abcdefghijklmnopqrstuvwxyz".to_vec(),
        SuffixStore::with_capacity(1 << 16),
        |_, _| {},
    );

    matcher.add_suffixes_till(17, FAST_HASH_FILL_STEP);

    // With a fill step of 3, anchors 0, 3, 6, 9 and 12 must be present.
    let entry = matcher.window.last().unwrap();
    for anchor in (0..=12).step_by(FAST_HASH_FILL_STEP) {
        let key = &entry.data[anchor..anchor + MIN_MATCH_LEN];
        assert_eq!(
            entry.suffixes.get(key),
            Some(anchor),
            "expected interleaved suffix registration at pos {anchor}"
        );
    }
}
2887
#[test]
fn dfast_skip_matching_handles_window_eviction() {
    let mut matcher = DfastMatchGenerator::new(16);

    // Two skipped blocks overflow the 16-byte window and force an eviction;
    // the third block then repeats the second one.
    matcher.add_data(alloc::vec![1, 2, 3, 4, 5, 6], |_| {});
    matcher.skip_matching();
    matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});
    matcher.skip_matching();
    matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});

    // Replay the emitted sequences against the skipped history to verify
    // the round trip still works after eviction.
    let mut decoded = alloc::vec![7, 8, 9, 10, 11, 12];
    matcher.start_matching(|seq| match seq {
        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            decoded.extend_from_slice(literals);
            let base = decoded.len() - offset;
            for step in 0..match_len {
                let byte = decoded[base + step];
                decoded.push(byte);
            }
        }
    });

    assert_eq!(decoded, [7, 8, 9, 10, 11, 12, 7, 8, 9, 10, 11, 12]);
}
2917
#[test]
fn dfast_add_data_callback_reports_evicted_len_not_capacity() {
    let mut matcher = DfastMatchGenerator::new(8);

    // Both blocks carry extra backing capacity so a buggy callback that
    // reported capacity instead of length would be caught.
    let mut first = Vec::with_capacity(64);
    first.extend_from_slice(b"abcdefgh");
    matcher.add_data(first, |_| {});

    let mut second = Vec::with_capacity(64);
    second.extend_from_slice(b"ijklmnop");

    let mut evicted_len = None;
    matcher.add_data(second, |data| evicted_len = Some(data.len()));

    assert_eq!(
        evicted_len,
        Some(8),
        "eviction callback must report evicted byte length, not backing capacity"
    );
}
2940
#[test]
fn dfast_trim_to_window_callback_reports_evicted_len_not_capacity() {
    let mut matcher = DfastMatchGenerator::new(16);

    // Both blocks carry extra backing capacity so a buggy callback that
    // reported capacity instead of length would be caught.
    let mut first = Vec::with_capacity(64);
    first.extend_from_slice(b"abcdefgh");
    matcher.add_data(first, |_| {});

    let mut second = Vec::with_capacity(64);
    second.extend_from_slice(b"ijklmnop");
    matcher.add_data(second, |_| {});

    // Shrinking the window forces the first block out on the next trim.
    matcher.max_window_size = 8;

    let mut evicted_len = None;
    matcher.trim_to_window(|data| evicted_len = Some(data.len()));

    assert_eq!(
        evicted_len,
        Some(8),
        "trim callback must report evicted byte length, not backing capacity"
    );
}
2966
#[test]
fn dfast_inserts_tail_positions_for_next_block_matching() {
    let mut matcher = DfastMatchGenerator::new(1 << 22);

    // First block: nothing to match yet, everything comes out as literals.
    matcher.add_data(b"012345bcdea".to_vec(), |_| {});
    let mut history = Vec::new();
    matcher.start_matching(|seq| match seq {
        Sequence::Literals { literals } => history.extend_from_slice(literals),
        Sequence::Triple { .. } => unreachable!("first block should not match history"),
    });
    assert_eq!(history, b"012345bcdea");

    // Second block begins with "bcdea", which only exists in the previous
    // block's tail — a match here proves the tail positions were indexed.
    matcher.add_data(b"bcdeabcdeab".to_vec(), |_| {});
    let mut matched = false;
    matcher.start_matching(|seq| {
        assert!(!matched, "expected a single cross-block match");
        matched = true;
        match seq {
            Sequence::Literals { .. } => {
                panic!("expected tail-anchored cross-block match before any literals")
            }
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => {
                assert_eq!(literals, b"");
                assert_eq!(offset, 5);
                assert_eq!(match_len, 11);
                // Apply the overlapping copy to keep the history in sync.
                let base = history.len() - offset;
                for step in 0..match_len {
                    let byte = history[base + step];
                    history.push(byte);
                }
            }
        }
    });

    assert!(
        matched,
        "expected tail-anchored cross-block match"
    );
    assert_eq!(history, b"012345bcdeabcdeabcdeab");
}