Skip to main content

structured_zstd/encoding/
match_generator.rs

1//! Matching algorithm used find repeated parts in the original data
2//!
3//! The Zstd format relies on finden repeated sequences of data and compressing these sequences as instructions to the decoder.
4//! A sequence basically tells the decoder "Go back X bytes and copy Y bytes to the end of your decode buffer".
5//!
6//! The task here is to efficiently find matches in the already encoded data for the current suffix of the not yet encoded data.
7
8use alloc::collections::VecDeque;
9use alloc::vec::Vec;
10use core::convert::TryInto;
11use core::num::NonZeroUsize;
12
13use super::CompressionLevel;
14use super::Matcher;
15use super::Sequence;
16use super::blocks::encode_offset_with_history;
17
18const MIN_MATCH_LEN: usize = 5;
19const FAST_HASH_FILL_STEP: usize = 3;
20const DFAST_MIN_MATCH_LEN: usize = 6;
21const ROW_MIN_MATCH_LEN: usize = 6;
22const DFAST_TARGET_LEN: usize = 48;
23// Keep these aligned with the issue's zstd level-3/dfast target unless ratio
24// measurements show we can shrink them without regressing acceptance tests.
25const DFAST_HASH_BITS: usize = 20;
26const DFAST_SEARCH_DEPTH: usize = 4;
27const DFAST_EMPTY_SLOT: usize = usize::MAX;
28const ROW_HASH_BITS: usize = 20;
29const ROW_LOG: usize = 5;
30const ROW_SEARCH_DEPTH: usize = 16;
31const ROW_TARGET_LEN: usize = 48;
32const ROW_TAG_BITS: usize = 8;
33const ROW_EMPTY_SLOT: usize = usize::MAX;
34const ROW_HASH_KEY_LEN: usize = 4;
35
36const HC_HASH_LOG: usize = 20;
37const HC_CHAIN_LOG: usize = 19;
38const HC_SEARCH_DEPTH: usize = 16;
39const HC_MIN_MATCH_LEN: usize = 5;
40const HC_TARGET_LEN: usize = 48;
41// Positions are stored as (relative_pos + 1) so that 0 is a safe empty
42// sentinel that can never collide with any valid position.
43const HC_EMPTY: u32 = 0;
44
45// Maximum search depth across all HC-based levels. Used to size the
46// fixed-length candidate array returned by chain_candidates().
47const MAX_HC_SEARCH_DEPTH: usize = 32;
48
49/// Bundled tuning knobs for the hash-chain matcher. Using a typed config
50/// instead of positional `usize` args eliminates parameter-order hazards.
51#[derive(Copy, Clone)]
52struct HcConfig {
53    hash_log: usize,
54    chain_log: usize,
55    search_depth: usize,
56    target_len: usize,
57}
58
59#[derive(Copy, Clone)]
60struct RowConfig {
61    hash_bits: usize,
62    row_log: usize,
63    search_depth: usize,
64    target_len: usize,
65}
66
67const HC_CONFIG: HcConfig = HcConfig {
68    hash_log: HC_HASH_LOG,
69    chain_log: HC_CHAIN_LOG,
70    search_depth: HC_SEARCH_DEPTH,
71    target_len: HC_TARGET_LEN,
72};
73
74/// Best-level: deeper search, larger tables, higher target length.
75const BEST_HC_CONFIG: HcConfig = HcConfig {
76    hash_log: 21,
77    chain_log: 20,
78    search_depth: 32,
79    target_len: 128,
80};
81
82const ROW_CONFIG: RowConfig = RowConfig {
83    hash_bits: ROW_HASH_BITS,
84    row_log: ROW_LOG,
85    search_depth: ROW_SEARCH_DEPTH,
86    target_len: ROW_TARGET_LEN,
87};
88
89/// Resolved tuning parameters for a compression level.
90#[derive(Copy, Clone)]
91struct LevelParams {
92    backend: MatcherBackend,
93    window_log: u8,
94    hash_fill_step: usize,
95    lazy_depth: u8,
96    hc: HcConfig,
97    row: RowConfig,
98}
99
100fn dfast_hash_bits_for_window(max_window_size: usize) -> usize {
101    let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
102    window_log.clamp(MIN_WINDOW_LOG as usize, DFAST_HASH_BITS)
103}
104
105fn row_hash_bits_for_window(max_window_size: usize) -> usize {
106    let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
107    window_log.clamp(MIN_WINDOW_LOG as usize, ROW_HASH_BITS)
108}
109
110/// Parameter table for numeric compression levels 1–22.
111///
112/// Each entry maps a zstd compression level to the best-available matcher
113/// backend and tuning knobs.  Levels that require strategies this crate does
114/// not implement (greedy, btopt, btultra) are approximated with the closest
115/// available backend.
116///
117/// Index 0 = level 1, index 21 = level 22.
118#[rustfmt::skip]
119const LEVEL_TABLE: [LevelParams; 22] = [
120    // Lvl  Strategy       wlog  step  lazy  HC config                                   row config
121    // ---  -------------- ----  ----  ----  ------------------------------------------  ----------
122    /* 1 */ LevelParams { backend: MatcherBackend::Simple,    window_log: 17, hash_fill_step: 3, lazy_depth: 0, hc: HC_CONFIG, row: ROW_CONFIG },
123    /* 2 */ LevelParams { backend: MatcherBackend::Dfast,     window_log: 19, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
124    /* 3 */ LevelParams { backend: MatcherBackend::Dfast,     window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
125    /* 4 */ LevelParams { backend: MatcherBackend::Row,       window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
126    /* 5 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HcConfig { hash_log: 18, chain_log: 17, search_depth: 4,  target_len: 32  }, row: ROW_CONFIG },
127    /* 6 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 1, hc: HcConfig { hash_log: 19, chain_log: 18, search_depth: 8,  target_len: 48  }, row: ROW_CONFIG },
128    /* 7 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 16, target_len: 48  }, row: ROW_CONFIG },
129    /* 8 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 24, target_len: 64  }, row: ROW_CONFIG },
130    /* 9 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 24, target_len: 64  }, row: ROW_CONFIG },
131    /*10 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 24, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 28, target_len: 96  }, row: ROW_CONFIG },
132    /*11 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 24, hash_fill_step: 1, lazy_depth: 2, hc: BEST_HC_CONFIG, row: ROW_CONFIG },
133    /*12 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 128 }, row: ROW_CONFIG },
134    /*13 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 160 }, row: ROW_CONFIG },
135    /*14 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 22, search_depth: 32, target_len: 192 }, row: ROW_CONFIG },
136    /*15 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 22, search_depth: 32, target_len: 192 }, row: ROW_CONFIG },
137    /*16 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 22, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
138    /*17 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
139    /*18 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
140    /*19 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
141    /*20 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
142    /*21 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
143    /*22 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
144];
145
146/// Smallest window_log the encoder will use regardless of source size.
147const MIN_WINDOW_LOG: u8 = 10;
148
149/// Adjust level parameters for a known source size.
150///
151/// This derives a cap from `ceil(log2(src_size))`, then clamps it to
152/// [`MIN_WINDOW_LOG`]. A zero-byte size hint is treated as
153/// [`MIN_WINDOW_LOG`]. This keeps tables bounded for
154/// small inputs while preserving the encoder's minimum supported window.
155/// For the HC backend, `hash_log` and `chain_log` are reduced
156/// proportionally.
157fn adjust_params_for_source_size(mut params: LevelParams, src_size: u64) -> LevelParams {
158    // Derive a source-size-based cap from ceil(log2(src_size)), then
159    // clamp to MIN_WINDOW_LOG. For inputs smaller than 1 KiB (or zero) we keep the
160    // 1 KiB minimum window instead of shrinking below that floor.
161    let src_log = if src_size == 0 {
162        MIN_WINDOW_LOG
163    } else {
164        (64 - (src_size - 1).leading_zeros()) as u8 // ceil_log2
165    };
166    let src_log = src_log.max(MIN_WINDOW_LOG);
167    if src_log < params.window_log {
168        params.window_log = src_log;
169    }
170    // For HC backend: also cap hash_log and chain_log so tables are
171    // proportional to the source, avoiding multi-MB allocations for
172    // tiny inputs.
173    if params.backend == MatcherBackend::HashChain {
174        if (src_log + 2) < params.hc.hash_log as u8 {
175            params.hc.hash_log = (src_log + 2) as usize;
176        }
177        if (src_log + 1) < params.hc.chain_log as u8 {
178            params.hc.chain_log = (src_log + 1) as usize;
179        }
180    } else if params.backend == MatcherBackend::Row {
181        let max_window_size = 1usize << params.window_log;
182        params.row.hash_bits = row_hash_bits_for_window(max_window_size);
183    }
184    params
185}
186
187/// Resolve a [`CompressionLevel`] to internal tuning parameters,
188/// optionally adjusted for a known source size.
189fn resolve_level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
190    let params = match level {
191        CompressionLevel::Uncompressed => LevelParams {
192            backend: MatcherBackend::Simple,
193            window_log: 17,
194            hash_fill_step: 1,
195            lazy_depth: 0,
196            hc: HC_CONFIG,
197            row: ROW_CONFIG,
198        },
199        CompressionLevel::Fastest => LEVEL_TABLE[0],
200        CompressionLevel::Default => LEVEL_TABLE[2],
201        CompressionLevel::Better => LEVEL_TABLE[6],
202        CompressionLevel::Best => LEVEL_TABLE[10],
203        CompressionLevel::Level(n) => {
204            if n > 0 {
205                let idx = (n as usize).min(CompressionLevel::MAX_LEVEL as usize) - 1;
206                LEVEL_TABLE[idx]
207            } else if n == 0 {
208                // Level 0 = default, matching C zstd semantics.
209                LEVEL_TABLE[CompressionLevel::DEFAULT_LEVEL as usize - 1]
210            } else {
211                // Negative levels: ultra-fast with the Simple backend.
212                // Acceleration grows with magnitude, expressed as larger
213                // hash_fill_step (fewer positions indexed).
214                let acceleration =
215                    (n.saturating_abs() as usize).min((-CompressionLevel::MIN_LEVEL) as usize);
216                let step = (acceleration + 3).min(128);
217                LevelParams {
218                    backend: MatcherBackend::Simple,
219                    window_log: 17,
220                    hash_fill_step: step,
221                    lazy_depth: 0,
222                    hc: HC_CONFIG,
223                    row: ROW_CONFIG,
224                }
225            }
226        }
227    };
228    if let Some(size) = source_size {
229        adjust_params_for_source_size(params, size)
230    } else {
231        params
232    }
233}
234
235#[derive(Copy, Clone, Debug, PartialEq, Eq)]
236enum MatcherBackend {
237    Simple,
238    Dfast,
239    Row,
240    HashChain,
241}
242
243/// This is the default implementation of the `Matcher` trait. It allocates and reuses the buffers when possible.
244pub struct MatchGeneratorDriver {
245    vec_pool: Vec<Vec<u8>>,
246    suffix_pool: Vec<SuffixStore>,
247    match_generator: MatchGenerator,
248    dfast_match_generator: Option<DfastMatchGenerator>,
249    row_match_generator: Option<RowMatchGenerator>,
250    hc_match_generator: Option<HcMatchGenerator>,
251    active_backend: MatcherBackend,
252    slice_size: usize,
253    base_slice_size: usize,
254    // Frame header window size must stay at the configured live-window budget.
255    // Dictionary retention expands internal matcher capacity only.
256    reported_window_size: usize,
257    // Tracks currently retained bytes that originated from primed dictionary
258    // history and have not been evicted yet.
259    dictionary_retained_budget: usize,
260    // Source size hint for next frame (set via set_source_size_hint, cleared on reset).
261    source_size_hint: Option<u64>,
262}
263
264impl MatchGeneratorDriver {
265    /// `slice_size` sets the base block allocation size used for matcher input chunks.
266    /// `max_slices_in_window` determines the initial window capacity at construction
267    /// time. Effective window sizing is recalculated on every [`reset`](Self::reset)
268    /// from the resolved compression level and optional source-size hint.
269    pub(crate) fn new(slice_size: usize, max_slices_in_window: usize) -> Self {
270        let max_window_size = max_slices_in_window * slice_size;
271        Self {
272            vec_pool: Vec::new(),
273            suffix_pool: Vec::new(),
274            match_generator: MatchGenerator::new(max_window_size),
275            dfast_match_generator: None,
276            row_match_generator: None,
277            hc_match_generator: None,
278            active_backend: MatcherBackend::Simple,
279            slice_size,
280            base_slice_size: slice_size,
281            reported_window_size: max_window_size,
282            dictionary_retained_budget: 0,
283            source_size_hint: None,
284        }
285    }
286
287    fn level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
288        resolve_level_params(level, source_size)
289    }
290
291    fn dfast_matcher(&self) -> &DfastMatchGenerator {
292        self.dfast_match_generator
293            .as_ref()
294            .expect("dfast backend must be initialized by reset() before use")
295    }
296
297    fn dfast_matcher_mut(&mut self) -> &mut DfastMatchGenerator {
298        self.dfast_match_generator
299            .as_mut()
300            .expect("dfast backend must be initialized by reset() before use")
301    }
302
303    fn row_matcher(&self) -> &RowMatchGenerator {
304        self.row_match_generator
305            .as_ref()
306            .expect("row backend must be initialized by reset() before use")
307    }
308
309    fn row_matcher_mut(&mut self) -> &mut RowMatchGenerator {
310        self.row_match_generator
311            .as_mut()
312            .expect("row backend must be initialized by reset() before use")
313    }
314
315    fn hc_matcher(&self) -> &HcMatchGenerator {
316        self.hc_match_generator
317            .as_ref()
318            .expect("hash chain backend must be initialized by reset() before use")
319    }
320
321    fn hc_matcher_mut(&mut self) -> &mut HcMatchGenerator {
322        self.hc_match_generator
323            .as_mut()
324            .expect("hash chain backend must be initialized by reset() before use")
325    }
326
327    fn retire_dictionary_budget(&mut self, evicted_bytes: usize) {
328        let reclaimed = evicted_bytes.min(self.dictionary_retained_budget);
329        if reclaimed == 0 {
330            return;
331        }
332        self.dictionary_retained_budget -= reclaimed;
333        match self.active_backend {
334            MatcherBackend::Simple => {
335                self.match_generator.max_window_size = self
336                    .match_generator
337                    .max_window_size
338                    .saturating_sub(reclaimed);
339            }
340            MatcherBackend::Dfast => {
341                let matcher = self.dfast_matcher_mut();
342                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
343            }
344            MatcherBackend::Row => {
345                let matcher = self.row_matcher_mut();
346                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
347            }
348            MatcherBackend::HashChain => {
349                let matcher = self.hc_matcher_mut();
350                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
351            }
352        }
353    }
354
355    fn trim_after_budget_retire(&mut self) {
356        loop {
357            let mut evicted_bytes = 0usize;
358            match self.active_backend {
359                MatcherBackend::Simple => {
360                    let vec_pool = &mut self.vec_pool;
361                    let suffix_pool = &mut self.suffix_pool;
362                    self.match_generator.reserve(0, |mut data, mut suffixes| {
363                        evicted_bytes += data.len();
364                        data.resize(data.capacity(), 0);
365                        vec_pool.push(data);
366                        suffixes.slots.clear();
367                        suffixes.slots.resize(suffixes.slots.capacity(), None);
368                        suffix_pool.push(suffixes);
369                    });
370                }
371                MatcherBackend::Dfast => {
372                    let mut retired = Vec::new();
373                    self.dfast_matcher_mut().trim_to_window(|data| {
374                        evicted_bytes += data.len();
375                        retired.push(data);
376                    });
377                    for mut data in retired {
378                        data.resize(data.capacity(), 0);
379                        self.vec_pool.push(data);
380                    }
381                }
382                MatcherBackend::Row => {
383                    let mut retired = Vec::new();
384                    self.row_matcher_mut().trim_to_window(|data| {
385                        evicted_bytes += data.len();
386                        retired.push(data);
387                    });
388                    for mut data in retired {
389                        data.resize(data.capacity(), 0);
390                        self.vec_pool.push(data);
391                    }
392                }
393                MatcherBackend::HashChain => {
394                    let mut retired = Vec::new();
395                    self.hc_matcher_mut().trim_to_window(|data| {
396                        evicted_bytes += data.len();
397                        retired.push(data);
398                    });
399                    for mut data in retired {
400                        data.resize(data.capacity(), 0);
401                        self.vec_pool.push(data);
402                    }
403                }
404            }
405            if evicted_bytes == 0 {
406                break;
407            }
408            self.retire_dictionary_budget(evicted_bytes);
409        }
410    }
411}
412
413impl Matcher for MatchGeneratorDriver {
414    fn supports_dictionary_priming(&self) -> bool {
415        true
416    }
417
418    fn set_source_size_hint(&mut self, size: u64) {
419        self.source_size_hint = Some(size);
420    }
421
422    fn reset(&mut self, level: CompressionLevel) {
423        let hint = self.source_size_hint.take();
424        let hinted = hint.is_some();
425        let params = Self::level_params(level, hint);
426        let max_window_size = 1usize << params.window_log;
427        self.dictionary_retained_budget = 0;
428        if self.active_backend != params.backend {
429            match self.active_backend {
430                MatcherBackend::Simple => {
431                    let vec_pool = &mut self.vec_pool;
432                    let suffix_pool = &mut self.suffix_pool;
433                    self.match_generator.reset(|mut data, mut suffixes| {
434                        data.resize(data.capacity(), 0);
435                        vec_pool.push(data);
436                        suffixes.slots.clear();
437                        suffixes.slots.resize(suffixes.slots.capacity(), None);
438                        suffix_pool.push(suffixes);
439                    });
440                }
441                MatcherBackend::Dfast => {
442                    if let Some(dfast) = self.dfast_match_generator.as_mut() {
443                        let vec_pool = &mut self.vec_pool;
444                        dfast.reset(|mut data| {
445                            data.resize(data.capacity(), 0);
446                            vec_pool.push(data);
447                        });
448                    }
449                }
450                MatcherBackend::Row => {
451                    if let Some(row) = self.row_match_generator.as_mut() {
452                        row.row_heads = Vec::new();
453                        row.row_positions = Vec::new();
454                        row.row_tags = Vec::new();
455                        let vec_pool = &mut self.vec_pool;
456                        row.reset(|mut data| {
457                            data.resize(data.capacity(), 0);
458                            vec_pool.push(data);
459                        });
460                    }
461                }
462                MatcherBackend::HashChain => {
463                    if let Some(hc) = self.hc_match_generator.as_mut() {
464                        // Release oversized tables when switching away from
465                        // HashChain so Best's larger allocations don't persist.
466                        hc.hash_table = Vec::new();
467                        hc.chain_table = Vec::new();
468                        let vec_pool = &mut self.vec_pool;
469                        hc.reset(|mut data| {
470                            data.resize(data.capacity(), 0);
471                            vec_pool.push(data);
472                        });
473                    }
474                }
475            }
476        }
477
478        self.active_backend = params.backend;
479        self.slice_size = self.base_slice_size.min(max_window_size);
480        self.reported_window_size = max_window_size;
481        match self.active_backend {
482            MatcherBackend::Simple => {
483                let vec_pool = &mut self.vec_pool;
484                let suffix_pool = &mut self.suffix_pool;
485                self.match_generator.max_window_size = max_window_size;
486                self.match_generator.hash_fill_step = params.hash_fill_step;
487                self.match_generator.reset(|mut data, mut suffixes| {
488                    data.resize(data.capacity(), 0);
489                    vec_pool.push(data);
490                    suffixes.slots.clear();
491                    suffixes.slots.resize(suffixes.slots.capacity(), None);
492                    suffix_pool.push(suffixes);
493                });
494            }
495            MatcherBackend::Dfast => {
496                let dfast = self
497                    .dfast_match_generator
498                    .get_or_insert_with(|| DfastMatchGenerator::new(max_window_size));
499                dfast.max_window_size = max_window_size;
500                dfast.lazy_depth = params.lazy_depth;
501                dfast.set_hash_bits(if hinted {
502                    dfast_hash_bits_for_window(max_window_size)
503                } else {
504                    DFAST_HASH_BITS
505                });
506                let vec_pool = &mut self.vec_pool;
507                dfast.reset(|mut data| {
508                    data.resize(data.capacity(), 0);
509                    vec_pool.push(data);
510                });
511            }
512            MatcherBackend::Row => {
513                let row = self
514                    .row_match_generator
515                    .get_or_insert_with(|| RowMatchGenerator::new(max_window_size));
516                row.max_window_size = max_window_size;
517                row.lazy_depth = params.lazy_depth;
518                row.configure(params.row);
519                if hinted {
520                    row.set_hash_bits(row_hash_bits_for_window(max_window_size));
521                }
522                let vec_pool = &mut self.vec_pool;
523                row.reset(|mut data| {
524                    data.resize(data.capacity(), 0);
525                    vec_pool.push(data);
526                });
527            }
528            MatcherBackend::HashChain => {
529                let hc = self
530                    .hc_match_generator
531                    .get_or_insert_with(|| HcMatchGenerator::new(max_window_size));
532                hc.max_window_size = max_window_size;
533                hc.lazy_depth = params.lazy_depth;
534                hc.configure(params.hc);
535                let vec_pool = &mut self.vec_pool;
536                hc.reset(|mut data| {
537                    data.resize(data.capacity(), 0);
538                    vec_pool.push(data);
539                });
540            }
541        }
542    }
543
544    fn prime_with_dictionary(&mut self, dict_content: &[u8], offset_hist: [u32; 3]) {
545        match self.active_backend {
546            MatcherBackend::Simple => self.match_generator.offset_hist = offset_hist,
547            MatcherBackend::Dfast => self.dfast_matcher_mut().offset_hist = offset_hist,
548            MatcherBackend::Row => self.row_matcher_mut().offset_hist = offset_hist,
549            MatcherBackend::HashChain => self.hc_matcher_mut().offset_hist = offset_hist,
550        }
551
552        if dict_content.is_empty() {
553            return;
554        }
555
556        // Dictionary bytes should stay addressable until produced frame output
557        // itself exceeds the live window size.
558        let retained_dict_budget = dict_content.len();
559        match self.active_backend {
560            MatcherBackend::Simple => {
561                self.match_generator.max_window_size = self
562                    .match_generator
563                    .max_window_size
564                    .saturating_add(retained_dict_budget);
565            }
566            MatcherBackend::Dfast => {
567                let matcher = self.dfast_matcher_mut();
568                matcher.max_window_size =
569                    matcher.max_window_size.saturating_add(retained_dict_budget);
570            }
571            MatcherBackend::Row => {
572                let matcher = self.row_matcher_mut();
573                matcher.max_window_size =
574                    matcher.max_window_size.saturating_add(retained_dict_budget);
575            }
576            MatcherBackend::HashChain => {
577                let matcher = self.hc_matcher_mut();
578                matcher.max_window_size =
579                    matcher.max_window_size.saturating_add(retained_dict_budget);
580            }
581        }
582
583        let mut start = 0usize;
584        let mut committed_dict_budget = 0usize;
585        // insert_position needs 4 bytes of lookahead for hashing;
586        // backfill_boundary_positions re-visits tail positions once the
587        // next slice extends history, but cannot hash <4 byte fragments.
588        let min_primed_tail = match self.active_backend {
589            MatcherBackend::Simple => MIN_MATCH_LEN,
590            MatcherBackend::Dfast | MatcherBackend::Row | MatcherBackend::HashChain => 4,
591        };
592        while start < dict_content.len() {
593            let end = (start + self.slice_size).min(dict_content.len());
594            if end - start < min_primed_tail {
595                break;
596            }
597            let mut space = self.get_next_space();
598            space.clear();
599            space.extend_from_slice(&dict_content[start..end]);
600            self.commit_space(space);
601            self.skip_matching();
602            committed_dict_budget += end - start;
603            start = end;
604        }
605
606        let uncommitted_tail_budget = retained_dict_budget.saturating_sub(committed_dict_budget);
607        if uncommitted_tail_budget > 0 {
608            match self.active_backend {
609                MatcherBackend::Simple => {
610                    self.match_generator.max_window_size = self
611                        .match_generator
612                        .max_window_size
613                        .saturating_sub(uncommitted_tail_budget);
614                }
615                MatcherBackend::Dfast => {
616                    let matcher = self.dfast_matcher_mut();
617                    matcher.max_window_size = matcher
618                        .max_window_size
619                        .saturating_sub(uncommitted_tail_budget);
620                }
621                MatcherBackend::Row => {
622                    let matcher = self.row_matcher_mut();
623                    matcher.max_window_size = matcher
624                        .max_window_size
625                        .saturating_sub(uncommitted_tail_budget);
626                }
627                MatcherBackend::HashChain => {
628                    let matcher = self.hc_matcher_mut();
629                    matcher.max_window_size = matcher
630                        .max_window_size
631                        .saturating_sub(uncommitted_tail_budget);
632                }
633            }
634        }
635        if committed_dict_budget > 0 {
636            self.dictionary_retained_budget = self
637                .dictionary_retained_budget
638                .saturating_add(committed_dict_budget);
639        }
640    }
641
642    fn window_size(&self) -> u64 {
643        self.reported_window_size as u64
644    }
645
646    fn get_next_space(&mut self) -> Vec<u8> {
647        if let Some(mut space) = self.vec_pool.pop() {
648            if space.len() > self.slice_size {
649                space.truncate(self.slice_size);
650            }
651            if space.len() < self.slice_size {
652                space.resize(self.slice_size, 0);
653            }
654            return space;
655        }
656        alloc::vec![0; self.slice_size]
657    }
658
659    fn get_last_space(&mut self) -> &[u8] {
660        match self.active_backend {
661            MatcherBackend::Simple => self.match_generator.window.last().unwrap().data.as_slice(),
662            MatcherBackend::Dfast => self.dfast_matcher().get_last_space(),
663            MatcherBackend::Row => self.row_matcher().get_last_space(),
664            MatcherBackend::HashChain => self.hc_matcher().get_last_space(),
665        }
666    }
667
668    fn commit_space(&mut self, space: Vec<u8>) {
669        match self.active_backend {
670            MatcherBackend::Simple => {
671                let vec_pool = &mut self.vec_pool;
672                let mut evicted_bytes = 0usize;
673                let suffixes = match self.suffix_pool.pop() {
674                    Some(store) if store.slots.len() >= space.len() => store,
675                    _ => SuffixStore::with_capacity(space.len()),
676                };
677                let suffix_pool = &mut self.suffix_pool;
678                self.match_generator
679                    .add_data(space, suffixes, |mut data, mut suffixes| {
680                        evicted_bytes += data.len();
681                        data.resize(data.capacity(), 0);
682                        vec_pool.push(data);
683                        suffixes.slots.clear();
684                        suffixes.slots.resize(suffixes.slots.capacity(), None);
685                        suffix_pool.push(suffixes);
686                    });
687                self.retire_dictionary_budget(evicted_bytes);
688                self.trim_after_budget_retire();
689            }
690            MatcherBackend::Dfast => {
691                let vec_pool = &mut self.vec_pool;
692                let mut evicted_bytes = 0usize;
693                self.dfast_match_generator
694                    .as_mut()
695                    .expect("dfast backend must be initialized by reset() before use")
696                    .add_data(space, |mut data| {
697                        evicted_bytes += data.len();
698                        data.resize(data.capacity(), 0);
699                        vec_pool.push(data);
700                    });
701                self.retire_dictionary_budget(evicted_bytes);
702                self.trim_after_budget_retire();
703            }
704            MatcherBackend::Row => {
705                let vec_pool = &mut self.vec_pool;
706                let mut evicted_bytes = 0usize;
707                self.row_match_generator
708                    .as_mut()
709                    .expect("row backend must be initialized by reset() before use")
710                    .add_data(space, |mut data| {
711                        evicted_bytes += data.len();
712                        data.resize(data.capacity(), 0);
713                        vec_pool.push(data);
714                    });
715                self.retire_dictionary_budget(evicted_bytes);
716                self.trim_after_budget_retire();
717            }
718            MatcherBackend::HashChain => {
719                let vec_pool = &mut self.vec_pool;
720                let mut evicted_bytes = 0usize;
721                self.hc_match_generator
722                    .as_mut()
723                    .expect("hash chain backend must be initialized by reset() before use")
724                    .add_data(space, |mut data| {
725                        evicted_bytes += data.len();
726                        data.resize(data.capacity(), 0);
727                        vec_pool.push(data);
728                    });
729                self.retire_dictionary_budget(evicted_bytes);
730                self.trim_after_budget_retire();
731            }
732        }
733    }
734
735    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
736        match self.active_backend {
737            MatcherBackend::Simple => {
738                while self.match_generator.next_sequence(&mut handle_sequence) {}
739            }
740            MatcherBackend::Dfast => self
741                .dfast_matcher_mut()
742                .start_matching(&mut handle_sequence),
743            MatcherBackend::Row => self.row_matcher_mut().start_matching(&mut handle_sequence),
744            MatcherBackend::HashChain => self.hc_matcher_mut().start_matching(&mut handle_sequence),
745        }
746    }
747    fn skip_matching(&mut self) {
748        match self.active_backend {
749            MatcherBackend::Simple => self.match_generator.skip_matching(),
750            MatcherBackend::Dfast => self.dfast_matcher_mut().skip_matching(),
751            MatcherBackend::Row => self.row_matcher_mut().skip_matching(),
752            MatcherBackend::HashChain => self.hc_matcher_mut().skip_matching(),
753        }
754    }
755}
756
757/// This stores the index of a suffix of a string by hashing the first few bytes of that suffix
758/// This means that collisions just overwrite and that you need to check validity after a get
759struct SuffixStore {
760    // We use NonZeroUsize to enable niche optimization here.
761    // On store we do +1 and on get -1
762    // This is ok since usize::MAX is never a valid offset
763    slots: Vec<Option<NonZeroUsize>>,
764    len_log: u32,
765}
766
767impl SuffixStore {
768    fn with_capacity(capacity: usize) -> Self {
769        Self {
770            slots: alloc::vec![None; capacity],
771            len_log: capacity.ilog2(),
772        }
773    }
774
775    #[inline(always)]
776    fn insert(&mut self, suffix: &[u8], idx: usize) {
777        let key = self.key(suffix);
778        self.slots[key] = Some(NonZeroUsize::new(idx + 1).unwrap());
779    }
780
781    #[inline(always)]
782    fn contains_key(&self, suffix: &[u8]) -> bool {
783        let key = self.key(suffix);
784        self.slots[key].is_some()
785    }
786
787    #[inline(always)]
788    fn get(&self, suffix: &[u8]) -> Option<usize> {
789        let key = self.key(suffix);
790        self.slots[key].map(|x| <NonZeroUsize as Into<usize>>::into(x) - 1)
791    }
792
793    #[inline(always)]
794    fn key(&self, suffix: &[u8]) -> usize {
795        // Capacity=1 yields len_log=0; shifting by 64 would panic.
796        if self.len_log == 0 {
797            return 0;
798        }
799
800        let s0 = suffix[0] as u64;
801        let s1 = suffix[1] as u64;
802        let s2 = suffix[2] as u64;
803        let s3 = suffix[3] as u64;
804        let s4 = suffix[4] as u64;
805
806        const POLY: u64 = 0xCF3BCCDCABu64;
807
808        let s0 = (s0 << 24).wrapping_mul(POLY);
809        let s1 = (s1 << 32).wrapping_mul(POLY);
810        let s2 = (s2 << 40).wrapping_mul(POLY);
811        let s3 = (s3 << 48).wrapping_mul(POLY);
812        let s4 = (s4 << 56).wrapping_mul(POLY);
813
814        let index = s0 ^ s1 ^ s2 ^ s3 ^ s4;
815        let index = index >> (64 - self.len_log);
816        index as usize % self.slots.len()
817    }
818}
819
820/// We keep a window of a few of these entries
821/// All of these are valid targets for a match to be generated for
822struct WindowEntry {
823    data: Vec<u8>,
824    /// Stores indexes into data
825    suffixes: SuffixStore,
826    /// Makes offset calculations efficient
827    base_offset: usize,
828}
829
830pub(crate) struct MatchGenerator {
831    max_window_size: usize,
832    /// Data window we are operating on to find matches
833    /// The data we want to find matches for is in the last slice
834    window: Vec<WindowEntry>,
835    window_size: usize,
836    #[cfg(debug_assertions)]
837    concat_window: Vec<u8>,
838    /// Index in the last slice that we already processed
839    suffix_idx: usize,
840    /// Gets updated when a new sequence is returned to point right behind that sequence
841    last_idx_in_sequence: usize,
842    hash_fill_step: usize,
843    offset_hist: [u32; 3],
844}
845
846impl MatchGenerator {
847    #[inline(always)]
848    #[cfg(target_endian = "little")]
849    fn mismatch_byte_index(diff: usize) -> usize {
850        diff.trailing_zeros() as usize / 8
851    }
852
853    #[inline(always)]
854    #[cfg(target_endian = "big")]
855    fn mismatch_byte_index(diff: usize) -> usize {
856        diff.leading_zeros() as usize / 8
857    }
858
859    /// max_size defines how many bytes will be used at most in the window used for matching
860    fn new(max_size: usize) -> Self {
861        Self {
862            max_window_size: max_size,
863            window: Vec::new(),
864            window_size: 0,
865            #[cfg(debug_assertions)]
866            concat_window: Vec::new(),
867            suffix_idx: 0,
868            last_idx_in_sequence: 0,
869            hash_fill_step: 1,
870            offset_hist: [1, 4, 8],
871        }
872    }
873
874    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
875        self.window_size = 0;
876        #[cfg(debug_assertions)]
877        self.concat_window.clear();
878        self.suffix_idx = 0;
879        self.last_idx_in_sequence = 0;
880        self.offset_hist = [1, 4, 8];
881        self.window.drain(..).for_each(|entry| {
882            reuse_space(entry.data, entry.suffixes);
883        });
884    }
885
886    /// Processes bytes in the current window until either a match is found or no more matches can be found
887    /// * If a match is found handle_sequence is called with the Triple variant
888    /// * If no more matches can be found but there are bytes still left handle_sequence is called with the Literals variant
889    /// * If no more matches can be found and no more bytes are left this returns false
890    fn next_sequence(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) -> bool {
891        loop {
892            let last_entry = self.window.last().unwrap();
893            let data_slice = &last_entry.data;
894
895            // We already reached the end of the window, check if we need to return a Literals{}
896            if self.suffix_idx >= data_slice.len() {
897                if self.last_idx_in_sequence != self.suffix_idx {
898                    let literals = &data_slice[self.last_idx_in_sequence..];
899                    self.last_idx_in_sequence = self.suffix_idx;
900                    handle_sequence(Sequence::Literals { literals });
901                    return true;
902                } else {
903                    return false;
904                }
905            }
906
907            // If the remaining data is smaller than the minimum match length we can stop and return a Literals{}
908            let data_slice = &data_slice[self.suffix_idx..];
909            if data_slice.len() < MIN_MATCH_LEN {
910                let last_idx_in_sequence = self.last_idx_in_sequence;
911                self.last_idx_in_sequence = last_entry.data.len();
912                self.suffix_idx = last_entry.data.len();
913                handle_sequence(Sequence::Literals {
914                    literals: &last_entry.data[last_idx_in_sequence..],
915                });
916                return true;
917            }
918
919            // This is the key we are looking to find a match for
920            let key = &data_slice[..MIN_MATCH_LEN];
921            let literals_len = self.suffix_idx - self.last_idx_in_sequence;
922
923            // Look in each window entry
924            let mut candidate = self.repcode_candidate(data_slice, literals_len);
925            for match_entry in self.window.iter() {
926                if let Some(match_index) = match_entry.suffixes.get(key) {
927                    let match_slice = &match_entry.data[match_index..];
928
929                    // Check how long the common prefix actually is
930                    let match_len = Self::common_prefix_len(match_slice, data_slice);
931
932                    // Collisions in the suffix store might make this check fail
933                    if match_len >= MIN_MATCH_LEN {
934                        let offset = match_entry.base_offset + self.suffix_idx - match_index;
935
936                        // If we are in debug/tests make sure the match we found is actually at the offset we calculated
937                        #[cfg(debug_assertions)]
938                        {
939                            let unprocessed = last_entry.data.len() - self.suffix_idx;
940                            let start = self.concat_window.len() - unprocessed - offset;
941                            let end = start + match_len;
942                            let check_slice = &self.concat_window[start..end];
943                            debug_assert_eq!(check_slice, &match_slice[..match_len]);
944                        }
945
946                        if let Some((old_offset, old_match_len)) = candidate {
947                            if match_len > old_match_len
948                                || (match_len == old_match_len && offset < old_offset)
949                            {
950                                candidate = Some((offset, match_len));
951                            }
952                        } else {
953                            candidate = Some((offset, match_len));
954                        }
955                    }
956                }
957            }
958
959            if let Some((offset, match_len)) = candidate {
960                // For each index in the match we found we do not need to look for another match
961                // But we still want them registered in the suffix store
962                self.add_suffixes_till(self.suffix_idx + match_len, self.hash_fill_step);
963
964                // All literals that were not included between this match and the last are now included here
965                let last_entry = self.window.last().unwrap();
966                let literals = &last_entry.data[self.last_idx_in_sequence..self.suffix_idx];
967
968                // Update the indexes, all indexes upto and including the current index have been included in a sequence now
969                self.suffix_idx += match_len;
970                self.last_idx_in_sequence = self.suffix_idx;
971                let _ = encode_offset_with_history(
972                    offset as u32,
973                    literals.len() as u32,
974                    &mut self.offset_hist,
975                );
976                handle_sequence(Sequence::Triple {
977                    literals,
978                    offset,
979                    match_len,
980                });
981
982                return true;
983            }
984
985            let last_entry = self.window.last_mut().unwrap();
986            let key = &last_entry.data[self.suffix_idx..self.suffix_idx + MIN_MATCH_LEN];
987            if !last_entry.suffixes.contains_key(key) {
988                last_entry.suffixes.insert(key, self.suffix_idx);
989            }
990            self.suffix_idx += 1;
991        }
992    }
993
994    /// Find the common prefix length between two byte slices
995    #[inline(always)]
996    fn common_prefix_len(a: &[u8], b: &[u8]) -> usize {
997        let max = a.len().min(b.len());
998        let chunk = core::mem::size_of::<usize>();
999        let mut off = 0usize;
1000        let lhs = a.as_ptr();
1001        let rhs = b.as_ptr();
1002
1003        while off + chunk <= max {
1004            let lhs_word = unsafe { core::ptr::read_unaligned(lhs.add(off) as *const usize) };
1005            let rhs_word = unsafe { core::ptr::read_unaligned(rhs.add(off) as *const usize) };
1006            let diff = lhs_word ^ rhs_word;
1007            if diff != 0 {
1008                return off + Self::mismatch_byte_index(diff);
1009            }
1010            off += chunk;
1011        }
1012
1013        off + core::iter::zip(&a[off..max], &b[off..max])
1014            .take_while(|(x, y)| x == y)
1015            .count()
1016    }
1017
1018    /// Process bytes and add the suffixes to the suffix store up to a specific index
1019    #[inline(always)]
1020    fn add_suffixes_till(&mut self, idx: usize, fill_step: usize) {
1021        let start = self.suffix_idx;
1022        let last_entry = self.window.last_mut().unwrap();
1023        if last_entry.data.len() < MIN_MATCH_LEN {
1024            return;
1025        }
1026        let insert_limit = idx.saturating_sub(MIN_MATCH_LEN - 1);
1027        if insert_limit > start {
1028            let data = last_entry.data.as_slice();
1029            let suffixes = &mut last_entry.suffixes;
1030            if fill_step == FAST_HASH_FILL_STEP {
1031                Self::add_suffixes_interleaved_fast(data, suffixes, start, insert_limit);
1032            } else {
1033                let mut pos = start;
1034                while pos < insert_limit {
1035                    Self::insert_suffix_if_absent(data, suffixes, pos);
1036                    pos += fill_step;
1037                }
1038            }
1039        }
1040
1041        if idx >= start + MIN_MATCH_LEN {
1042            let tail_start = idx - MIN_MATCH_LEN;
1043            let tail_key = &last_entry.data[tail_start..tail_start + MIN_MATCH_LEN];
1044            if !last_entry.suffixes.contains_key(tail_key) {
1045                last_entry.suffixes.insert(tail_key, tail_start);
1046            }
1047        }
1048    }
1049
1050    #[inline(always)]
1051    fn insert_suffix_if_absent(data: &[u8], suffixes: &mut SuffixStore, pos: usize) {
1052        debug_assert!(
1053            pos + MIN_MATCH_LEN <= data.len(),
1054            "insert_suffix_if_absent: pos {} + MIN_MATCH_LEN {} exceeds data.len() {}",
1055            pos,
1056            MIN_MATCH_LEN,
1057            data.len()
1058        );
1059        let key = &data[pos..pos + MIN_MATCH_LEN];
1060        if !suffixes.contains_key(key) {
1061            suffixes.insert(key, pos);
1062        }
1063    }
1064
1065    #[inline(always)]
1066    fn add_suffixes_interleaved_fast(
1067        data: &[u8],
1068        suffixes: &mut SuffixStore,
1069        start: usize,
1070        insert_limit: usize,
1071    ) {
1072        let lane = FAST_HASH_FILL_STEP;
1073        let mut pos = start;
1074
1075        // Pipeline-ish fill: compute and retire several hash positions per loop
1076        // so the fastest path keeps multiple independent hash lookups in flight.
1077        while pos + lane * 3 < insert_limit {
1078            let p0 = pos;
1079            let p1 = pos + lane;
1080            let p2 = pos + lane * 2;
1081            let p3 = pos + lane * 3;
1082
1083            Self::insert_suffix_if_absent(data, suffixes, p0);
1084            Self::insert_suffix_if_absent(data, suffixes, p1);
1085            Self::insert_suffix_if_absent(data, suffixes, p2);
1086            Self::insert_suffix_if_absent(data, suffixes, p3);
1087
1088            pos += lane * 4;
1089        }
1090
1091        while pos < insert_limit {
1092            Self::insert_suffix_if_absent(data, suffixes, pos);
1093            pos += lane;
1094        }
1095    }
1096
1097    fn repcode_candidate(&self, data_slice: &[u8], literals_len: usize) -> Option<(usize, usize)> {
1098        if literals_len != 0 {
1099            return None;
1100        }
1101
1102        let reps = [
1103            Some(self.offset_hist[1] as usize),
1104            Some(self.offset_hist[2] as usize),
1105            (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
1106        ];
1107
1108        let mut best: Option<(usize, usize)> = None;
1109        let mut seen = [0usize; 3];
1110        let mut seen_len = 0usize;
1111        for offset in reps.into_iter().flatten() {
1112            if offset == 0 {
1113                continue;
1114            }
1115            if seen[..seen_len].contains(&offset) {
1116                continue;
1117            }
1118            seen[seen_len] = offset;
1119            seen_len += 1;
1120
1121            let Some(match_len) = self.offset_match_len(offset, data_slice) else {
1122                continue;
1123            };
1124            if match_len < MIN_MATCH_LEN {
1125                continue;
1126            }
1127
1128            if best.is_none_or(|(old_offset, old_len)| {
1129                match_len > old_len || (match_len == old_len && offset < old_offset)
1130            }) {
1131                best = Some((offset, match_len));
1132            }
1133        }
1134        best
1135    }
1136
1137    fn offset_match_len(&self, offset: usize, data_slice: &[u8]) -> Option<usize> {
1138        if offset == 0 {
1139            return None;
1140        }
1141
1142        let last_idx = self.window.len().checked_sub(1)?;
1143        let last_entry = &self.window[last_idx];
1144        let searchable_prefix = self.window_size - (last_entry.data.len() - self.suffix_idx);
1145        if offset > searchable_prefix {
1146            return None;
1147        }
1148
1149        let mut remaining = offset;
1150        let (entry_idx, match_index) = if remaining <= self.suffix_idx {
1151            (last_idx, self.suffix_idx - remaining)
1152        } else {
1153            remaining -= self.suffix_idx;
1154            let mut found = None;
1155            for entry_idx in (0..last_idx).rev() {
1156                let len = self.window[entry_idx].data.len();
1157                if remaining <= len {
1158                    found = Some((entry_idx, len - remaining));
1159                    break;
1160                }
1161                remaining -= len;
1162            }
1163            found?
1164        };
1165
1166        let match_entry = &self.window[entry_idx];
1167        let match_slice = &match_entry.data[match_index..];
1168
1169        Some(Self::common_prefix_len(match_slice, data_slice))
1170    }
1171
1172    /// Skip matching for the whole current window entry
1173    fn skip_matching(&mut self) {
1174        let len = self.window.last().unwrap().data.len();
1175        self.add_suffixes_till(len, 1);
1176        self.suffix_idx = len;
1177        self.last_idx_in_sequence = len;
1178    }
1179
1180    /// Add a new window entry. Will panic if the last window entry hasn't been processed properly.
1181    /// If any resources are released by pushing the new entry they are returned via the callback
1182    fn add_data(
1183        &mut self,
1184        data: Vec<u8>,
1185        suffixes: SuffixStore,
1186        reuse_space: impl FnMut(Vec<u8>, SuffixStore),
1187    ) {
1188        assert!(
1189            self.window.is_empty() || self.suffix_idx == self.window.last().unwrap().data.len()
1190        );
1191        self.reserve(data.len(), reuse_space);
1192        #[cfg(debug_assertions)]
1193        self.concat_window.extend_from_slice(&data);
1194
1195        if let Some(last_len) = self.window.last().map(|last| last.data.len()) {
1196            for entry in self.window.iter_mut() {
1197                entry.base_offset += last_len;
1198            }
1199        }
1200
1201        let len = data.len();
1202        self.window.push(WindowEntry {
1203            data,
1204            suffixes,
1205            base_offset: 0,
1206        });
1207        self.window_size += len;
1208        self.suffix_idx = 0;
1209        self.last_idx_in_sequence = 0;
1210    }
1211
1212    /// Reserve space for a new window entry
1213    /// If any resources are released by pushing the new entry they are returned via the callback
1214    fn reserve(&mut self, amount: usize, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
1215        assert!(self.max_window_size >= amount);
1216        while self.window_size + amount > self.max_window_size {
1217            let removed = self.window.remove(0);
1218            self.window_size -= removed.data.len();
1219            #[cfg(debug_assertions)]
1220            self.concat_window.drain(0..removed.data.len());
1221
1222            let WindowEntry {
1223                suffixes,
1224                data: leaked_vec,
1225                base_offset: _,
1226            } = removed;
1227            reuse_space(leaked_vec, suffixes);
1228        }
1229    }
1230}
1231
1232struct DfastMatchGenerator {
1233    max_window_size: usize,
1234    window: VecDeque<Vec<u8>>,
1235    window_size: usize,
1236    // We keep a contiguous searchable history to avoid rebuilding and reseeding
1237    // the matcher state from disjoint block buffers on every block.
1238    history: Vec<u8>,
1239    history_start: usize,
1240    history_abs_start: usize,
1241    offset_hist: [u32; 3],
1242    short_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
1243    long_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
1244    hash_bits: usize,
1245    // Lazy match lookahead depth (internal tuning parameter).
1246    lazy_depth: u8,
1247}
1248
1249#[derive(Copy, Clone)]
1250struct MatchCandidate {
1251    start: usize,
1252    offset: usize,
1253    match_len: usize,
1254}
1255
1256fn best_len_offset_candidate(
1257    lhs: Option<MatchCandidate>,
1258    rhs: Option<MatchCandidate>,
1259) -> Option<MatchCandidate> {
1260    match (lhs, rhs) {
1261        (None, other) | (other, None) => other,
1262        (Some(lhs), Some(rhs)) => {
1263            if rhs.match_len > lhs.match_len
1264                || (rhs.match_len == lhs.match_len && rhs.offset < lhs.offset)
1265            {
1266                Some(rhs)
1267            } else {
1268                Some(lhs)
1269            }
1270        }
1271    }
1272}
1273
1274fn extend_backwards_shared(
1275    concat: &[u8],
1276    history_abs_start: usize,
1277    mut candidate_pos: usize,
1278    mut abs_pos: usize,
1279    mut match_len: usize,
1280    lit_len: usize,
1281) -> MatchCandidate {
1282    let min_abs_pos = abs_pos - lit_len;
1283    while abs_pos > min_abs_pos
1284        && candidate_pos > history_abs_start
1285        && concat[candidate_pos - history_abs_start - 1] == concat[abs_pos - history_abs_start - 1]
1286    {
1287        candidate_pos -= 1;
1288        abs_pos -= 1;
1289        match_len += 1;
1290    }
1291    MatchCandidate {
1292        start: abs_pos,
1293        offset: abs_pos - candidate_pos,
1294        match_len,
1295    }
1296}
1297
1298fn repcode_candidate_shared(
1299    concat: &[u8],
1300    history_abs_start: usize,
1301    offset_hist: [u32; 3],
1302    abs_pos: usize,
1303    lit_len: usize,
1304    min_match_len: usize,
1305) -> Option<MatchCandidate> {
1306    let reps = if lit_len == 0 {
1307        [
1308            Some(offset_hist[1] as usize),
1309            Some(offset_hist[2] as usize),
1310            (offset_hist[0] > 1).then_some((offset_hist[0] - 1) as usize),
1311        ]
1312    } else {
1313        [
1314            Some(offset_hist[0] as usize),
1315            Some(offset_hist[1] as usize),
1316            Some(offset_hist[2] as usize),
1317        ]
1318    };
1319
1320    let current_idx = abs_pos - history_abs_start;
1321    if current_idx + min_match_len > concat.len() {
1322        return None;
1323    }
1324
1325    let mut best = None;
1326    for rep in reps.into_iter().flatten() {
1327        if rep == 0 || rep > abs_pos {
1328            continue;
1329        }
1330        let candidate_pos = abs_pos - rep;
1331        if candidate_pos < history_abs_start {
1332            continue;
1333        }
1334        let candidate_idx = candidate_pos - history_abs_start;
1335        let match_len =
1336            MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
1337        if match_len >= min_match_len {
1338            let candidate = extend_backwards_shared(
1339                concat,
1340                history_abs_start,
1341                candidate_pos,
1342                abs_pos,
1343                match_len,
1344                lit_len,
1345            );
1346            best = best_len_offset_candidate(best, Some(candidate));
1347        }
1348    }
1349    best
1350}
1351
1352#[derive(Copy, Clone)]
1353struct LazyMatchConfig {
1354    target_len: usize,
1355    min_match_len: usize,
1356    lazy_depth: u8,
1357    history_abs_end: usize,
1358}
1359
1360fn pick_lazy_match_shared(
1361    abs_pos: usize,
1362    lit_len: usize,
1363    best: Option<MatchCandidate>,
1364    config: LazyMatchConfig,
1365    mut best_match_at: impl FnMut(usize, usize) -> Option<MatchCandidate>,
1366) -> Option<MatchCandidate> {
1367    let best = best?;
1368    if best.match_len >= config.target_len
1369        || abs_pos + 1 + config.min_match_len > config.history_abs_end
1370    {
1371        return Some(best);
1372    }
1373
1374    let next = best_match_at(abs_pos + 1, lit_len + 1);
1375    if let Some(next) = next
1376        && (next.match_len > best.match_len
1377            || (next.match_len == best.match_len && next.offset < best.offset))
1378    {
1379        return None;
1380    }
1381
1382    if config.lazy_depth >= 2 && abs_pos + 2 + config.min_match_len <= config.history_abs_end {
1383        let next2 = best_match_at(abs_pos + 2, lit_len + 2);
1384        if let Some(next2) = next2
1385            && next2.match_len > best.match_len + 1
1386        {
1387            return None;
1388        }
1389    }
1390
1391    Some(best)
1392}
1393
1394impl DfastMatchGenerator {
1395    fn new(max_window_size: usize) -> Self {
1396        Self {
1397            max_window_size,
1398            window: VecDeque::new(),
1399            window_size: 0,
1400            history: Vec::new(),
1401            history_start: 0,
1402            history_abs_start: 0,
1403            offset_hist: [1, 4, 8],
1404            short_hash: Vec::new(),
1405            long_hash: Vec::new(),
1406            hash_bits: DFAST_HASH_BITS,
1407            lazy_depth: 1,
1408        }
1409    }
1410
1411    fn set_hash_bits(&mut self, bits: usize) {
1412        let clamped = bits.clamp(MIN_WINDOW_LOG as usize, DFAST_HASH_BITS);
1413        if self.hash_bits != clamped {
1414            self.hash_bits = clamped;
1415            self.short_hash = Vec::new();
1416            self.long_hash = Vec::new();
1417        }
1418    }
1419
1420    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
1421        self.window_size = 0;
1422        self.history.clear();
1423        self.history_start = 0;
1424        self.history_abs_start = 0;
1425        self.offset_hist = [1, 4, 8];
1426        if !self.short_hash.is_empty() {
1427            self.short_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
1428            self.long_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
1429        }
1430        for mut data in self.window.drain(..) {
1431            data.resize(data.capacity(), 0);
1432            reuse_space(data);
1433        }
1434    }
1435
1436    fn get_last_space(&self) -> &[u8] {
1437        self.window.back().unwrap().as_slice()
1438    }
1439
1440    fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
1441        assert!(data.len() <= self.max_window_size);
1442        while self.window_size + data.len() > self.max_window_size {
1443            let removed = self.window.pop_front().unwrap();
1444            self.window_size -= removed.len();
1445            self.history_start += removed.len();
1446            self.history_abs_start += removed.len();
1447            reuse_space(removed);
1448        }
1449        self.compact_history();
1450        self.history.extend_from_slice(&data);
1451        self.window_size += data.len();
1452        self.window.push_back(data);
1453    }
1454
1455    fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
1456        while self.window_size > self.max_window_size {
1457            let removed = self.window.pop_front().unwrap();
1458            self.window_size -= removed.len();
1459            self.history_start += removed.len();
1460            self.history_abs_start += removed.len();
1461            reuse_space(removed);
1462        }
1463    }
1464
1465    fn skip_matching(&mut self) {
1466        self.ensure_hash_tables();
1467        let current_len = self.window.back().unwrap().len();
1468        let current_abs_start = self.history_abs_start + self.window_size - current_len;
1469        self.insert_positions(current_abs_start, current_abs_start + current_len);
1470    }
1471
1472    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
1473        self.ensure_hash_tables();
1474
1475        let current_len = self.window.back().unwrap().len();
1476        if current_len == 0 {
1477            return;
1478        }
1479
1480        let current_abs_start = self.history_abs_start + self.window_size - current_len;
1481
1482        let mut pos = 0usize;
1483        let mut literals_start = 0usize;
1484        while pos + DFAST_MIN_MATCH_LEN <= current_len {
1485            let abs_pos = current_abs_start + pos;
1486            let lit_len = pos - literals_start;
1487
1488            let best = self.best_match(abs_pos, lit_len);
1489            if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
1490                self.insert_positions(abs_pos, candidate.start + candidate.match_len);
1491                let current = self.window.back().unwrap().as_slice();
1492                let start = candidate.start - current_abs_start;
1493                let literals = &current[literals_start..start];
1494                handle_sequence(Sequence::Triple {
1495                    literals,
1496                    offset: candidate.offset,
1497                    match_len: candidate.match_len,
1498                });
1499                // The encoded offset value is irrelevant here; we only need the
1500                // side effect on offset history for future rep-code matching.
1501                let _ = encode_offset_with_history(
1502                    candidate.offset as u32,
1503                    literals.len() as u32,
1504                    &mut self.offset_hist,
1505                );
1506                pos = start + candidate.match_len;
1507                literals_start = pos;
1508            } else {
1509                self.insert_position(abs_pos);
1510                pos += 1;
1511            }
1512        }
1513
1514        if literals_start < current_len {
1515            // We stop inserting once fewer than DFAST_MIN_MATCH_LEN bytes remain.
1516            // The last boundary-spanning start that can seed a dfast hash is
1517            // still inserted by the loop above; `dfast_inserts_tail_positions_
1518            // for_next_block_matching()` asserts that the next block can match
1519            // immediately at the boundary without eagerly seeding the final
1520            // DFAST_MIN_MATCH_LEN - 1 bytes here.
1521            let current = self.window.back().unwrap().as_slice();
1522            handle_sequence(Sequence::Literals {
1523                literals: &current[literals_start..],
1524            });
1525        }
1526    }
1527
1528    fn ensure_hash_tables(&mut self) {
1529        let table_len = 1usize << self.hash_bits;
1530        if self.short_hash.len() != table_len {
1531            // This is intentionally lazy so Fastest/Uncompressed never pay the
1532            // ~dfast-level memory cost. The current size tracks the issue's
1533            // zstd level-3 style parameters rather than a generic low-memory preset.
1534            self.short_hash = alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; table_len];
1535            self.long_hash = alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; table_len];
1536        }
1537    }
1538
1539    fn compact_history(&mut self) {
1540        if self.history_start == 0 {
1541            return;
1542        }
1543        if self.history_start >= self.max_window_size
1544            || self.history_start * 2 >= self.history.len()
1545        {
1546            self.history.drain(..self.history_start);
1547            self.history_start = 0;
1548        }
1549    }
1550
1551    fn live_history(&self) -> &[u8] {
1552        &self.history[self.history_start..]
1553    }
1554
1555    fn history_abs_end(&self) -> usize {
1556        self.history_abs_start + self.live_history().len()
1557    }
1558
1559    fn best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1560        let rep = self.repcode_candidate(abs_pos, lit_len);
1561        let hash = self.hash_candidate(abs_pos, lit_len);
1562        best_len_offset_candidate(rep, hash)
1563    }
1564
1565    fn pick_lazy_match(
1566        &self,
1567        abs_pos: usize,
1568        lit_len: usize,
1569        best: Option<MatchCandidate>,
1570    ) -> Option<MatchCandidate> {
1571        pick_lazy_match_shared(
1572            abs_pos,
1573            lit_len,
1574            best,
1575            LazyMatchConfig {
1576                target_len: DFAST_TARGET_LEN,
1577                min_match_len: DFAST_MIN_MATCH_LEN,
1578                lazy_depth: self.lazy_depth,
1579                history_abs_end: self.history_abs_end(),
1580            },
1581            |next_pos, next_lit_len| self.best_match(next_pos, next_lit_len),
1582        )
1583    }
1584
1585    fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1586        repcode_candidate_shared(
1587            self.live_history(),
1588            self.history_abs_start,
1589            self.offset_hist,
1590            abs_pos,
1591            lit_len,
1592            DFAST_MIN_MATCH_LEN,
1593        )
1594    }
1595
1596    fn hash_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1597        let concat = self.live_history();
1598        let current_idx = abs_pos - self.history_abs_start;
1599        let mut best = None;
1600        for candidate_pos in self.long_candidates(abs_pos) {
1601            if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos {
1602                continue;
1603            }
1604            let candidate_idx = candidate_pos - self.history_abs_start;
1605            let match_len =
1606                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
1607            if match_len >= DFAST_MIN_MATCH_LEN {
1608                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
1609                best = best_len_offset_candidate(best, Some(candidate));
1610                if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) {
1611                    return best;
1612                }
1613            }
1614        }
1615
1616        for candidate_pos in self.short_candidates(abs_pos) {
1617            if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos {
1618                continue;
1619            }
1620            let candidate_idx = candidate_pos - self.history_abs_start;
1621            let match_len =
1622                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
1623            if match_len >= DFAST_MIN_MATCH_LEN {
1624                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
1625                best = best_len_offset_candidate(best, Some(candidate));
1626                if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) {
1627                    return best;
1628                }
1629            }
1630        }
1631        best
1632    }
1633
1634    fn extend_backwards(
1635        &self,
1636        candidate_pos: usize,
1637        abs_pos: usize,
1638        match_len: usize,
1639        lit_len: usize,
1640    ) -> MatchCandidate {
1641        extend_backwards_shared(
1642            self.live_history(),
1643            self.history_abs_start,
1644            candidate_pos,
1645            abs_pos,
1646            match_len,
1647            lit_len,
1648        )
1649    }
1650
1651    fn insert_positions(&mut self, start: usize, end: usize) {
1652        for pos in start..end {
1653            self.insert_position(pos);
1654        }
1655    }
1656
1657    fn insert_position(&mut self, pos: usize) {
1658        let idx = pos - self.history_abs_start;
1659        let short = {
1660            let concat = self.live_history();
1661            (idx + 4 <= concat.len()).then(|| self.hash4(&concat[idx..]))
1662        };
1663        if let Some(short) = short {
1664            let bucket = &mut self.short_hash[short];
1665            if bucket[0] != pos {
1666                bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
1667                bucket[0] = pos;
1668            }
1669        }
1670
1671        let long = {
1672            let concat = self.live_history();
1673            (idx + 8 <= concat.len()).then(|| self.hash8(&concat[idx..]))
1674        };
1675        if let Some(long) = long {
1676            let bucket = &mut self.long_hash[long];
1677            if bucket[0] != pos {
1678                bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
1679                bucket[0] = pos;
1680            }
1681        }
1682    }
1683
1684    fn short_candidates(&self, pos: usize) -> impl Iterator<Item = usize> + '_ {
1685        let concat = self.live_history();
1686        let idx = pos - self.history_abs_start;
1687        (idx + 4 <= concat.len())
1688            .then(|| self.short_hash[self.hash4(&concat[idx..])])
1689            .into_iter()
1690            .flatten()
1691            .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
1692    }
1693
1694    fn long_candidates(&self, pos: usize) -> impl Iterator<Item = usize> + '_ {
1695        let concat = self.live_history();
1696        let idx = pos - self.history_abs_start;
1697        (idx + 8 <= concat.len())
1698            .then(|| self.long_hash[self.hash8(&concat[idx..])])
1699            .into_iter()
1700            .flatten()
1701            .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
1702    }
1703
1704    fn hash4(&self, data: &[u8]) -> usize {
1705        let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64;
1706        self.hash_index(value)
1707    }
1708
1709    fn hash8(&self, data: &[u8]) -> usize {
1710        let value = u64::from_le_bytes(data[..8].try_into().unwrap());
1711        self.hash_index(value)
1712    }
1713
1714    fn hash_index(&self, value: u64) -> usize {
1715        const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
1716        ((value.wrapping_mul(PRIME)) >> (64 - self.hash_bits)) as usize
1717    }
1718}
1719
1720struct RowMatchGenerator {
1721    max_window_size: usize,
1722    window: VecDeque<Vec<u8>>,
1723    window_size: usize,
1724    history: Vec<u8>,
1725    history_start: usize,
1726    history_abs_start: usize,
1727    offset_hist: [u32; 3],
1728    row_hash_log: usize,
1729    row_log: usize,
1730    search_depth: usize,
1731    target_len: usize,
1732    lazy_depth: u8,
1733    row_heads: Vec<u8>,
1734    row_positions: Vec<usize>,
1735    row_tags: Vec<u8>,
1736}
1737
1738impl RowMatchGenerator {
1739    fn new(max_window_size: usize) -> Self {
1740        Self {
1741            max_window_size,
1742            window: VecDeque::new(),
1743            window_size: 0,
1744            history: Vec::new(),
1745            history_start: 0,
1746            history_abs_start: 0,
1747            offset_hist: [1, 4, 8],
1748            row_hash_log: ROW_HASH_BITS - ROW_LOG,
1749            row_log: ROW_LOG,
1750            search_depth: ROW_SEARCH_DEPTH,
1751            target_len: ROW_TARGET_LEN,
1752            lazy_depth: 1,
1753            row_heads: Vec::new(),
1754            row_positions: Vec::new(),
1755            row_tags: Vec::new(),
1756        }
1757    }
1758
1759    fn set_hash_bits(&mut self, bits: usize) {
1760        let clamped = bits.clamp(self.row_log + 1, ROW_HASH_BITS);
1761        let row_hash_log = clamped.saturating_sub(self.row_log);
1762        if self.row_hash_log != row_hash_log {
1763            self.row_hash_log = row_hash_log;
1764            self.row_heads.clear();
1765            self.row_positions.clear();
1766            self.row_tags.clear();
1767        }
1768    }
1769
1770    fn configure(&mut self, config: RowConfig) {
1771        self.row_log = config.row_log.clamp(4, 6);
1772        self.search_depth = config.search_depth;
1773        self.target_len = config.target_len;
1774        self.set_hash_bits(config.hash_bits.max(self.row_log + 1));
1775    }
1776
1777    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
1778        self.window_size = 0;
1779        self.history.clear();
1780        self.history_start = 0;
1781        self.history_abs_start = 0;
1782        self.offset_hist = [1, 4, 8];
1783        self.row_heads.fill(0);
1784        self.row_positions.fill(ROW_EMPTY_SLOT);
1785        self.row_tags.fill(0);
1786        for mut data in self.window.drain(..) {
1787            data.resize(data.capacity(), 0);
1788            reuse_space(data);
1789        }
1790    }
1791
1792    fn get_last_space(&self) -> &[u8] {
1793        self.window.back().unwrap().as_slice()
1794    }
1795
1796    fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
1797        assert!(data.len() <= self.max_window_size);
1798        while self.window_size + data.len() > self.max_window_size {
1799            let removed = self.window.pop_front().unwrap();
1800            self.window_size -= removed.len();
1801            self.history_start += removed.len();
1802            self.history_abs_start += removed.len();
1803            reuse_space(removed);
1804        }
1805        self.compact_history();
1806        self.history.extend_from_slice(&data);
1807        self.window_size += data.len();
1808        self.window.push_back(data);
1809    }
1810
1811    fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
1812        while self.window_size > self.max_window_size {
1813            let removed = self.window.pop_front().unwrap();
1814            self.window_size -= removed.len();
1815            self.history_start += removed.len();
1816            self.history_abs_start += removed.len();
1817            reuse_space(removed);
1818        }
1819    }
1820
1821    fn skip_matching(&mut self) {
1822        self.ensure_tables();
1823        let current_len = self.window.back().unwrap().len();
1824        let current_abs_start = self.history_abs_start + self.window_size - current_len;
1825        let backfill_start = self.backfill_start(current_abs_start);
1826        if backfill_start < current_abs_start {
1827            self.insert_positions(backfill_start, current_abs_start);
1828        }
1829        self.insert_positions(current_abs_start, current_abs_start + current_len);
1830    }
1831
1832    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
1833        self.ensure_tables();
1834
1835        let current_len = self.window.back().unwrap().len();
1836        if current_len == 0 {
1837            return;
1838        }
1839        let current_abs_start = self.history_abs_start + self.window_size - current_len;
1840        let backfill_start = self.backfill_start(current_abs_start);
1841        if backfill_start < current_abs_start {
1842            self.insert_positions(backfill_start, current_abs_start);
1843        }
1844
1845        let mut pos = 0usize;
1846        let mut literals_start = 0usize;
1847        while pos + ROW_MIN_MATCH_LEN <= current_len {
1848            let abs_pos = current_abs_start + pos;
1849            let lit_len = pos - literals_start;
1850
1851            let best = self.best_match(abs_pos, lit_len);
1852            if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
1853                self.insert_positions(abs_pos, candidate.start + candidate.match_len);
1854                let current = self.window.back().unwrap().as_slice();
1855                let start = candidate.start - current_abs_start;
1856                let literals = &current[literals_start..start];
1857                handle_sequence(Sequence::Triple {
1858                    literals,
1859                    offset: candidate.offset,
1860                    match_len: candidate.match_len,
1861                });
1862                let _ = encode_offset_with_history(
1863                    candidate.offset as u32,
1864                    literals.len() as u32,
1865                    &mut self.offset_hist,
1866                );
1867                pos = start + candidate.match_len;
1868                literals_start = pos;
1869            } else {
1870                self.insert_position(abs_pos);
1871                pos += 1;
1872            }
1873        }
1874
1875        while pos + ROW_HASH_KEY_LEN <= current_len {
1876            self.insert_position(current_abs_start + pos);
1877            pos += 1;
1878        }
1879
1880        if literals_start < current_len {
1881            let current = self.window.back().unwrap().as_slice();
1882            handle_sequence(Sequence::Literals {
1883                literals: &current[literals_start..],
1884            });
1885        }
1886    }
1887
1888    fn ensure_tables(&mut self) {
1889        let row_count = 1usize << self.row_hash_log;
1890        let row_entries = 1usize << self.row_log;
1891        let total = row_count * row_entries;
1892        if self.row_positions.len() != total {
1893            self.row_heads = alloc::vec![0; row_count];
1894            self.row_positions = alloc::vec![ROW_EMPTY_SLOT; total];
1895            self.row_tags = alloc::vec![0; total];
1896        }
1897    }
1898
1899    fn compact_history(&mut self) {
1900        if self.history_start == 0 {
1901            return;
1902        }
1903        if self.history_start >= self.max_window_size
1904            || self.history_start * 2 >= self.history.len()
1905        {
1906            self.history.drain(..self.history_start);
1907            self.history_start = 0;
1908        }
1909    }
1910
1911    fn live_history(&self) -> &[u8] {
1912        &self.history[self.history_start..]
1913    }
1914
1915    fn history_abs_end(&self) -> usize {
1916        self.history_abs_start + self.live_history().len()
1917    }
1918
1919    fn hash_and_row(&self, abs_pos: usize) -> Option<(usize, u8)> {
1920        let idx = abs_pos - self.history_abs_start;
1921        let concat = self.live_history();
1922        if idx + ROW_HASH_KEY_LEN > concat.len() {
1923            return None;
1924        }
1925        let value =
1926            u32::from_le_bytes(concat[idx..idx + ROW_HASH_KEY_LEN].try_into().unwrap()) as u64;
1927        const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
1928        let hash = value.wrapping_mul(PRIME);
1929        let total_bits = self.row_hash_log + ROW_TAG_BITS;
1930        let combined = hash >> (u64::BITS as usize - total_bits);
1931        let row_mask = (1usize << self.row_hash_log) - 1;
1932        let row = ((combined >> ROW_TAG_BITS) as usize) & row_mask;
1933        let tag = combined as u8;
1934        Some((row, tag))
1935    }
1936
1937    fn backfill_start(&self, current_abs_start: usize) -> usize {
1938        current_abs_start
1939            .saturating_sub(ROW_HASH_KEY_LEN - 1)
1940            .max(self.history_abs_start)
1941    }
1942
1943    fn best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1944        let rep = self.repcode_candidate(abs_pos, lit_len);
1945        let row = self.row_candidate(abs_pos, lit_len);
1946        best_len_offset_candidate(rep, row)
1947    }
1948
1949    fn pick_lazy_match(
1950        &self,
1951        abs_pos: usize,
1952        lit_len: usize,
1953        best: Option<MatchCandidate>,
1954    ) -> Option<MatchCandidate> {
1955        pick_lazy_match_shared(
1956            abs_pos,
1957            lit_len,
1958            best,
1959            LazyMatchConfig {
1960                target_len: self.target_len,
1961                min_match_len: ROW_MIN_MATCH_LEN,
1962                lazy_depth: self.lazy_depth,
1963                history_abs_end: self.history_abs_end(),
1964            },
1965            |next_pos, next_lit_len| self.best_match(next_pos, next_lit_len),
1966        )
1967    }
1968
1969    fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1970        repcode_candidate_shared(
1971            self.live_history(),
1972            self.history_abs_start,
1973            self.offset_hist,
1974            abs_pos,
1975            lit_len,
1976            ROW_MIN_MATCH_LEN,
1977        )
1978    }
1979
1980    fn row_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1981        let concat = self.live_history();
1982        let current_idx = abs_pos - self.history_abs_start;
1983        if current_idx + ROW_MIN_MATCH_LEN > concat.len() {
1984            return None;
1985        }
1986
1987        let (row, tag) = self.hash_and_row(abs_pos)?;
1988        let row_entries = 1usize << self.row_log;
1989        let row_mask = row_entries - 1;
1990        let row_base = row << self.row_log;
1991        let head = self.row_heads[row] as usize;
1992        let max_walk = self.search_depth.min(row_entries);
1993
1994        let mut best = None;
1995        for i in 0..max_walk {
1996            let slot = (head + i) & row_mask;
1997            let idx = row_base + slot;
1998            if self.row_tags[idx] != tag {
1999                continue;
2000            }
2001            let candidate_pos = self.row_positions[idx];
2002            if candidate_pos == ROW_EMPTY_SLOT
2003                || candidate_pos < self.history_abs_start
2004                || candidate_pos >= abs_pos
2005            {
2006                continue;
2007            }
2008            let candidate_idx = candidate_pos - self.history_abs_start;
2009            let match_len =
2010                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
2011            if match_len >= ROW_MIN_MATCH_LEN {
2012                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
2013                best = best_len_offset_candidate(best, Some(candidate));
2014                if best.is_some_and(|best| best.match_len >= self.target_len) {
2015                    return best;
2016                }
2017            }
2018        }
2019        best
2020    }
2021
2022    fn extend_backwards(
2023        &self,
2024        candidate_pos: usize,
2025        abs_pos: usize,
2026        match_len: usize,
2027        lit_len: usize,
2028    ) -> MatchCandidate {
2029        extend_backwards_shared(
2030            self.live_history(),
2031            self.history_abs_start,
2032            candidate_pos,
2033            abs_pos,
2034            match_len,
2035            lit_len,
2036        )
2037    }
2038
2039    fn insert_positions(&mut self, start: usize, end: usize) {
2040        for pos in start..end {
2041            self.insert_position(pos);
2042        }
2043    }
2044
2045    fn insert_position(&mut self, abs_pos: usize) {
2046        let Some((row, tag)) = self.hash_and_row(abs_pos) else {
2047            return;
2048        };
2049        let row_entries = 1usize << self.row_log;
2050        let row_mask = row_entries - 1;
2051        let row_base = row << self.row_log;
2052        let head = self.row_heads[row] as usize;
2053        let next = head.wrapping_sub(1) & row_mask;
2054        self.row_heads[row] = next as u8;
2055        self.row_tags[row_base + next] = tag;
2056        self.row_positions[row_base + next] = abs_pos;
2057    }
2058}
2059
2060struct HcMatchGenerator {
2061    max_window_size: usize,
2062    window: VecDeque<Vec<u8>>,
2063    window_size: usize,
2064    history: Vec<u8>,
2065    history_start: usize,
2066    history_abs_start: usize,
2067    position_base: usize,
2068    offset_hist: [u32; 3],
2069    hash_table: Vec<u32>,
2070    chain_table: Vec<u32>,
2071    lazy_depth: u8,
2072    hash_log: usize,
2073    chain_log: usize,
2074    search_depth: usize,
2075    target_len: usize,
2076}
2077
2078impl HcMatchGenerator {
2079    fn new(max_window_size: usize) -> Self {
2080        Self {
2081            max_window_size,
2082            window: VecDeque::new(),
2083            window_size: 0,
2084            history: Vec::new(),
2085            history_start: 0,
2086            history_abs_start: 0,
2087            position_base: 0,
2088            offset_hist: [1, 4, 8],
2089            hash_table: Vec::new(),
2090            chain_table: Vec::new(),
2091            lazy_depth: 2,
2092            hash_log: HC_HASH_LOG,
2093            chain_log: HC_CHAIN_LOG,
2094            search_depth: HC_SEARCH_DEPTH,
2095            target_len: HC_TARGET_LEN,
2096        }
2097    }
2098
2099    fn configure(&mut self, config: HcConfig) {
2100        let resize = self.hash_log != config.hash_log || self.chain_log != config.chain_log;
2101        self.hash_log = config.hash_log;
2102        self.chain_log = config.chain_log;
2103        self.search_depth = config.search_depth.min(MAX_HC_SEARCH_DEPTH);
2104        self.target_len = config.target_len;
2105        if resize && !self.hash_table.is_empty() {
2106            // Force reallocation on next ensure_tables() call.
2107            self.hash_table.clear();
2108            self.chain_table.clear();
2109        }
2110    }
2111
2112    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
2113        self.window_size = 0;
2114        self.history.clear();
2115        self.history_start = 0;
2116        self.history_abs_start = 0;
2117        self.position_base = 0;
2118        self.offset_hist = [1, 4, 8];
2119        if !self.hash_table.is_empty() {
2120            self.hash_table.fill(HC_EMPTY);
2121            self.chain_table.fill(HC_EMPTY);
2122        }
2123        for mut data in self.window.drain(..) {
2124            data.resize(data.capacity(), 0);
2125            reuse_space(data);
2126        }
2127    }
2128
2129    fn get_last_space(&self) -> &[u8] {
2130        self.window.back().unwrap().as_slice()
2131    }
2132
2133    // History duplicates window data for O(1) contiguous access during match
2134    // finding (common_prefix_len, extend_backwards). Same pattern as
2135    // DfastMatchGenerator. Peak: ~2x window size for data buffers + 6 MB tables.
2136    fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
2137        assert!(data.len() <= self.max_window_size);
2138        while self.window_size + data.len() > self.max_window_size {
2139            let removed = self.window.pop_front().unwrap();
2140            self.window_size -= removed.len();
2141            self.history_start += removed.len();
2142            self.history_abs_start += removed.len();
2143            reuse_space(removed);
2144        }
2145        self.compact_history();
2146        self.history.extend_from_slice(&data);
2147        self.window_size += data.len();
2148        self.window.push_back(data);
2149    }
2150
2151    fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
2152        while self.window_size > self.max_window_size {
2153            let removed = self.window.pop_front().unwrap();
2154            self.window_size -= removed.len();
2155            self.history_start += removed.len();
2156            self.history_abs_start += removed.len();
2157            reuse_space(removed);
2158        }
2159    }
2160
2161    /// Backfill positions from the tail of the previous slice that couldn't be
2162    /// hashed at the time (insert_position needs 4 bytes of lookahead).
2163    fn backfill_boundary_positions(&mut self, current_abs_start: usize) {
2164        let backfill_start = current_abs_start
2165            .saturating_sub(3)
2166            .max(self.history_abs_start);
2167        if backfill_start < current_abs_start {
2168            self.insert_positions(backfill_start, current_abs_start);
2169        }
2170    }
2171
2172    fn skip_matching(&mut self) {
2173        self.ensure_tables();
2174        let current_len = self.window.back().unwrap().len();
2175        let current_abs_start = self.history_abs_start + self.window_size - current_len;
2176        self.backfill_boundary_positions(current_abs_start);
2177        self.insert_positions(current_abs_start, current_abs_start + current_len);
2178    }
2179
2180    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
2181        self.ensure_tables();
2182
2183        let current_len = self.window.back().unwrap().len();
2184        if current_len == 0 {
2185            return;
2186        }
2187
2188        let current_abs_start = self.history_abs_start + self.window_size - current_len;
2189        self.backfill_boundary_positions(current_abs_start);
2190
2191        let mut pos = 0usize;
2192        let mut literals_start = 0usize;
2193        while pos + HC_MIN_MATCH_LEN <= current_len {
2194            let abs_pos = current_abs_start + pos;
2195            let lit_len = pos - literals_start;
2196
2197            let best = self.find_best_match(abs_pos, lit_len);
2198            if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
2199                self.insert_positions(abs_pos, candidate.start + candidate.match_len);
2200                let current = self.window.back().unwrap().as_slice();
2201                let start = candidate.start - current_abs_start;
2202                let literals = &current[literals_start..start];
2203                handle_sequence(Sequence::Triple {
2204                    literals,
2205                    offset: candidate.offset,
2206                    match_len: candidate.match_len,
2207                });
2208                let _ = encode_offset_with_history(
2209                    candidate.offset as u32,
2210                    literals.len() as u32,
2211                    &mut self.offset_hist,
2212                );
2213                pos = start + candidate.match_len;
2214                literals_start = pos;
2215            } else {
2216                self.insert_position(abs_pos);
2217                pos += 1;
2218            }
2219        }
2220
2221        // Insert remaining hashable positions in the tail (the matching loop
2222        // stops at HC_MIN_MATCH_LEN but insert_position only needs 4 bytes).
2223        while pos + 4 <= current_len {
2224            self.insert_position(current_abs_start + pos);
2225            pos += 1;
2226        }
2227
2228        if literals_start < current_len {
2229            let current = self.window.back().unwrap().as_slice();
2230            handle_sequence(Sequence::Literals {
2231                literals: &current[literals_start..],
2232            });
2233        }
2234    }
2235
2236    fn ensure_tables(&mut self) {
2237        if self.hash_table.is_empty() {
2238            self.hash_table = alloc::vec![HC_EMPTY; 1 << self.hash_log];
2239            self.chain_table = alloc::vec![HC_EMPTY; 1 << self.chain_log];
2240        }
2241    }
2242
2243    fn compact_history(&mut self) {
2244        if self.history_start == 0 {
2245            return;
2246        }
2247        if self.history_start >= self.max_window_size
2248            || self.history_start * 2 >= self.history.len()
2249        {
2250            self.history.drain(..self.history_start);
2251            self.history_start = 0;
2252        }
2253    }
2254
2255    fn live_history(&self) -> &[u8] {
2256        &self.history[self.history_start..]
2257    }
2258
2259    fn history_abs_end(&self) -> usize {
2260        self.history_abs_start + self.live_history().len()
2261    }
2262
2263    fn hash_position(&self, data: &[u8]) -> usize {
2264        let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64;
2265        const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
2266        ((value.wrapping_mul(PRIME)) >> (64 - self.hash_log)) as usize
2267    }
2268
2269    fn relative_position(&self, abs_pos: usize) -> Option<u32> {
2270        let rel = abs_pos.checked_sub(self.position_base)?;
2271        let rel_u32 = u32::try_from(rel).ok()?;
2272        // Positions are stored as (relative_pos + 1), with 0 reserved as the
2273        // empty sentinel. So the raw relative position itself must stay
2274        // strictly below u32::MAX.
2275        (rel_u32 < u32::MAX).then_some(rel_u32)
2276    }
2277
2278    fn maybe_rebase_positions(&mut self, abs_pos: usize) {
2279        let needs_rebase = self
2280            .relative_position(abs_pos)
2281            .is_none_or(|relative| relative >= u32::MAX - 1);
2282        if !needs_rebase {
2283            return;
2284        }
2285
2286        // Keep all live history addressable after rebase.
2287        self.position_base = self.history_abs_start;
2288        self.hash_table.fill(HC_EMPTY);
2289        self.chain_table.fill(HC_EMPTY);
2290
2291        let history_start = self.history_abs_start;
2292        // Rebuild only the already-inserted prefix. The caller inserts abs_pos
2293        // immediately after this, and later positions are added in-order.
2294        for pos in history_start..abs_pos {
2295            self.insert_position_no_rebase(pos);
2296        }
2297    }
2298
2299    fn insert_position(&mut self, abs_pos: usize) {
2300        self.maybe_rebase_positions(abs_pos);
2301        self.insert_position_no_rebase(abs_pos);
2302    }
2303
2304    fn insert_position_no_rebase(&mut self, abs_pos: usize) {
2305        let idx = abs_pos - self.history_abs_start;
2306        let concat = self.live_history();
2307        if idx + 4 > concat.len() {
2308            return;
2309        }
2310        let hash = self.hash_position(&concat[idx..]);
2311        let Some(relative_pos) = self.relative_position(abs_pos) else {
2312            return;
2313        };
2314        let stored = relative_pos + 1;
2315        let chain_idx = relative_pos as usize & ((1 << self.chain_log) - 1);
2316        let prev = self.hash_table[hash];
2317        self.chain_table[chain_idx] = prev;
2318        self.hash_table[hash] = stored;
2319    }
2320
2321    fn insert_positions(&mut self, start: usize, end: usize) {
2322        for pos in start..end {
2323            self.insert_position(pos);
2324        }
2325    }
2326
2327    // Fixed-size stack array is intentional: it avoids heap allocation on
2328    // the hot path and the sentinel loop exits at self.search_depth.
2329    fn chain_candidates(&self, abs_pos: usize) -> [usize; MAX_HC_SEARCH_DEPTH] {
2330        let mut buf = [usize::MAX; MAX_HC_SEARCH_DEPTH];
2331        let idx = abs_pos - self.history_abs_start;
2332        let concat = self.live_history();
2333        if idx + 4 > concat.len() {
2334            return buf;
2335        }
2336        let hash = self.hash_position(&concat[idx..]);
2337        let chain_mask = (1 << self.chain_log) - 1;
2338
2339        let mut cur = self.hash_table[hash];
2340        let mut filled = 0;
2341        // Follow chain up to search_depth valid candidates, skipping stale
2342        // entries (evicted from window) instead of stopping at them.
2343        // Stored values are (relative_pos + 1); decode with wrapping_sub(1)
2344        // and recover absolute position via position_base + relative.
2345        // Break on self-loops (masked chain_idx collision at periodicity).
2346        // Cap total steps at 4x search depth to bound time spent skipping
2347        // stale entries while still finding valid candidates deeper in chain.
2348        let mut steps = 0;
2349        let max_chain_steps = self.search_depth * 4;
2350        while filled < self.search_depth && steps < max_chain_steps {
2351            if cur == HC_EMPTY {
2352                break;
2353            }
2354            let candidate_rel = cur.wrapping_sub(1) as usize;
2355            let candidate_abs = self.position_base + candidate_rel;
2356            let next = self.chain_table[candidate_rel & chain_mask];
2357            steps += 1;
2358            if next == cur {
2359                // Self-loop: two positions share chain_idx, stop to avoid
2360                // spinning on the same candidate forever.
2361                if candidate_abs >= self.history_abs_start && candidate_abs < abs_pos {
2362                    buf[filled] = candidate_abs;
2363                }
2364                break;
2365            }
2366            cur = next;
2367            if candidate_abs < self.history_abs_start || candidate_abs >= abs_pos {
2368                continue;
2369            }
2370            buf[filled] = candidate_abs;
2371            filled += 1;
2372        }
2373        buf
2374    }
2375
2376    fn find_best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
2377        let rep = self.repcode_candidate(abs_pos, lit_len);
2378        let hash = self.hash_chain_candidate(abs_pos, lit_len);
2379        Self::better_candidate(rep, hash)
2380    }
2381
2382    fn hash_chain_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
2383        let concat = self.live_history();
2384        let current_idx = abs_pos - self.history_abs_start;
2385        if current_idx + HC_MIN_MATCH_LEN > concat.len() {
2386            return None;
2387        }
2388
2389        let mut best: Option<MatchCandidate> = None;
2390        for candidate_abs in self.chain_candidates(abs_pos) {
2391            if candidate_abs == usize::MAX {
2392                break;
2393            }
2394            let candidate_idx = candidate_abs - self.history_abs_start;
2395            let match_len =
2396                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
2397            if match_len >= HC_MIN_MATCH_LEN {
2398                let candidate = self.extend_backwards(candidate_abs, abs_pos, match_len, lit_len);
2399                best = Self::better_candidate(best, Some(candidate));
2400                if best.is_some_and(|b| b.match_len >= self.target_len) {
2401                    return best;
2402                }
2403            }
2404        }
2405        best
2406    }
2407
2408    fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
2409        let reps = if lit_len == 0 {
2410            [
2411                Some(self.offset_hist[1] as usize),
2412                Some(self.offset_hist[2] as usize),
2413                (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
2414            ]
2415        } else {
2416            [
2417                Some(self.offset_hist[0] as usize),
2418                Some(self.offset_hist[1] as usize),
2419                Some(self.offset_hist[2] as usize),
2420            ]
2421        };
2422
2423        let concat = self.live_history();
2424        let current_idx = abs_pos - self.history_abs_start;
2425        if current_idx + HC_MIN_MATCH_LEN > concat.len() {
2426            return None;
2427        }
2428
2429        let mut best = None;
2430        for rep in reps.into_iter().flatten() {
2431            if rep == 0 || rep > abs_pos {
2432                continue;
2433            }
2434            let candidate_pos = abs_pos - rep;
2435            if candidate_pos < self.history_abs_start {
2436                continue;
2437            }
2438            let candidate_idx = candidate_pos - self.history_abs_start;
2439            let match_len =
2440                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
2441            if match_len >= HC_MIN_MATCH_LEN {
2442                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
2443                best = Self::better_candidate(best, Some(candidate));
2444            }
2445        }
2446        best
2447    }
2448
2449    fn extend_backwards(
2450        &self,
2451        mut candidate_pos: usize,
2452        mut abs_pos: usize,
2453        mut match_len: usize,
2454        lit_len: usize,
2455    ) -> MatchCandidate {
2456        let concat = self.live_history();
2457        let min_abs_pos = abs_pos - lit_len;
2458        while abs_pos > min_abs_pos
2459            && candidate_pos > self.history_abs_start
2460            && concat[candidate_pos - self.history_abs_start - 1]
2461                == concat[abs_pos - self.history_abs_start - 1]
2462        {
2463            candidate_pos -= 1;
2464            abs_pos -= 1;
2465            match_len += 1;
2466        }
2467        MatchCandidate {
2468            start: abs_pos,
2469            offset: abs_pos - candidate_pos,
2470            match_len,
2471        }
2472    }
2473
2474    fn better_candidate(
2475        lhs: Option<MatchCandidate>,
2476        rhs: Option<MatchCandidate>,
2477    ) -> Option<MatchCandidate> {
2478        match (lhs, rhs) {
2479            (None, other) | (other, None) => other,
2480            (Some(lhs), Some(rhs)) => {
2481                let lhs_gain = Self::match_gain(lhs.match_len, lhs.offset);
2482                let rhs_gain = Self::match_gain(rhs.match_len, rhs.offset);
2483                if rhs_gain > lhs_gain {
2484                    Some(rhs)
2485                } else {
2486                    Some(lhs)
2487                }
2488            }
2489        }
2490    }
2491
2492    fn match_gain(match_len: usize, offset: usize) -> i32 {
2493        debug_assert!(
2494            offset > 0,
2495            "zstd offsets are 1-indexed, offset=0 is invalid"
2496        );
2497        let offset_bits = 32 - (offset as u32).leading_zeros() as i32;
2498        (match_len as i32) * 4 - offset_bits
2499    }
2500
2501    // Lazy lookahead queries pos+1/pos+2 before they are inserted into hash
2502    // tables — matching C zstd behavior. Seeding before comparing would let a
2503    // position match against itself, changing semantics.
2504    fn pick_lazy_match(
2505        &self,
2506        abs_pos: usize,
2507        lit_len: usize,
2508        best: Option<MatchCandidate>,
2509    ) -> Option<MatchCandidate> {
2510        let best = best?;
2511        if best.match_len >= self.target_len
2512            || abs_pos + 1 + HC_MIN_MATCH_LEN > self.history_abs_end()
2513        {
2514            return Some(best);
2515        }
2516
2517        let current_gain = Self::match_gain(best.match_len, best.offset) + 4;
2518
2519        // Lazy check: evaluate pos+1
2520        let next = self.find_best_match(abs_pos + 1, lit_len + 1);
2521        if let Some(next) = next {
2522            let next_gain = Self::match_gain(next.match_len, next.offset);
2523            if next_gain > current_gain {
2524                return None;
2525            }
2526        }
2527
2528        // Lazy2 check: also evaluate pos+2
2529        if self.lazy_depth >= 2 && abs_pos + 2 + HC_MIN_MATCH_LEN <= self.history_abs_end() {
2530            let next2 = self.find_best_match(abs_pos + 2, lit_len + 2);
2531            if let Some(next2) = next2 {
2532                let next2_gain = Self::match_gain(next2.match_len, next2.offset);
2533                // Must beat current gain + extra literal cost
2534                if next2_gain > current_gain + 4 {
2535                    return None;
2536                }
2537            }
2538        }
2539
2540        Some(best)
2541    }
2542}
2543
2544#[test]
2545fn matches() {
2546    let mut matcher = MatchGenerator::new(1000);
2547    let mut original_data = Vec::new();
2548    let mut reconstructed = Vec::new();
2549
2550    let replay_sequence = |seq: Sequence<'_>, reconstructed: &mut Vec<u8>| match seq {
2551        Sequence::Literals { literals } => {
2552            assert!(!literals.is_empty());
2553            reconstructed.extend_from_slice(literals);
2554        }
2555        Sequence::Triple {
2556            literals,
2557            offset,
2558            match_len,
2559        } => {
2560            assert!(offset > 0);
2561            assert!(match_len >= MIN_MATCH_LEN);
2562            reconstructed.extend_from_slice(literals);
2563            assert!(offset <= reconstructed.len());
2564            let start = reconstructed.len() - offset;
2565            for i in 0..match_len {
2566                let byte = reconstructed[start + i];
2567                reconstructed.push(byte);
2568            }
2569        }
2570    };
2571
2572    matcher.add_data(
2573        alloc::vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
2574        SuffixStore::with_capacity(100),
2575        |_, _| {},
2576    );
2577    original_data.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
2578
2579    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
2580
2581    assert!(!matcher.next_sequence(|_| {}));
2582
2583    matcher.add_data(
2584        alloc::vec![
2585            1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
2586        ],
2587        SuffixStore::with_capacity(100),
2588        |_, _| {},
2589    );
2590    original_data.extend_from_slice(&[
2591        1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
2592    ]);
2593
2594    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
2595    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
2596    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
2597    assert!(!matcher.next_sequence(|_| {}));
2598
2599    matcher.add_data(
2600        alloc::vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0],
2601        SuffixStore::with_capacity(100),
2602        |_, _| {},
2603    );
2604    original_data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0]);
2605
2606    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
2607    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
2608    assert!(!matcher.next_sequence(|_| {}));
2609
2610    matcher.add_data(
2611        alloc::vec![0, 0, 0, 0, 0],
2612        SuffixStore::with_capacity(100),
2613        |_, _| {},
2614    );
2615    original_data.extend_from_slice(&[0, 0, 0, 0, 0]);
2616
2617    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
2618    assert!(!matcher.next_sequence(|_| {}));
2619
2620    matcher.add_data(
2621        alloc::vec![7, 8, 9, 10, 11],
2622        SuffixStore::with_capacity(100),
2623        |_, _| {},
2624    );
2625    original_data.extend_from_slice(&[7, 8, 9, 10, 11]);
2626
2627    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
2628    assert!(!matcher.next_sequence(|_| {}));
2629
2630    matcher.add_data(
2631        alloc::vec![1, 3, 5, 7, 9],
2632        SuffixStore::with_capacity(100),
2633        |_, _| {},
2634    );
2635    matcher.skip_matching();
2636    original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
2637    reconstructed.extend_from_slice(&[1, 3, 5, 7, 9]);
2638    assert!(!matcher.next_sequence(|_| {}));
2639
2640    matcher.add_data(
2641        alloc::vec![1, 3, 5, 7, 9],
2642        SuffixStore::with_capacity(100),
2643        |_, _| {},
2644    );
2645    original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
2646
2647    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
2648    assert!(!matcher.next_sequence(|_| {}));
2649
2650    matcher.add_data(
2651        alloc::vec![0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23],
2652        SuffixStore::with_capacity(100),
2653        |_, _| {},
2654    );
2655    original_data.extend_from_slice(&[0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23]);
2656
2657    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
2658    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
2659    assert!(!matcher.next_sequence(|_| {}));
2660
2661    assert_eq!(reconstructed, original_data);
2662}
2663
2664#[test]
2665fn dfast_matches_roundtrip_multi_block_pattern() {
2666    let pattern = [9, 21, 44, 184, 19, 96, 171, 109, 141, 251];
2667    let first_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
2668    let second_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
2669
2670    let mut matcher = DfastMatchGenerator::new(1 << 22);
2671    let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
2672        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
2673        Sequence::Triple {
2674            literals,
2675            offset,
2676            match_len,
2677        } => {
2678            decoded.extend_from_slice(literals);
2679            let start = decoded.len() - offset;
2680            for i in 0..match_len {
2681                let byte = decoded[start + i];
2682                decoded.push(byte);
2683            }
2684        }
2685    };
2686
2687    matcher.add_data(first_block.clone(), |_| {});
2688    let mut history = Vec::new();
2689    matcher.start_matching(|seq| replay_sequence(&mut history, seq));
2690    assert_eq!(history, first_block);
2691
2692    matcher.add_data(second_block.clone(), |_| {});
2693    let prefix_len = history.len();
2694    matcher.start_matching(|seq| replay_sequence(&mut history, seq));
2695
2696    assert_eq!(&history[prefix_len..], second_block.as_slice());
2697}
2698
2699#[test]
2700fn driver_switches_backends_and_initializes_dfast_via_reset() {
2701    let mut driver = MatchGeneratorDriver::new(32, 2);
2702
2703    driver.reset(CompressionLevel::Default);
2704    assert_eq!(driver.active_backend, MatcherBackend::Dfast);
2705    assert_eq!(driver.window_size(), (1u64 << 22));
2706
2707    let mut first = driver.get_next_space();
2708    first[..12].copy_from_slice(b"abcabcabcabc");
2709    first.truncate(12);
2710    driver.commit_space(first);
2711    assert_eq!(driver.get_last_space(), b"abcabcabcabc");
2712    driver.skip_matching();
2713
2714    let mut second = driver.get_next_space();
2715    second[..12].copy_from_slice(b"abcabcabcabc");
2716    second.truncate(12);
2717    driver.commit_space(second);
2718
2719    let mut reconstructed = b"abcabcabcabc".to_vec();
2720    driver.start_matching(|seq| match seq {
2721        Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
2722        Sequence::Triple {
2723            literals,
2724            offset,
2725            match_len,
2726        } => {
2727            reconstructed.extend_from_slice(literals);
2728            let start = reconstructed.len() - offset;
2729            for i in 0..match_len {
2730                let byte = reconstructed[start + i];
2731                reconstructed.push(byte);
2732            }
2733        }
2734    });
2735    assert_eq!(reconstructed, b"abcabcabcabcabcabcabcabc");
2736
2737    driver.reset(CompressionLevel::Fastest);
2738    assert_eq!(driver.window_size(), (1u64 << 17));
2739}
2740
2741#[test]
2742fn driver_level4_selects_row_backend() {
2743    let mut driver = MatchGeneratorDriver::new(32, 2);
2744    driver.reset(CompressionLevel::Level(4));
2745    assert_eq!(driver.active_backend, MatcherBackend::Row);
2746}
2747
2748#[test]
2749fn driver_small_source_hint_shrinks_dfast_hash_tables() {
2750    let mut driver = MatchGeneratorDriver::new(32, 2);
2751
2752    driver.reset(CompressionLevel::Level(2));
2753    let mut space = driver.get_next_space();
2754    space[..12].copy_from_slice(b"abcabcabcabc");
2755    space.truncate(12);
2756    driver.commit_space(space);
2757    driver.skip_matching();
2758    let full_tables = driver.dfast_matcher().short_hash.len();
2759    assert_eq!(full_tables, 1 << DFAST_HASH_BITS);
2760
2761    driver.set_source_size_hint(1024);
2762    driver.reset(CompressionLevel::Level(2));
2763    let mut space = driver.get_next_space();
2764    space[..12].copy_from_slice(b"xyzxyzxyzxyz");
2765    space.truncate(12);
2766    driver.commit_space(space);
2767    driver.skip_matching();
2768    let hinted_tables = driver.dfast_matcher().short_hash.len();
2769
2770    assert_eq!(driver.window_size(), 1 << MIN_WINDOW_LOG);
2771    assert_eq!(hinted_tables, 1 << MIN_WINDOW_LOG);
2772    assert!(
2773        hinted_tables < full_tables,
2774        "tiny source hint should reduce dfast table footprint"
2775    );
2776}
2777
2778#[test]
2779fn driver_small_source_hint_shrinks_row_hash_tables() {
2780    let mut driver = MatchGeneratorDriver::new(32, 2);
2781
2782    driver.reset(CompressionLevel::Level(4));
2783    let mut space = driver.get_next_space();
2784    space[..12].copy_from_slice(b"abcabcabcabc");
2785    space.truncate(12);
2786    driver.commit_space(space);
2787    driver.skip_matching();
2788    let full_rows = driver.row_matcher().row_heads.len();
2789    assert_eq!(full_rows, 1 << (ROW_HASH_BITS - ROW_LOG));
2790
2791    driver.set_source_size_hint(1024);
2792    driver.reset(CompressionLevel::Level(4));
2793    let mut space = driver.get_next_space();
2794    space[..12].copy_from_slice(b"xyzxyzxyzxyz");
2795    space.truncate(12);
2796    driver.commit_space(space);
2797    driver.skip_matching();
2798    let hinted_rows = driver.row_matcher().row_heads.len();
2799
2800    assert_eq!(driver.window_size(), 1 << MIN_WINDOW_LOG);
2801    assert_eq!(hinted_rows, 1 << ((MIN_WINDOW_LOG as usize) - ROW_LOG));
2802    assert!(
2803        hinted_rows < full_rows,
2804        "tiny source hint should reduce row hash table footprint"
2805    );
2806}
2807
2808#[test]
2809fn row_matches_roundtrip_multi_block_pattern() {
2810    let pattern = [7, 13, 44, 184, 19, 96, 171, 109, 141, 251];
2811    let first_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
2812    let second_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
2813
2814    let mut matcher = RowMatchGenerator::new(1 << 22);
2815    matcher.configure(ROW_CONFIG);
2816    matcher.ensure_tables();
2817    let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
2818        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
2819        Sequence::Triple {
2820            literals,
2821            offset,
2822            match_len,
2823        } => {
2824            decoded.extend_from_slice(literals);
2825            let start = decoded.len() - offset;
2826            for i in 0..match_len {
2827                let byte = decoded[start + i];
2828                decoded.push(byte);
2829            }
2830        }
2831    };
2832
2833    matcher.add_data(first_block.clone(), |_| {});
2834    let mut history = Vec::new();
2835    matcher.start_matching(|seq| replay_sequence(&mut history, seq));
2836    assert_eq!(history, first_block);
2837
2838    matcher.add_data(second_block.clone(), |_| {});
2839    let prefix_len = history.len();
2840    matcher.start_matching(|seq| replay_sequence(&mut history, seq));
2841
2842    assert_eq!(&history[prefix_len..], second_block.as_slice());
2843
2844    // Force a literals-only pass so the Sequence::Literals arm is exercised.
2845    let third_block: Vec<u8> = (0u8..=255).collect();
2846    matcher.add_data(third_block.clone(), |_| {});
2847    let third_prefix = history.len();
2848    matcher.start_matching(|seq| replay_sequence(&mut history, seq));
2849    assert_eq!(&history[third_prefix..], third_block.as_slice());
2850}
2851
2852#[test]
2853fn row_short_block_emits_literals_only() {
2854    let mut matcher = RowMatchGenerator::new(1 << 22);
2855    matcher.configure(ROW_CONFIG);
2856
2857    matcher.add_data(b"abcde".to_vec(), |_| {});
2858
2859    let mut saw_triple = false;
2860    let mut reconstructed = Vec::new();
2861    matcher.start_matching(|seq| match seq {
2862        Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
2863        Sequence::Triple { .. } => saw_triple = true,
2864    });
2865
2866    assert!(
2867        !saw_triple,
2868        "row backend must not emit triples for short blocks"
2869    );
2870    assert_eq!(reconstructed, b"abcde");
2871
2872    // Then feed a clearly matchable block and ensure the Triple arm is reachable.
2873    saw_triple = false;
2874    matcher.add_data(b"abcdeabcde".to_vec(), |_| {});
2875    matcher.start_matching(|seq| {
2876        if let Sequence::Triple { .. } = seq {
2877            saw_triple = true;
2878        }
2879    });
2880    assert!(
2881        saw_triple,
2882        "row backend should emit triples on repeated data"
2883    );
2884}
2885
2886#[test]
2887fn row_pick_lazy_returns_best_when_lookahead_is_out_of_bounds() {
2888    let mut matcher = RowMatchGenerator::new(1 << 22);
2889    matcher.configure(ROW_CONFIG);
2890    matcher.add_data(b"abcabc".to_vec(), |_| {});
2891
2892    let best = MatchCandidate {
2893        start: 0,
2894        offset: 1,
2895        match_len: ROW_MIN_MATCH_LEN,
2896    };
2897    let picked = matcher
2898        .pick_lazy_match(0, 0, Some(best))
2899        .expect("best candidate must survive");
2900
2901    assert_eq!(picked.start, best.start);
2902    assert_eq!(picked.offset, best.offset);
2903    assert_eq!(picked.match_len, best.match_len);
2904}
2905
2906#[test]
2907fn row_backfills_previous_block_tail_for_cross_boundary_match() {
2908    let mut matcher = RowMatchGenerator::new(1 << 22);
2909    matcher.configure(ROW_CONFIG);
2910
2911    let mut first_block = alloc::vec![0xA5; 64];
2912    first_block.extend_from_slice(b"XYZ");
2913    let second_block = b"XYZXYZtail".to_vec();
2914
2915    let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
2916        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
2917        Sequence::Triple {
2918            literals,
2919            offset,
2920            match_len,
2921        } => {
2922            decoded.extend_from_slice(literals);
2923            let start = decoded.len() - offset;
2924            for i in 0..match_len {
2925                let byte = decoded[start + i];
2926                decoded.push(byte);
2927            }
2928        }
2929    };
2930
2931    matcher.add_data(first_block.clone(), |_| {});
2932    let mut reconstructed = Vec::new();
2933    matcher.start_matching(|seq| replay_sequence(&mut reconstructed, seq));
2934    assert_eq!(reconstructed, first_block);
2935
2936    matcher.add_data(second_block.clone(), |_| {});
2937    let mut saw_cross_boundary = false;
2938    let prefix_len = reconstructed.len();
2939    matcher.start_matching(|seq| {
2940        if let Sequence::Triple {
2941            literals,
2942            offset,
2943            match_len,
2944        } = seq
2945            && literals.is_empty()
2946            && offset == 3
2947            && match_len >= ROW_MIN_MATCH_LEN
2948        {
2949            saw_cross_boundary = true;
2950        }
2951        replay_sequence(&mut reconstructed, seq);
2952    });
2953
2954    assert!(
2955        saw_cross_boundary,
2956        "row matcher should reuse the 3-byte previous-block tail"
2957    );
2958    assert_eq!(&reconstructed[prefix_len..], second_block.as_slice());
2959}
2960
2961#[test]
2962fn driver_unhinted_level2_keeps_default_dfast_hash_table_size() {
2963    let mut driver = MatchGeneratorDriver::new(32, 2);
2964
2965    driver.reset(CompressionLevel::Level(2));
2966    let mut space = driver.get_next_space();
2967    space[..12].copy_from_slice(b"abcabcabcabc");
2968    space.truncate(12);
2969    driver.commit_space(space);
2970    driver.skip_matching();
2971
2972    let table_len = driver.dfast_matcher().short_hash.len();
2973    assert_eq!(
2974        table_len,
2975        1 << DFAST_HASH_BITS,
2976        "unhinted Level(2) should keep default dfast table size"
2977    );
2978}
2979
2980#[test]
2981fn simple_backend_rejects_undersized_pooled_suffix_store() {
2982    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
2983    driver.reset(CompressionLevel::Fastest);
2984
2985    driver.suffix_pool.push(SuffixStore::with_capacity(1024));
2986
2987    let mut space = driver.get_next_space();
2988    space.clear();
2989    space.resize(4096, 0xAB);
2990    driver.commit_space(space);
2991
2992    let last_suffix_slots = driver
2993        .match_generator
2994        .window
2995        .last()
2996        .expect("window entry must exist after commit")
2997        .suffixes
2998        .slots
2999        .len();
3000    assert!(
3001        last_suffix_slots >= 4096,
3002        "undersized pooled suffix store must not be reused for larger blocks"
3003    );
3004}
3005
3006#[test]
3007fn source_hint_clamps_driver_slice_size_to_window() {
3008    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
3009    driver.set_source_size_hint(1024);
3010    driver.reset(CompressionLevel::Default);
3011
3012    let window = driver.window_size() as usize;
3013    assert_eq!(window, 1024);
3014    assert_eq!(driver.slice_size, window);
3015
3016    let space = driver.get_next_space();
3017    assert_eq!(space.len(), window);
3018    driver.commit_space(space);
3019}
3020
3021#[test]
3022fn pooled_space_keeps_capacity_when_slice_size_shrinks() {
3023    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
3024    driver.reset(CompressionLevel::Default);
3025
3026    let large = driver.get_next_space();
3027    let large_capacity = large.capacity();
3028    assert!(large_capacity >= 128 * 1024);
3029    driver.commit_space(large);
3030
3031    driver.set_source_size_hint(1024);
3032    driver.reset(CompressionLevel::Default);
3033
3034    let small = driver.get_next_space();
3035    assert_eq!(small.len(), 1024);
3036    assert!(
3037        small.capacity() >= large_capacity,
3038        "pooled buffer capacity should be preserved to avoid shrink/grow churn"
3039    );
3040}
3041
3042#[test]
3043fn driver_best_to_fastest_releases_oversized_hc_tables() {
3044    let mut driver = MatchGeneratorDriver::new(32, 2);
3045
3046    // Initialize at Best — allocates large HC tables (2M hash, 1M chain).
3047    driver.reset(CompressionLevel::Best);
3048    assert_eq!(driver.window_size(), (1u64 << 24));
3049
3050    // Feed data so tables are actually allocated via ensure_tables().
3051    let mut space = driver.get_next_space();
3052    space[..12].copy_from_slice(b"abcabcabcabc");
3053    space.truncate(12);
3054    driver.commit_space(space);
3055    driver.skip_matching();
3056
3057    // Switch to Fastest — must release HC tables.
3058    driver.reset(CompressionLevel::Fastest);
3059    assert_eq!(driver.window_size(), (1u64 << 17));
3060
3061    // HC matcher should have empty tables after backend switch.
3062    let hc = driver.hc_match_generator.as_ref().unwrap();
3063    assert!(
3064        hc.hash_table.is_empty(),
3065        "HC hash_table should be released after switching away from Best"
3066    );
3067    assert!(
3068        hc.chain_table.is_empty(),
3069        "HC chain_table should be released after switching away from Best"
3070    );
3071}
3072
3073#[test]
3074fn driver_better_to_best_resizes_hc_tables() {
3075    let mut driver = MatchGeneratorDriver::new(32, 2);
3076
3077    // Initialize at Better — allocates small HC tables (1M hash, 512K chain).
3078    driver.reset(CompressionLevel::Better);
3079    assert_eq!(driver.window_size(), (1u64 << 23));
3080
3081    let mut space = driver.get_next_space();
3082    space[..12].copy_from_slice(b"abcabcabcabc");
3083    space.truncate(12);
3084    driver.commit_space(space);
3085    driver.skip_matching();
3086
3087    let hc = driver.hc_match_generator.as_ref().unwrap();
3088    let better_hash_len = hc.hash_table.len();
3089    let better_chain_len = hc.chain_table.len();
3090
3091    // Switch to Best — must resize to larger tables.
3092    driver.reset(CompressionLevel::Best);
3093    assert_eq!(driver.window_size(), (1u64 << 24));
3094
3095    // Feed data to trigger ensure_tables with new sizes.
3096    let mut space = driver.get_next_space();
3097    space[..12].copy_from_slice(b"xyzxyzxyzxyz");
3098    space.truncate(12);
3099    driver.commit_space(space);
3100    driver.skip_matching();
3101
3102    let hc = driver.hc_match_generator.as_ref().unwrap();
3103    assert!(
3104        hc.hash_table.len() > better_hash_len,
3105        "Best hash_table ({}) should be larger than Better ({})",
3106        hc.hash_table.len(),
3107        better_hash_len
3108    );
3109    assert!(
3110        hc.chain_table.len() > better_chain_len,
3111        "Best chain_table ({}) should be larger than Better ({})",
3112        hc.chain_table.len(),
3113        better_chain_len
3114    );
3115}
3116
3117#[test]
3118fn prime_with_dictionary_preserves_history_for_first_full_block() {
3119    let mut driver = MatchGeneratorDriver::new(8, 1);
3120    driver.reset(CompressionLevel::Fastest);
3121
3122    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
3123
3124    let mut space = driver.get_next_space();
3125    space.clear();
3126    space.extend_from_slice(b"abcdefgh");
3127    driver.commit_space(space);
3128
3129    let mut saw_match = false;
3130    driver.start_matching(|seq| {
3131        if let Sequence::Triple {
3132            literals,
3133            offset,
3134            match_len,
3135        } = seq
3136            && literals.is_empty()
3137            && offset == 8
3138            && match_len >= MIN_MATCH_LEN
3139        {
3140            saw_match = true;
3141        }
3142    });
3143
3144    assert!(
3145        saw_match,
3146        "first full block should still match dictionary-primed history"
3147    );
3148}
3149
3150#[test]
3151fn prime_with_large_dictionary_preserves_early_history_until_first_block() {
3152    let mut driver = MatchGeneratorDriver::new(8, 1);
3153    driver.reset(CompressionLevel::Fastest);
3154
3155    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
3156
3157    let mut space = driver.get_next_space();
3158    space.clear();
3159    space.extend_from_slice(b"abcdefgh");
3160    driver.commit_space(space);
3161
3162    let mut saw_match = false;
3163    driver.start_matching(|seq| {
3164        if let Sequence::Triple {
3165            literals,
3166            offset,
3167            match_len,
3168        } = seq
3169            && literals.is_empty()
3170            && offset == 24
3171            && match_len >= MIN_MATCH_LEN
3172        {
3173            saw_match = true;
3174        }
3175    });
3176
3177    assert!(
3178        saw_match,
3179        "dictionary bytes should remain addressable until frame output exceeds the live window"
3180    );
3181}
3182
3183#[test]
3184fn prime_with_dictionary_applies_offset_history_even_when_content_is_empty() {
3185    let mut driver = MatchGeneratorDriver::new(8, 1);
3186    driver.reset(CompressionLevel::Fastest);
3187
3188    driver.prime_with_dictionary(&[], [11, 7, 3]);
3189
3190    assert_eq!(driver.match_generator.offset_hist, [11, 7, 3]);
3191}
3192
3193#[test]
3194fn dfast_prime_with_dictionary_preserves_history_for_first_full_block() {
3195    let mut driver = MatchGeneratorDriver::new(8, 1);
3196    driver.reset(CompressionLevel::Level(2));
3197
3198    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
3199
3200    let mut space = driver.get_next_space();
3201    space.clear();
3202    space.extend_from_slice(b"abcdefgh");
3203    driver.commit_space(space);
3204
3205    let mut saw_match = false;
3206    driver.start_matching(|seq| {
3207        if let Sequence::Triple {
3208            literals,
3209            offset,
3210            match_len,
3211        } = seq
3212            && literals.is_empty()
3213            && offset == 8
3214            && match_len >= DFAST_MIN_MATCH_LEN
3215        {
3216            saw_match = true;
3217        }
3218    });
3219
3220    assert!(
3221        saw_match,
3222        "dfast backend should match dictionary-primed history in first full block"
3223    );
3224}
3225
3226#[test]
3227fn prime_with_dictionary_does_not_inflate_reported_window_size() {
3228    let mut driver = MatchGeneratorDriver::new(8, 1);
3229    driver.reset(CompressionLevel::Fastest);
3230
3231    let before = driver.window_size();
3232    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
3233    let after = driver.window_size();
3234
3235    assert_eq!(
3236        after, before,
3237        "dictionary retention budget must not change reported frame window size"
3238    );
3239}
3240
3241#[test]
3242fn prime_with_dictionary_does_not_reuse_tiny_suffix_store() {
3243    let mut driver = MatchGeneratorDriver::new(8, 2);
3244    driver.reset(CompressionLevel::Fastest);
3245
3246    // This dictionary leaves a 1-byte tail chunk (capacity=1 suffix table),
3247    // which should never be committed to the matcher window.
3248    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);
3249
3250    assert!(
3251        driver
3252            .match_generator
3253            .window
3254            .iter()
3255            .all(|entry| entry.data.len() >= MIN_MATCH_LEN),
3256        "dictionary priming must not commit tails shorter than MIN_MATCH_LEN"
3257    );
3258}
3259
3260#[test]
3261fn prime_with_dictionary_counts_only_committed_tail_budget() {
3262    let mut driver = MatchGeneratorDriver::new(8, 1);
3263    driver.reset(CompressionLevel::Fastest);
3264
3265    let before = driver.match_generator.max_window_size;
3266    // One full slice plus a 1-byte tail that cannot be committed.
3267    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);
3268
3269    assert_eq!(
3270        driver.match_generator.max_window_size,
3271        before + 8,
3272        "retention budget must account only for dictionary bytes actually committed to history"
3273    );
3274}
3275
3276#[test]
3277fn dfast_prime_with_dictionary_counts_four_byte_tail_budget() {
3278    let mut driver = MatchGeneratorDriver::new(8, 1);
3279    driver.reset(CompressionLevel::Level(2));
3280
3281    let before = driver.dfast_matcher().max_window_size;
3282    // One full slice plus a 4-byte tail. Dfast can still use this tail through
3283    // short-hash overlap into the next block, so it should stay retained.
3284    driver.prime_with_dictionary(b"abcdefghijkl", [1, 4, 8]);
3285
3286    assert_eq!(
3287        driver.dfast_matcher().max_window_size,
3288        before + 12,
3289        "dfast retention budget should include 4-byte dictionary tails"
3290    );
3291}
3292
3293#[test]
3294fn row_prime_with_dictionary_preserves_history_for_first_full_block() {
3295    let mut driver = MatchGeneratorDriver::new(8, 1);
3296    driver.reset(CompressionLevel::Level(4));
3297
3298    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
3299
3300    let mut space = driver.get_next_space();
3301    space.clear();
3302    space.extend_from_slice(b"abcdefgh");
3303    driver.commit_space(space);
3304
3305    let mut saw_match = false;
3306    driver.start_matching(|seq| {
3307        if let Sequence::Triple {
3308            literals,
3309            offset,
3310            match_len,
3311        } = seq
3312            && literals.is_empty()
3313            && offset == 8
3314            && match_len >= ROW_MIN_MATCH_LEN
3315        {
3316            saw_match = true;
3317        }
3318    });
3319
3320    assert!(
3321        saw_match,
3322        "row backend should match dictionary-primed history in first full block"
3323    );
3324}
3325
3326#[test]
3327fn row_prime_with_dictionary_subtracts_uncommitted_tail_budget() {
3328    let mut driver = MatchGeneratorDriver::new(8, 1);
3329    driver.reset(CompressionLevel::Level(4));
3330
3331    let base_window = driver.row_matcher().max_window_size;
3332    // Slice size is 8. The trailing byte cannot be committed (<4 tail),
3333    // so it must be subtracted from retained budget.
3334    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);
3335
3336    assert_eq!(
3337        driver.row_matcher().max_window_size,
3338        base_window + 8,
3339        "row retained window must exclude uncommitted 1-byte tail"
3340    );
3341}
3342
3343#[test]
3344fn prime_with_dictionary_budget_shrinks_after_row_eviction() {
3345    let mut driver = MatchGeneratorDriver::new(8, 1);
3346    driver.reset(CompressionLevel::Level(4));
3347    // Keep live window tiny so dictionary-primed slices are evicted quickly.
3348    driver.row_matcher_mut().max_window_size = 8;
3349    driver.reported_window_size = 8;
3350
3351    let base_window = driver.row_matcher().max_window_size;
3352    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
3353    assert_eq!(driver.row_matcher().max_window_size, base_window + 24);
3354
3355    for block in [b"AAAAAAAA", b"BBBBBBBB"] {
3356        let mut space = driver.get_next_space();
3357        space.clear();
3358        space.extend_from_slice(block);
3359        driver.commit_space(space);
3360        driver.skip_matching();
3361    }
3362
3363    assert_eq!(
3364        driver.dictionary_retained_budget, 0,
3365        "dictionary budget should be fully retired once primed dict slices are evicted"
3366    );
3367    assert_eq!(
3368        driver.row_matcher().max_window_size,
3369        base_window,
3370        "retired dictionary budget must not remain reusable for live history"
3371    );
3372}
3373
3374#[test]
3375fn row_get_last_space_and_reset_to_fastest_clears_window() {
3376    let mut driver = MatchGeneratorDriver::new(8, 1);
3377    driver.reset(CompressionLevel::Level(4));
3378
3379    let mut space = driver.get_next_space();
3380    space.clear();
3381    space.extend_from_slice(b"row-data");
3382    driver.commit_space(space);
3383
3384    assert_eq!(driver.get_last_space(), b"row-data");
3385
3386    driver.reset(CompressionLevel::Fastest);
3387    assert_eq!(driver.active_backend, MatcherBackend::Simple);
3388    assert!(driver.row_matcher().window.is_empty());
3389}
3390
3391/// Ensures switching from Row to Simple returns pooled buffers and row tables.
3392#[test]
3393fn driver_reset_from_row_backend_reclaims_row_buffer_pool() {
3394    let mut driver = MatchGeneratorDriver::new(8, 1);
3395    driver.reset(CompressionLevel::Level(4));
3396    assert_eq!(driver.active_backend, MatcherBackend::Row);
3397
3398    // Ensure the row matcher option is initialized so reset() executes
3399    // the Row backend retirement path.
3400    let _ = driver.row_matcher();
3401    let mut space = driver.get_next_space();
3402    space.extend_from_slice(b"row-data-to-recycle");
3403    driver.commit_space(space);
3404
3405    let before_pool = driver.vec_pool.len();
3406    driver.reset(CompressionLevel::Fastest);
3407
3408    assert_eq!(driver.active_backend, MatcherBackend::Simple);
3409    let row = driver
3410        .row_match_generator
3411        .as_ref()
3412        .expect("row matcher should remain allocated after switch");
3413    assert!(row.row_heads.is_empty());
3414    assert!(row.row_positions.is_empty());
3415    assert!(row.row_tags.is_empty());
3416    assert!(
3417        driver.vec_pool.len() >= before_pool,
3418        "row reset should recycle row history buffers"
3419    );
3420}
3421
3422/// Guards the optional row backend retirement path when no row matcher was allocated.
3423#[test]
3424fn driver_reset_from_row_backend_tolerates_missing_row_matcher() {
3425    let mut driver = MatchGeneratorDriver::new(8, 1);
3426    driver.active_backend = MatcherBackend::Row;
3427    driver.row_match_generator = None;
3428
3429    driver.reset(CompressionLevel::Fastest);
3430
3431    assert_eq!(driver.active_backend, MatcherBackend::Simple);
3432}
3433
3434#[test]
3435fn adjust_params_for_zero_source_size_uses_min_window_floor() {
3436    let mut params = resolve_level_params(CompressionLevel::Level(4), None);
3437    params.window_log = 22;
3438    let adjusted = adjust_params_for_source_size(params, 0);
3439    assert_eq!(adjusted.window_log, MIN_WINDOW_LOG);
3440}
3441
3442#[test]
3443fn row_pick_lazy_returns_none_when_next_is_better() {
3444    let mut matcher = RowMatchGenerator::new(1 << 22);
3445    matcher.configure(ROW_CONFIG);
3446    matcher.add_data(alloc::vec![b'a'; 64], |_| {});
3447    matcher.ensure_tables();
3448
3449    let abs_pos = matcher.history_abs_start + 16;
3450    let best = MatchCandidate {
3451        start: abs_pos,
3452        offset: 8,
3453        match_len: ROW_MIN_MATCH_LEN,
3454    };
3455    assert!(
3456        matcher.pick_lazy_match(abs_pos, 0, Some(best)).is_none(),
3457        "lazy picker should defer when next position is clearly better"
3458    );
3459}
3460
3461#[test]
3462fn row_pick_lazy_depth2_returns_none_when_next2_significantly_better() {
3463    let mut matcher = RowMatchGenerator::new(1 << 22);
3464    matcher.configure(ROW_CONFIG);
3465    matcher.lazy_depth = 2;
3466    matcher.search_depth = 0;
3467    matcher.offset_hist = [6, 9, 1];
3468
3469    let mut data = alloc::vec![b'x'; 40];
3470    data[11..30].copy_from_slice(b"EFABCABCAEFABCAEFAB");
3471    matcher.add_data(data, |_| {});
3472    matcher.ensure_tables();
3473
3474    let abs_pos = matcher.history_abs_start + 20;
3475    let best = matcher
3476        .best_match(abs_pos, 0)
3477        .expect("expected baseline repcode match");
3478    assert_eq!(best.offset, 9);
3479    assert_eq!(best.match_len, ROW_MIN_MATCH_LEN);
3480
3481    if let Some(next) = matcher.best_match(abs_pos + 1, 1) {
3482        assert!(next.match_len <= best.match_len);
3483    }
3484
3485    let next2 = matcher
3486        .best_match(abs_pos + 2, 2)
3487        .expect("expected +2 candidate");
3488    assert!(
3489        next2.match_len > best.match_len + 1,
3490        "+2 candidate must be significantly better for depth-2 lazy skip"
3491    );
3492    assert!(
3493        matcher.pick_lazy_match(abs_pos, 0, Some(best)).is_none(),
3494        "lazy picker should defer when +2 candidate is significantly better"
3495    );
3496}
3497
3498#[test]
3499fn row_pick_lazy_depth2_keeps_best_when_next2_is_only_one_byte_better() {
3500    let mut matcher = RowMatchGenerator::new(1 << 22);
3501    matcher.configure(ROW_CONFIG);
3502    matcher.lazy_depth = 2;
3503    matcher.search_depth = 0;
3504    matcher.offset_hist = [6, 9, 1];
3505
3506    let mut data = alloc::vec![b'x'; 40];
3507    data[11..30].copy_from_slice(b"EFABCABCAEFABCAEFAZ");
3508    matcher.add_data(data, |_| {});
3509    matcher.ensure_tables();
3510
3511    let abs_pos = matcher.history_abs_start + 20;
3512    let best = matcher
3513        .best_match(abs_pos, 0)
3514        .expect("expected baseline repcode match");
3515    assert_eq!(best.offset, 9);
3516    assert_eq!(best.match_len, ROW_MIN_MATCH_LEN);
3517
3518    let next2 = matcher
3519        .best_match(abs_pos + 2, 2)
3520        .expect("expected +2 candidate");
3521    assert_eq!(next2.match_len, best.match_len + 1);
3522    let chosen = matcher
3523        .pick_lazy_match(abs_pos, 0, Some(best))
3524        .expect("lazy picker should keep current best");
3525    assert_eq!(chosen.start, best.start);
3526    assert_eq!(chosen.offset, best.offset);
3527    assert_eq!(chosen.match_len, best.match_len);
3528}
3529
3530/// Verifies row/tag extraction uses the high bits of the multiplicative hash.
3531#[test]
3532fn row_hash_and_row_extracts_high_bits() {
3533    let mut matcher = RowMatchGenerator::new(1 << 22);
3534    matcher.configure(ROW_CONFIG);
3535    matcher.add_data(
3536        alloc::vec![
3537            0xAA, 0xBB, 0xCC, 0x11, 0x10, 0x20, 0x30, 0x40, 0xAA, 0xBB, 0xCC, 0x22, 0x50, 0x60,
3538            0x70, 0x80,
3539        ],
3540        |_| {},
3541    );
3542    matcher.ensure_tables();
3543
3544    let pos = matcher.history_abs_start + 8;
3545    let (row, tag) = matcher
3546        .hash_and_row(pos)
3547        .expect("row hash should be available");
3548
3549    let idx = pos - matcher.history_abs_start;
3550    let concat = matcher.live_history();
3551    let value = u32::from_le_bytes(concat[idx..idx + ROW_HASH_KEY_LEN].try_into().unwrap()) as u64;
3552    const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
3553    let hash = value.wrapping_mul(PRIME);
3554    let total_bits = matcher.row_hash_log + ROW_TAG_BITS;
3555    let combined = hash >> (u64::BITS as usize - total_bits);
3556    let expected_row =
3557        ((combined >> ROW_TAG_BITS) as usize) & ((1usize << matcher.row_hash_log) - 1);
3558    let expected_tag = combined as u8;
3559
3560    assert_eq!(row, expected_row);
3561    assert_eq!(tag, expected_tag);
3562}
3563
3564#[test]
3565fn row_repcode_skips_candidate_before_history_start() {
3566    let mut matcher = RowMatchGenerator::new(1 << 22);
3567    matcher.configure(ROW_CONFIG);
3568    matcher.history = alloc::vec![b'a'; 20];
3569    matcher.history_start = 0;
3570    matcher.history_abs_start = 10;
3571    matcher.offset_hist = [3, 0, 0];
3572
3573    assert!(matcher.repcode_candidate(12, 1).is_none());
3574}
3575
3576#[test]
3577fn row_repcode_returns_none_when_position_too_close_to_history_end() {
3578    let mut matcher = RowMatchGenerator::new(1 << 22);
3579    matcher.configure(ROW_CONFIG);
3580    matcher.history = b"abcde".to_vec();
3581    matcher.history_start = 0;
3582    matcher.history_abs_start = 0;
3583    matcher.offset_hist = [1, 0, 0];
3584
3585    assert!(matcher.repcode_candidate(4, 1).is_none());
3586}
3587
3588#[test]
3589fn row_candidate_returns_none_when_abs_pos_near_end_of_history() {
3590    let mut matcher = RowMatchGenerator::new(1 << 22);
3591    matcher.configure(ROW_CONFIG);
3592    matcher.history = b"abcde".to_vec();
3593    matcher.history_start = 0;
3594    matcher.history_abs_start = 0;
3595
3596    assert!(matcher.row_candidate(0, 0).is_none());
3597}
3598
3599#[test]
3600fn hc_chain_candidates_returns_sentinels_for_short_suffix() {
3601    let mut hc = HcMatchGenerator::new(32);
3602    hc.history = b"abc".to_vec();
3603    hc.history_start = 0;
3604    hc.history_abs_start = 0;
3605    hc.ensure_tables();
3606
3607    let candidates = hc.chain_candidates(0);
3608    assert!(candidates.iter().all(|&pos| pos == usize::MAX));
3609}
3610
3611#[test]
3612fn hc_reset_refills_existing_tables_with_empty_sentinel() {
3613    let mut hc = HcMatchGenerator::new(32);
3614    hc.add_data(b"abcdeabcde".to_vec(), |_| {});
3615    hc.ensure_tables();
3616    assert!(!hc.hash_table.is_empty());
3617    assert!(!hc.chain_table.is_empty());
3618    hc.hash_table.fill(123);
3619    hc.chain_table.fill(456);
3620
3621    hc.reset(|_| {});
3622
3623    assert!(hc.hash_table.iter().all(|&v| v == HC_EMPTY));
3624    assert!(hc.chain_table.iter().all(|&v| v == HC_EMPTY));
3625}
3626
3627#[test]
3628fn hc_start_matching_returns_early_for_empty_current_block() {
3629    let mut hc = HcMatchGenerator::new(32);
3630    hc.add_data(Vec::new(), |_| {});
3631    let mut called = false;
3632    hc.start_matching(|_| called = true);
3633    assert!(!called, "empty current block should not emit sequences");
3634}
3635
3636#[test]
3637fn hc_compact_history_drains_when_threshold_crossed() {
3638    let mut hc = HcMatchGenerator::new(8);
3639    hc.history = b"abcdefghijklmnopqrstuvwxyz".to_vec();
3640    hc.history_start = 16;
3641    hc.compact_history();
3642    assert_eq!(hc.history_start, 0);
3643    assert_eq!(hc.history, b"qrstuvwxyz");
3644}
3645
3646#[test]
3647fn hc_insert_position_no_rebase_returns_when_relative_pos_unavailable() {
3648    let mut hc = HcMatchGenerator::new(32);
3649    hc.history = b"abcdefghijklmnop".to_vec();
3650    hc.history_abs_start = 0;
3651    hc.position_base = 1;
3652    hc.ensure_tables();
3653    let before_hash = hc.hash_table.clone();
3654    let before_chain = hc.chain_table.clone();
3655
3656    hc.insert_position_no_rebase(0);
3657
3658    assert_eq!(hc.hash_table, before_hash);
3659    assert_eq!(hc.chain_table, before_chain);
3660}
3661
3662#[test]
3663fn prime_with_dictionary_budget_shrinks_after_simple_eviction() {
3664    let mut driver = MatchGeneratorDriver::new(8, 1);
3665    driver.reset(CompressionLevel::Fastest);
3666    // Use a small live window so dictionary-primed slices are evicted
3667    // quickly and budget retirement can be asserted deterministically.
3668    driver.match_generator.max_window_size = 8;
3669    driver.reported_window_size = 8;
3670
3671    let base_window = driver.match_generator.max_window_size;
3672    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
3673    assert_eq!(driver.match_generator.max_window_size, base_window + 24);
3674
3675    for block in [b"AAAAAAAA", b"BBBBBBBB"] {
3676        let mut space = driver.get_next_space();
3677        space.clear();
3678        space.extend_from_slice(block);
3679        driver.commit_space(space);
3680        driver.skip_matching();
3681    }
3682
3683    assert_eq!(
3684        driver.dictionary_retained_budget, 0,
3685        "dictionary budget should be fully retired once primed dict slices are evicted"
3686    );
3687    assert_eq!(
3688        driver.match_generator.max_window_size, base_window,
3689        "retired dictionary budget must not remain reusable for live history"
3690    );
3691}
3692
3693#[test]
3694fn prime_with_dictionary_budget_shrinks_after_dfast_eviction() {
3695    let mut driver = MatchGeneratorDriver::new(8, 1);
3696    driver.reset(CompressionLevel::Level(2));
3697    // Use a small live window in this regression so dictionary-primed slices are
3698    // evicted quickly and budget retirement can be asserted deterministically.
3699    driver.dfast_matcher_mut().max_window_size = 8;
3700    driver.reported_window_size = 8;
3701
3702    let base_window = driver.dfast_matcher().max_window_size;
3703    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
3704    assert_eq!(driver.dfast_matcher().max_window_size, base_window + 24);
3705
3706    for block in [b"AAAAAAAA", b"BBBBBBBB"] {
3707        let mut space = driver.get_next_space();
3708        space.clear();
3709        space.extend_from_slice(block);
3710        driver.commit_space(space);
3711        driver.skip_matching();
3712    }
3713
3714    assert_eq!(
3715        driver.dictionary_retained_budget, 0,
3716        "dictionary budget should be fully retired once primed dict slices are evicted"
3717    );
3718    assert_eq!(
3719        driver.dfast_matcher().max_window_size,
3720        base_window,
3721        "retired dictionary budget must not remain reusable for live history"
3722    );
3723}
3724
3725#[test]
3726fn hc_prime_with_dictionary_preserves_history_for_first_full_block() {
3727    let mut driver = MatchGeneratorDriver::new(8, 1);
3728    driver.reset(CompressionLevel::Better);
3729
3730    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
3731
3732    let mut space = driver.get_next_space();
3733    space.clear();
3734    // Repeat the dictionary content so the HC matcher can find it.
3735    // HC_MIN_MATCH_LEN is 5, so an 8-byte match is well above threshold.
3736    space.extend_from_slice(b"abcdefgh");
3737    driver.commit_space(space);
3738
3739    let mut saw_match = false;
3740    driver.start_matching(|seq| {
3741        if let Sequence::Triple {
3742            literals,
3743            offset,
3744            match_len,
3745        } = seq
3746            && literals.is_empty()
3747            && offset == 8
3748            && match_len >= HC_MIN_MATCH_LEN
3749        {
3750            saw_match = true;
3751        }
3752    });
3753
3754    assert!(
3755        saw_match,
3756        "hash-chain backend should match dictionary-primed history in first full block"
3757    );
3758}
3759
3760#[test]
3761fn prime_with_dictionary_budget_shrinks_after_hc_eviction() {
3762    let mut driver = MatchGeneratorDriver::new(8, 1);
3763    driver.reset(CompressionLevel::Better);
3764    // Use a small live window so dictionary-primed slices are evicted quickly.
3765    driver.hc_matcher_mut().max_window_size = 8;
3766    driver.reported_window_size = 8;
3767
3768    let base_window = driver.hc_matcher().max_window_size;
3769    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
3770    assert_eq!(driver.hc_matcher().max_window_size, base_window + 24);
3771
3772    for block in [b"AAAAAAAA", b"BBBBBBBB"] {
3773        let mut space = driver.get_next_space();
3774        space.clear();
3775        space.extend_from_slice(block);
3776        driver.commit_space(space);
3777        driver.skip_matching();
3778    }
3779
3780    assert_eq!(
3781        driver.dictionary_retained_budget, 0,
3782        "dictionary budget should be fully retired once primed dict slices are evicted"
3783    );
3784    assert_eq!(
3785        driver.hc_matcher().max_window_size,
3786        base_window,
3787        "retired dictionary budget must not remain reusable for live history"
3788    );
3789}
3790
3791#[test]
3792fn hc_rebases_positions_after_u32_boundary() {
3793    let mut matcher = HcMatchGenerator::new(64);
3794    matcher.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
3795    matcher.ensure_tables();
3796    matcher.position_base = 0;
3797    let history_abs_start: usize = match (u64::from(u32::MAX) + 64).try_into() {
3798        Ok(value) => value,
3799        Err(_) => return,
3800    };
3801    // Simulate a long-running stream where absolute history positions crossed
3802    // the u32 range. Before #51 this disabled HC inserts entirely.
3803    matcher.history_abs_start = history_abs_start;
3804    matcher.skip_matching();
3805    assert_eq!(
3806        matcher.position_base, matcher.history_abs_start,
3807        "rebase should anchor to the oldest live absolute position"
3808    );
3809
3810    assert!(
3811        matcher.hash_table.iter().any(|entry| *entry != HC_EMPTY),
3812        "HC hash table should still be populated after crossing u32 boundary"
3813    );
3814
3815    // Verify rebasing preserves candidate lookup, not just table population.
3816    let abs_pos = matcher.history_abs_start + 10;
3817    let candidates = matcher.chain_candidates(abs_pos);
3818    assert!(
3819        candidates.iter().any(|candidate| *candidate != usize::MAX),
3820        "chain_candidates should return valid matches after rebase"
3821    );
3822}
3823
3824#[test]
3825fn hc_rebase_rebuilds_only_inserted_prefix() {
3826    let mut matcher = HcMatchGenerator::new(64);
3827    matcher.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
3828    matcher.ensure_tables();
3829    matcher.position_base = 0;
3830    let history_abs_start: usize = match (u64::from(u32::MAX) + 64).try_into() {
3831        Ok(value) => value,
3832        Err(_) => return,
3833    };
3834    matcher.history_abs_start = history_abs_start;
3835    let abs_pos = matcher.history_abs_start + 6;
3836
3837    let mut expected = HcMatchGenerator::new(64);
3838    expected.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
3839    expected.ensure_tables();
3840    expected.history_abs_start = history_abs_start;
3841    expected.position_base = expected.history_abs_start;
3842    expected.hash_table.fill(HC_EMPTY);
3843    expected.chain_table.fill(HC_EMPTY);
3844    for pos in expected.history_abs_start..abs_pos {
3845        expected.insert_position_no_rebase(pos);
3846    }
3847
3848    matcher.maybe_rebase_positions(abs_pos);
3849
3850    assert_eq!(
3851        matcher.position_base, matcher.history_abs_start,
3852        "rebase should still anchor to the oldest live absolute position"
3853    );
3854    assert_eq!(
3855        matcher.hash_table, expected.hash_table,
3856        "rebase must rebuild only positions already inserted before abs_pos"
3857    );
3858    assert_eq!(
3859        matcher.chain_table, expected.chain_table,
3860        "future positions must not be pre-seeded into HC chains during rebase"
3861    );
3862}
3863
3864#[test]
3865fn suffix_store_with_single_slot_does_not_panic_on_keying() {
3866    let mut suffixes = SuffixStore::with_capacity(1);
3867    suffixes.insert(b"abcde", 0);
3868    assert!(suffixes.contains_key(b"abcde"));
3869    assert_eq!(suffixes.get(b"abcde"), Some(0));
3870}
3871
3872#[test]
3873fn fastest_reset_uses_interleaved_hash_fill_step() {
3874    let mut driver = MatchGeneratorDriver::new(32, 2);
3875
3876    driver.reset(CompressionLevel::Uncompressed);
3877    assert_eq!(driver.match_generator.hash_fill_step, 1);
3878
3879    driver.reset(CompressionLevel::Fastest);
3880    assert_eq!(driver.match_generator.hash_fill_step, FAST_HASH_FILL_STEP);
3881
3882    // Better uses the HashChain backend with lazy2; verify that the backend switch
3883    // happened and the lazy_depth is configured correctly.
3884    driver.reset(CompressionLevel::Better);
3885    assert_eq!(driver.active_backend, MatcherBackend::HashChain);
3886    assert_eq!(driver.window_size(), (1u64 << 23));
3887    assert_eq!(driver.hc_matcher().lazy_depth, 2);
3888}
3889
3890#[test]
3891fn simple_matcher_updates_offset_history_after_emitting_match() {
3892    let mut matcher = MatchGenerator::new(64);
3893    matcher.add_data(
3894        b"abcdeabcdeabcde".to_vec(),
3895        SuffixStore::with_capacity(64),
3896        |_, _| {},
3897    );
3898
3899    assert!(matcher.next_sequence(|seq| {
3900        assert_eq!(
3901            seq,
3902            Sequence::Triple {
3903                literals: b"abcde",
3904                offset: 5,
3905                match_len: 10,
3906            }
3907        );
3908    }));
3909    assert_eq!(matcher.offset_hist, [5, 1, 4]);
3910}
3911
3912#[test]
3913fn simple_matcher_zero_literal_repcode_checks_rep1_before_hash_lookup() {
3914    let mut matcher = MatchGenerator::new(64);
3915    matcher.add_data(
3916        b"abcdefghijabcdefghij".to_vec(),
3917        SuffixStore::with_capacity(64),
3918        |_, _| {},
3919    );
3920
3921    matcher.suffix_idx = 10;
3922    matcher.last_idx_in_sequence = 10;
3923    matcher.offset_hist = [99, 10, 4];
3924
3925    let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
3926    assert_eq!(candidate, Some((10, 10)));
3927}
3928
3929#[test]
3930fn simple_matcher_repcode_can_target_previous_window_entry() {
3931    let mut matcher = MatchGenerator::new(64);
3932    matcher.add_data(
3933        b"abcdefghij".to_vec(),
3934        SuffixStore::with_capacity(64),
3935        |_, _| {},
3936    );
3937    matcher.skip_matching();
3938    matcher.add_data(
3939        b"abcdefghij".to_vec(),
3940        SuffixStore::with_capacity(64),
3941        |_, _| {},
3942    );
3943
3944    matcher.offset_hist = [99, 10, 4];
3945
3946    let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data, 0);
3947    assert_eq!(candidate, Some((10, 10)));
3948}
3949
3950#[test]
3951fn simple_matcher_zero_literal_repcode_checks_rep2() {
3952    let mut matcher = MatchGenerator::new(64);
3953    matcher.add_data(
3954        b"abcdefghijabcdefghij".to_vec(),
3955        SuffixStore::with_capacity(64),
3956        |_, _| {},
3957    );
3958    matcher.suffix_idx = 10;
3959    matcher.last_idx_in_sequence = 10;
3960    // rep1=4 does not match at idx 10, rep2=10 does.
3961    matcher.offset_hist = [99, 4, 10];
3962
3963    let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
3964    assert_eq!(candidate, Some((10, 10)));
3965}
3966
3967#[test]
3968fn simple_matcher_zero_literal_repcode_checks_rep0_minus1() {
3969    let mut matcher = MatchGenerator::new(64);
3970    matcher.add_data(
3971        b"abcdefghijabcdefghij".to_vec(),
3972        SuffixStore::with_capacity(64),
3973        |_, _| {},
3974    );
3975    matcher.suffix_idx = 10;
3976    matcher.last_idx_in_sequence = 10;
3977    // rep1=4 and rep2=99 do not match; rep0-1 == 10 does.
3978    matcher.offset_hist = [11, 4, 99];
3979
3980    let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
3981    assert_eq!(candidate, Some((10, 10)));
3982}
3983
3984#[test]
3985fn simple_matcher_repcode_rejects_offsets_beyond_searchable_prefix() {
3986    let mut matcher = MatchGenerator::new(64);
3987    matcher.add_data(
3988        b"abcdefghij".to_vec(),
3989        SuffixStore::with_capacity(64),
3990        |_, _| {},
3991    );
3992    matcher.skip_matching();
3993    matcher.add_data(
3994        b"klmnopqrst".to_vec(),
3995        SuffixStore::with_capacity(64),
3996        |_, _| {},
3997    );
3998    matcher.suffix_idx = 3;
3999
4000    let candidate = matcher.offset_match_len(14, &matcher.window.last().unwrap().data[3..]);
4001    assert_eq!(candidate, None);
4002}
4003
4004#[test]
4005fn simple_matcher_skip_matching_seeds_every_position_even_with_fast_step() {
4006    let mut matcher = MatchGenerator::new(64);
4007    matcher.hash_fill_step = FAST_HASH_FILL_STEP;
4008    matcher.add_data(
4009        b"abcdefghijklmnop".to_vec(),
4010        SuffixStore::with_capacity(64),
4011        |_, _| {},
4012    );
4013    matcher.skip_matching();
4014    matcher.add_data(b"bcdef".to_vec(), SuffixStore::with_capacity(64), |_, _| {});
4015
4016    assert!(matcher.next_sequence(|seq| {
4017        assert_eq!(
4018            seq,
4019            Sequence::Triple {
4020                literals: b"",
4021                offset: 15,
4022                match_len: 5,
4023            }
4024        );
4025    }));
4026    assert!(!matcher.next_sequence(|_| {}));
4027}
4028
4029#[test]
4030fn simple_matcher_add_suffixes_till_backfills_last_searchable_anchor() {
4031    let mut matcher = MatchGenerator::new(64);
4032    matcher.hash_fill_step = FAST_HASH_FILL_STEP;
4033    matcher.add_data(
4034        b"01234abcde".to_vec(),
4035        SuffixStore::with_capacity(64),
4036        |_, _| {},
4037    );
4038    matcher.add_suffixes_till(10, FAST_HASH_FILL_STEP);
4039
4040    let last = matcher.window.last().unwrap();
4041    let tail = &last.data[5..10];
4042    assert_eq!(last.suffixes.get(tail), Some(5));
4043}
4044
4045#[test]
4046fn simple_matcher_add_suffixes_till_skips_when_idx_below_min_match_len() {
4047    let mut matcher = MatchGenerator::new(128);
4048    matcher.hash_fill_step = FAST_HASH_FILL_STEP;
4049    matcher.add_data(
4050        b"abcdefghijklmnopqrstuvwxyz".to_vec(),
4051        SuffixStore::with_capacity(1 << 16),
4052        |_, _| {},
4053    );
4054
4055    matcher.add_suffixes_till(MIN_MATCH_LEN - 1, FAST_HASH_FILL_STEP);
4056
4057    let last = matcher.window.last().unwrap();
4058    let first_key = &last.data[..MIN_MATCH_LEN];
4059    assert_eq!(last.suffixes.get(first_key), None);
4060}
4061
4062#[test]
4063fn simple_matcher_add_suffixes_till_fast_step_registers_interleaved_positions() {
4064    let mut matcher = MatchGenerator::new(128);
4065    matcher.hash_fill_step = FAST_HASH_FILL_STEP;
4066    matcher.add_data(
4067        b"abcdefghijklmnopqrstuvwxyz".to_vec(),
4068        SuffixStore::with_capacity(1 << 16),
4069        |_, _| {},
4070    );
4071
4072    matcher.add_suffixes_till(17, FAST_HASH_FILL_STEP);
4073
4074    let last = matcher.window.last().unwrap();
4075    for pos in [0usize, 3, 6, 9, 12] {
4076        let key = &last.data[pos..pos + MIN_MATCH_LEN];
4077        assert_eq!(
4078            last.suffixes.get(key),
4079            Some(pos),
4080            "expected interleaved suffix registration at pos {pos}"
4081        );
4082    }
4083}
4084
4085#[test]
4086fn dfast_skip_matching_handles_window_eviction() {
4087    let mut matcher = DfastMatchGenerator::new(16);
4088
4089    matcher.add_data(alloc::vec![1, 2, 3, 4, 5, 6], |_| {});
4090    matcher.skip_matching();
4091    matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});
4092    matcher.skip_matching();
4093    matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});
4094
4095    let mut reconstructed = alloc::vec![7, 8, 9, 10, 11, 12];
4096    matcher.start_matching(|seq| match seq {
4097        Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
4098        Sequence::Triple {
4099            literals,
4100            offset,
4101            match_len,
4102        } => {
4103            reconstructed.extend_from_slice(literals);
4104            let start = reconstructed.len() - offset;
4105            for i in 0..match_len {
4106                let byte = reconstructed[start + i];
4107                reconstructed.push(byte);
4108            }
4109        }
4110    });
4111
4112    assert_eq!(reconstructed, [7, 8, 9, 10, 11, 12, 7, 8, 9, 10, 11, 12]);
4113}
4114
4115#[test]
4116fn dfast_add_data_callback_reports_evicted_len_not_capacity() {
4117    let mut matcher = DfastMatchGenerator::new(8);
4118
4119    let mut first = Vec::with_capacity(64);
4120    first.extend_from_slice(b"abcdefgh");
4121    matcher.add_data(first, |_| {});
4122
4123    let mut second = Vec::with_capacity(64);
4124    second.extend_from_slice(b"ijklmnop");
4125
4126    let mut observed_evicted_len = None;
4127    matcher.add_data(second, |data| {
4128        observed_evicted_len = Some(data.len());
4129    });
4130
4131    assert_eq!(
4132        observed_evicted_len,
4133        Some(8),
4134        "eviction callback must report evicted byte length, not backing capacity"
4135    );
4136}
4137
4138#[test]
4139fn dfast_trim_to_window_callback_reports_evicted_len_not_capacity() {
4140    let mut matcher = DfastMatchGenerator::new(16);
4141
4142    let mut first = Vec::with_capacity(64);
4143    first.extend_from_slice(b"abcdefgh");
4144    matcher.add_data(first, |_| {});
4145
4146    let mut second = Vec::with_capacity(64);
4147    second.extend_from_slice(b"ijklmnop");
4148    matcher.add_data(second, |_| {});
4149
4150    matcher.max_window_size = 8;
4151
4152    let mut observed_evicted_len = None;
4153    matcher.trim_to_window(|data| {
4154        observed_evicted_len = Some(data.len());
4155    });
4156
4157    assert_eq!(
4158        observed_evicted_len,
4159        Some(8),
4160        "trim callback must report evicted byte length, not backing capacity"
4161    );
4162}
4163
4164#[test]
4165fn dfast_inserts_tail_positions_for_next_block_matching() {
4166    let mut matcher = DfastMatchGenerator::new(1 << 22);
4167
4168    matcher.add_data(b"012345bcdea".to_vec(), |_| {});
4169    let mut history = Vec::new();
4170    matcher.start_matching(|seq| match seq {
4171        Sequence::Literals { literals } => history.extend_from_slice(literals),
4172        Sequence::Triple { .. } => unreachable!("first block should not match history"),
4173    });
4174    assert_eq!(history, b"012345bcdea");
4175
4176    matcher.add_data(b"bcdeabcdeab".to_vec(), |_| {});
4177    let mut saw_first_sequence = false;
4178    matcher.start_matching(|seq| {
4179        assert!(!saw_first_sequence, "expected a single cross-block match");
4180        saw_first_sequence = true;
4181        match seq {
4182            Sequence::Literals { .. } => {
4183                panic!("expected tail-anchored cross-block match before any literals")
4184            }
4185            Sequence::Triple {
4186                literals,
4187                offset,
4188                match_len,
4189            } => {
4190                assert_eq!(literals, b"");
4191                assert_eq!(offset, 5);
4192                assert_eq!(match_len, 11);
4193                let start = history.len() - offset;
4194                for i in 0..match_len {
4195                    let byte = history[start + i];
4196                    history.push(byte);
4197                }
4198            }
4199        }
4200    });
4201
4202    assert!(
4203        saw_first_sequence,
4204        "expected tail-anchored cross-block match"
4205    );
4206    assert_eq!(history, b"012345bcdeabcdeabcdeab");
4207}