// structured_zstd/encoding/match_generator.rs
//! Matching algorithm used to find repeated parts in the original data
//!
//! The Zstd format relies on finding repeated sequences of data and compressing these sequences as instructions to the decoder.
//! A sequence basically tells the decoder "Go back X bytes and copy Y bytes to the end of your decode buffer".
//!
//! The task here is to efficiently find matches in the already encoded data for the current suffix of the not yet encoded data.
7
8use alloc::collections::VecDeque;
9use alloc::vec::Vec;
10use core::convert::TryInto;
11use core::num::NonZeroUsize;
12
13use super::CompressionLevel;
14use super::Matcher;
15use super::Sequence;
16use super::blocks::encode_offset_with_history;
17
// Minimum match length for the simple matcher; also the smallest dictionary
// fragment prime_with_dictionary() will commit for the Simple backend.
const MIN_MATCH_LEN: usize = 5;
// NOTE(review): presumably the hash-fill stride for the fastest levels;
// not referenced in this chunk — confirm against the simple matcher below.
const FAST_HASH_FILL_STEP: usize = 3;
const DFAST_MIN_MATCH_LEN: usize = 6;
const ROW_MIN_MATCH_LEN: usize = 6;
const DFAST_TARGET_LEN: usize = 48;
// Keep these aligned with the issue's zstd level-3/dfast target unless ratio
// measurements show we can shrink them without regressing acceptance tests.
const DFAST_HASH_BITS: usize = 20;
const DFAST_SEARCH_DEPTH: usize = 4;
// Sentinel marking an unused dfast table slot; usize::MAX can never be a
// valid position.
const DFAST_EMPTY_SLOT: usize = usize::MAX;
// Row-matcher tuning. NOTE(review): per the names, 2^ROW_HASH_BITS buckets of
// 2^ROW_LOG entries tagged with ROW_TAG_BITS bits, hashing ROW_HASH_KEY_LEN
// bytes — confirm against the row matcher implementation further down.
const ROW_HASH_BITS: usize = 20;
const ROW_LOG: usize = 5;
const ROW_SEARCH_DEPTH: usize = 16;
const ROW_TARGET_LEN: usize = 48;
const ROW_TAG_BITS: usize = 8;
// Sentinel marking an unused row slot.
const ROW_EMPTY_SLOT: usize = usize::MAX;
const ROW_HASH_KEY_LEN: usize = 4;

// Hash-chain (HC) matcher defaults; bundled into HC_CONFIG below.
const HC_HASH_LOG: usize = 20;
const HC_CHAIN_LOG: usize = 19;
const HC_SEARCH_DEPTH: usize = 16;
const HC_MIN_MATCH_LEN: usize = 5;
const HC_TARGET_LEN: usize = 48;
// Positions are stored as (relative_pos + 1) so that 0 is a safe empty
// sentinel that can never collide with any valid position.
const HC_EMPTY: u32 = 0;

// Maximum search depth across all HC-based levels. Used to size the
// fixed-length candidate array returned by chain_candidates().
const MAX_HC_SEARCH_DEPTH: usize = 32;
48
/// Bundled tuning knobs for the hash-chain matcher. Using a typed config
/// instead of positional `usize` args eliminates parameter-order hazards.
#[derive(Copy, Clone)]
struct HcConfig {
    /// log2 of the hash head table size.
    hash_log: usize,
    /// log2 of the chain table size.
    chain_log: usize,
    /// Maximum number of chain candidates examined per position.
    search_depth: usize,
    /// Match length considered good enough to stop searching early.
    /// NOTE(review): inferred from zstd's targetLength — confirm in the HC
    /// search loop.
    target_len: usize,
}
58
/// Bundled tuning knobs for the row-based matcher (mirrors [`HcConfig`]).
#[derive(Copy, Clone)]
struct RowConfig {
    /// log2 of the number of hash buckets.
    hash_bits: usize,
    /// log2 of the number of entries per row.
    row_log: usize,
    /// Maximum number of row candidates examined per position.
    search_depth: usize,
    /// Match length considered good enough to stop searching early.
    /// NOTE(review): inferred from zstd's targetLength — confirm in the row
    /// search loop.
    target_len: usize,
}
66
/// Baseline hash-chain tuning, assembled from the `HC_*` constants above.
const HC_CONFIG: HcConfig = HcConfig {
    hash_log: HC_HASH_LOG,
    chain_log: HC_CHAIN_LOG,
    search_depth: HC_SEARCH_DEPTH,
    target_len: HC_TARGET_LEN,
};
73
/// Best-level: deeper search, larger tables, higher target length.
/// Used by level 11 in [`LEVEL_TABLE`] (the `Best` preset).
const BEST_HC_CONFIG: HcConfig = HcConfig {
    hash_log: 21,
    chain_log: 20,
    search_depth: 32,
    target_len: 128,
};
81
/// Baseline row-matcher tuning, assembled from the `ROW_*` constants above.
const ROW_CONFIG: RowConfig = RowConfig {
    hash_bits: ROW_HASH_BITS,
    row_log: ROW_LOG,
    search_depth: ROW_SEARCH_DEPTH,
    target_len: ROW_TARGET_LEN,
};
88
/// Resolved tuning parameters for a compression level.
#[derive(Copy, Clone)]
struct LevelParams {
    /// Which match-finding backend to run.
    backend: MatcherBackend,
    /// log2 of the live window size (`max_window_size = 1 << window_log`).
    window_log: u8,
    /// Hash-insert stride; forwarded to the Simple matcher only (see `reset`).
    hash_fill_step: usize,
    /// Lazy-matching depth forwarded to the dfast/row/HC backends.
    lazy_depth: u8,
    /// Hash-chain tuning (consulted when `backend == HashChain`).
    hc: HcConfig,
    /// Row-matcher tuning (consulted when `backend == Row`).
    row: RowConfig,
}
99
100fn dfast_hash_bits_for_window(max_window_size: usize) -> usize {
101    let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
102    window_log.clamp(MIN_WINDOW_LOG as usize, DFAST_HASH_BITS)
103}
104
105fn row_hash_bits_for_window(max_window_size: usize) -> usize {
106    let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
107    window_log.clamp(MIN_WINDOW_LOG as usize, ROW_HASH_BITS)
108}
109
/// Parameter table for numeric compression levels 1–22.
///
/// Each entry maps a zstd compression level to the best-available matcher
/// backend and tuning knobs.  Levels that require strategies this crate does
/// not implement (greedy, btopt, btultra) are approximated with the closest
/// available backend.
///
/// Index 0 = level 1, index 21 = level 22.
///
/// `window_log` values here are upper bounds; `adjust_params_for_source_size`
/// may reduce them (and the HC/row table sizes) when a source-size hint is
/// available.
#[rustfmt::skip]
const LEVEL_TABLE: [LevelParams; 22] = [
    // Lvl  Strategy       wlog  step  lazy  HC config                                   row config
    // ---  -------------- ----  ----  ----  ------------------------------------------  ----------
    /* 1 */ LevelParams { backend: MatcherBackend::Simple,    window_log: 17, hash_fill_step: 3, lazy_depth: 0, hc: HC_CONFIG, row: ROW_CONFIG },
    /* 2 */ LevelParams { backend: MatcherBackend::Dfast,     window_log: 19, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
    /* 3 */ LevelParams { backend: MatcherBackend::Dfast,     window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
    /* 4 */ LevelParams { backend: MatcherBackend::Row,       window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
    /* 5 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HcConfig { hash_log: 18, chain_log: 17, search_depth: 4,  target_len: 32  }, row: ROW_CONFIG },
    /* 6 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 1, hc: HcConfig { hash_log: 19, chain_log: 18, search_depth: 8,  target_len: 48  }, row: ROW_CONFIG },
    /* 7 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 16, target_len: 48  }, row: ROW_CONFIG },
    /* 8 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 24, target_len: 64  }, row: ROW_CONFIG },
    /* 9 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 24, target_len: 64  }, row: ROW_CONFIG },
    /*10 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 24, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 28, target_len: 96  }, row: ROW_CONFIG },
    /*11 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 24, hash_fill_step: 1, lazy_depth: 2, hc: BEST_HC_CONFIG, row: ROW_CONFIG },
    /*12 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 128 }, row: ROW_CONFIG },
    /*13 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 160 }, row: ROW_CONFIG },
    /*14 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 22, search_depth: 32, target_len: 192 }, row: ROW_CONFIG },
    /*15 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 22, search_depth: 32, target_len: 192 }, row: ROW_CONFIG },
    /*16 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 22, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    /*17 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    /*18 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    /*19 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    /*20 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    /*21 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    /*22 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
];
145
/// Smallest window_log the encoder will use regardless of source size
/// (`1 << 10` = 1 KiB).
const MIN_WINDOW_LOG: u8 = 10;
/// Conservative floor for source-size-hinted window tuning.
///
/// Hinted windows below 16 KiB (`window_log < 14`) currently regress C-FFI
/// interoperability on certain compressed-block patterns. Keep hinted
/// windows at 16 KiB or larger until that compatibility gap is closed.
const MIN_HINTED_WINDOW_LOG: u8 = 14;
154
155/// Adjust level parameters for a known source size.
156///
157/// This derives a cap from `ceil(log2(src_size))`, then clamps it to
158/// [`MIN_HINTED_WINDOW_LOG`] (16 KiB). A zero-byte size hint is treated as
159/// [`MIN_WINDOW_LOG`] for the raw ceil-log step and then promoted to the hinted
160/// floor. This keeps tables bounded for small inputs while preserving the
161/// encoder's baseline minimum supported window.
162/// For the HC backend, `hash_log` and `chain_log` are reduced
163/// proportionally.
164fn adjust_params_for_source_size(mut params: LevelParams, src_size: u64) -> LevelParams {
165    // Derive a source-size-based cap from ceil(log2(src_size)), then
166    // clamp first to MIN_WINDOW_LOG (baseline encoder minimum) and then to
167    // MIN_HINTED_WINDOW_LOG (16 KiB hinted floor). For tiny or zero hints we
168    // therefore keep a 16 KiB effective minimum window in hinted mode.
169    let src_log = if src_size == 0 {
170        MIN_WINDOW_LOG
171    } else {
172        (64 - (src_size - 1).leading_zeros()) as u8 // ceil_log2
173    };
174    let src_log = src_log.max(MIN_WINDOW_LOG).max(MIN_HINTED_WINDOW_LOG);
175    if src_log < params.window_log {
176        params.window_log = src_log;
177    }
178    // For HC backend: also cap hash_log and chain_log so tables are
179    // proportional to the source, avoiding multi-MB allocations for
180    // tiny inputs.
181    if params.backend == MatcherBackend::HashChain {
182        if (src_log + 2) < params.hc.hash_log as u8 {
183            params.hc.hash_log = (src_log + 2) as usize;
184        }
185        if (src_log + 1) < params.hc.chain_log as u8 {
186            params.hc.chain_log = (src_log + 1) as usize;
187        }
188    } else if params.backend == MatcherBackend::Row {
189        let max_window_size = 1usize << params.window_log;
190        params.row.hash_bits = row_hash_bits_for_window(max_window_size);
191    }
192    params
193}
194
195/// Resolve a [`CompressionLevel`] to internal tuning parameters,
196/// optionally adjusted for a known source size.
197fn resolve_level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
198    let params = match level {
199        CompressionLevel::Uncompressed => LevelParams {
200            backend: MatcherBackend::Simple,
201            window_log: 17,
202            hash_fill_step: 1,
203            lazy_depth: 0,
204            hc: HC_CONFIG,
205            row: ROW_CONFIG,
206        },
207        CompressionLevel::Fastest => LEVEL_TABLE[0],
208        CompressionLevel::Default => LEVEL_TABLE[2],
209        CompressionLevel::Better => LEVEL_TABLE[6],
210        CompressionLevel::Best => LEVEL_TABLE[10],
211        CompressionLevel::Level(n) => {
212            if n > 0 {
213                let idx = (n as usize).min(CompressionLevel::MAX_LEVEL as usize) - 1;
214                LEVEL_TABLE[idx]
215            } else if n == 0 {
216                // Level 0 = default, matching C zstd semantics.
217                LEVEL_TABLE[CompressionLevel::DEFAULT_LEVEL as usize - 1]
218            } else {
219                // Negative levels: ultra-fast with the Simple backend.
220                // Acceleration grows with magnitude, expressed as larger
221                // hash_fill_step (fewer positions indexed).
222                let acceleration =
223                    (n.saturating_abs() as usize).min((-CompressionLevel::MIN_LEVEL) as usize);
224                let step = (acceleration + 3).min(128);
225                LevelParams {
226                    backend: MatcherBackend::Simple,
227                    window_log: 17,
228                    hash_fill_step: step,
229                    lazy_depth: 0,
230                    hc: HC_CONFIG,
231                    row: ROW_CONFIG,
232                }
233            }
234        }
235    };
236    if let Some(size) = source_size {
237        adjust_params_for_source_size(params, size)
238    } else {
239        params
240    }
241}
242
/// Match-finding strategy selected per compression level (see [`LEVEL_TABLE`]).
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum MatcherBackend {
    /// Single-table matcher backed by `MatchGenerator`; used for level 1,
    /// the negative levels, and `Uncompressed`.
    Simple,
    /// Double-fast-style matcher backed by `DfastMatchGenerator` (levels 2–3).
    Dfast,
    /// Row-based matcher backed by `RowMatchGenerator` (level 4).
    Row,
    /// Hash-chain matcher backed by `HcMatchGenerator` (levels 5 and up).
    HashChain,
}
250
/// This is the default implementation of the `Matcher` trait. It allocates and reuses the buffers when possible.
pub struct MatchGeneratorDriver {
    // Recycled data buffers, shared by all backends.
    vec_pool: Vec<Vec<u8>>,
    // Recycled suffix stores; only the Simple backend uses these.
    suffix_pool: Vec<SuffixStore>,
    // Simple backend; always constructed.
    match_generator: MatchGenerator,
    // Lazily created the first time reset() selects the dfast backend.
    dfast_match_generator: Option<DfastMatchGenerator>,
    // Lazily created the first time reset() selects the row backend.
    row_match_generator: Option<RowMatchGenerator>,
    // Lazily created the first time reset() selects the hash-chain backend.
    hc_match_generator: Option<HcMatchGenerator>,
    // Backend currently in use; kept in sync with the generator fields above.
    active_backend: MatcherBackend,
    // Current chunk size handed out by get_next_space(); capped at the window
    // size on every reset().
    slice_size: usize,
    // Chunk size configured at construction; slice_size is re-derived from it
    // on each reset().
    base_slice_size: usize,
    // Frame header window size must stay at the configured live-window budget.
    // Dictionary retention expands internal matcher capacity only.
    reported_window_size: usize,
    // Tracks currently retained bytes that originated from primed dictionary
    // history and have not been evicted yet.
    dictionary_retained_budget: usize,
    // Source size hint for next frame (set via set_source_size_hint, cleared on reset).
    source_size_hint: Option<u64>,
}
271
impl MatchGeneratorDriver {
    /// `slice_size` sets the base block allocation size used for matcher input chunks.
    /// `max_slices_in_window` determines the initial window capacity at construction
    /// time. Effective window sizing is recalculated on every [`reset`](Self::reset)
    /// from the resolved compression level and optional source-size hint.
    pub(crate) fn new(slice_size: usize, max_slices_in_window: usize) -> Self {
        let max_window_size = max_slices_in_window * slice_size;
        Self {
            vec_pool: Vec::new(),
            suffix_pool: Vec::new(),
            match_generator: MatchGenerator::new(max_window_size),
            dfast_match_generator: None,
            row_match_generator: None,
            hc_match_generator: None,
            active_backend: MatcherBackend::Simple,
            slice_size,
            base_slice_size: slice_size,
            reported_window_size: max_window_size,
            dictionary_retained_budget: 0,
            source_size_hint: None,
        }
    }

    /// Thin wrapper over [`resolve_level_params`].
    fn level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
        resolve_level_params(level, source_size)
    }

    /// Shared borrow of the dfast backend; panics if reset() never created it.
    fn dfast_matcher(&self) -> &DfastMatchGenerator {
        self.dfast_match_generator
            .as_ref()
            .expect("dfast backend must be initialized by reset() before use")
    }

    /// Exclusive borrow of the dfast backend; panics if reset() never created it.
    fn dfast_matcher_mut(&mut self) -> &mut DfastMatchGenerator {
        self.dfast_match_generator
            .as_mut()
            .expect("dfast backend must be initialized by reset() before use")
    }

    /// Shared borrow of the row backend; panics if reset() never created it.
    fn row_matcher(&self) -> &RowMatchGenerator {
        self.row_match_generator
            .as_ref()
            .expect("row backend must be initialized by reset() before use")
    }

    /// Exclusive borrow of the row backend; panics if reset() never created it.
    fn row_matcher_mut(&mut self) -> &mut RowMatchGenerator {
        self.row_match_generator
            .as_mut()
            .expect("row backend must be initialized by reset() before use")
    }

    /// Shared borrow of the hash-chain backend; panics if reset() never created it.
    fn hc_matcher(&self) -> &HcMatchGenerator {
        self.hc_match_generator
            .as_ref()
            .expect("hash chain backend must be initialized by reset() before use")
    }

    /// Exclusive borrow of the hash-chain backend; panics if reset() never created it.
    fn hc_matcher_mut(&mut self) -> &mut HcMatchGenerator {
        self.hc_match_generator
            .as_mut()
            .expect("hash chain backend must be initialized by reset() before use")
    }

    /// Pay back the temporary window expansion made by
    /// `prime_with_dictionary`: as bytes are evicted, shrink the active
    /// backend's window budget by the evicted amount, but never by more than
    /// the dictionary bytes still retained.
    fn retire_dictionary_budget(&mut self, evicted_bytes: usize) {
        let reclaimed = evicted_bytes.min(self.dictionary_retained_budget);
        if reclaimed == 0 {
            return;
        }
        self.dictionary_retained_budget -= reclaimed;
        match self.active_backend {
            MatcherBackend::Simple => {
                self.match_generator.max_window_size = self
                    .match_generator
                    .max_window_size
                    .saturating_sub(reclaimed);
            }
            MatcherBackend::Dfast => {
                let matcher = self.dfast_matcher_mut();
                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
            }
            MatcherBackend::Row => {
                let matcher = self.row_matcher_mut();
                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
            }
            MatcherBackend::HashChain => {
                let matcher = self.hc_matcher_mut();
                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
            }
        }
    }

    /// After retiring budget the effective window may have shrunk below what
    /// the backend currently holds; keep evicting (recycling buffers into the
    /// pools) until nothing more falls out. Each eviction can retire further
    /// budget, hence the loop.
    fn trim_after_budget_retire(&mut self) {
        loop {
            let mut evicted_bytes = 0usize;
            match self.active_backend {
                MatcherBackend::Simple => {
                    let vec_pool = &mut self.vec_pool;
                    let suffix_pool = &mut self.suffix_pool;
                    // NOTE(review): reserve(0) appears to evict-only here —
                    // confirm against MatchGenerator::reserve.
                    self.match_generator.reserve(0, |mut data, mut suffixes| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                        suffixes.slots.clear();
                        suffixes.slots.resize(suffixes.slots.capacity(), None);
                        suffix_pool.push(suffixes);
                    });
                }
                MatcherBackend::Dfast => {
                    // Collect first, recycle after: the closure already
                    // borrows the matcher, so it cannot push into vec_pool.
                    let mut retired = Vec::new();
                    self.dfast_matcher_mut().trim_to_window(|data| {
                        evicted_bytes += data.len();
                        retired.push(data);
                    });
                    for mut data in retired {
                        data.resize(data.capacity(), 0);
                        self.vec_pool.push(data);
                    }
                }
                MatcherBackend::Row => {
                    let mut retired = Vec::new();
                    self.row_matcher_mut().trim_to_window(|data| {
                        evicted_bytes += data.len();
                        retired.push(data);
                    });
                    for mut data in retired {
                        data.resize(data.capacity(), 0);
                        self.vec_pool.push(data);
                    }
                }
                MatcherBackend::HashChain => {
                    let mut retired = Vec::new();
                    self.hc_matcher_mut().trim_to_window(|data| {
                        evicted_bytes += data.len();
                        retired.push(data);
                    });
                    for mut data in retired {
                        data.resize(data.capacity(), 0);
                        self.vec_pool.push(data);
                    }
                }
            }
            if evicted_bytes == 0 {
                break;
            }
            self.retire_dictionary_budget(evicted_bytes);
        }
    }
}
420
421impl Matcher for MatchGeneratorDriver {
    /// This driver implements `prime_with_dictionary`, so always report support.
    fn supports_dictionary_priming(&self) -> bool {
        true
    }
425
    /// Record the size of the next frame's source. The hint is one-shot:
    /// the next [`reset`](Self::reset) consumes and clears it.
    fn set_source_size_hint(&mut self, size: u64) {
        self.source_size_hint = Some(size);
    }
429
    /// Re-initialize the driver for a new frame at `level`.
    ///
    /// Consumes the one-shot source-size hint, resolves the level parameters,
    /// switches backends if necessary (recycling the outgoing backend's
    /// buffers into the pools), and then reconfigures the selected backend's
    /// window size and tuning knobs.
    fn reset(&mut self, level: CompressionLevel) {
        let hint = self.source_size_hint.take();
        let hinted = hint.is_some();
        let params = Self::level_params(level, hint);
        let max_window_size = 1usize << params.window_log;
        self.dictionary_retained_budget = 0;
        // Backend switch: drain the outgoing backend so its allocations can
        // be reused by whichever backend runs next.
        if self.active_backend != params.backend {
            match self.active_backend {
                MatcherBackend::Simple => {
                    let vec_pool = &mut self.vec_pool;
                    let suffix_pool = &mut self.suffix_pool;
                    self.match_generator.reset(|mut data, mut suffixes| {
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                        suffixes.slots.clear();
                        suffixes.slots.resize(suffixes.slots.capacity(), None);
                        suffix_pool.push(suffixes);
                    });
                }
                MatcherBackend::Dfast => {
                    if let Some(dfast) = self.dfast_match_generator.as_mut() {
                        let vec_pool = &mut self.vec_pool;
                        dfast.reset(|mut data| {
                            data.resize(data.capacity(), 0);
                            vec_pool.push(data);
                        });
                    }
                }
                MatcherBackend::Row => {
                    if let Some(row) = self.row_match_generator.as_mut() {
                        // Replacing with empty Vecs releases the row tables
                        // so they don't persist while another backend runs.
                        row.row_heads = Vec::new();
                        row.row_positions = Vec::new();
                        row.row_tags = Vec::new();
                        let vec_pool = &mut self.vec_pool;
                        row.reset(|mut data| {
                            data.resize(data.capacity(), 0);
                            vec_pool.push(data);
                        });
                    }
                }
                MatcherBackend::HashChain => {
                    if let Some(hc) = self.hc_match_generator.as_mut() {
                        // Release oversized tables when switching away from
                        // HashChain so Best's larger allocations don't persist.
                        hc.hash_table = Vec::new();
                        hc.chain_table = Vec::new();
                        let vec_pool = &mut self.vec_pool;
                        hc.reset(|mut data| {
                            data.resize(data.capacity(), 0);
                            vec_pool.push(data);
                        });
                    }
                }
            }
        }

        self.active_backend = params.backend;
        // Chunks never exceed the window; the frame header advertises the
        // plain window (see reported_window_size field docs).
        self.slice_size = self.base_slice_size.min(max_window_size);
        self.reported_window_size = max_window_size;
        match self.active_backend {
            MatcherBackend::Simple => {
                let vec_pool = &mut self.vec_pool;
                let suffix_pool = &mut self.suffix_pool;
                self.match_generator.max_window_size = max_window_size;
                self.match_generator.hash_fill_step = params.hash_fill_step;
                self.match_generator.reset(|mut data, mut suffixes| {
                    data.resize(data.capacity(), 0);
                    vec_pool.push(data);
                    suffixes.slots.clear();
                    suffixes.slots.resize(suffixes.slots.capacity(), None);
                    suffix_pool.push(suffixes);
                });
            }
            MatcherBackend::Dfast => {
                let dfast = self
                    .dfast_match_generator
                    .get_or_insert_with(|| DfastMatchGenerator::new(max_window_size));
                dfast.max_window_size = max_window_size;
                dfast.lazy_depth = params.lazy_depth;
                // Only shrink the hash table when a size hint was given;
                // unhinted frames keep the full DFAST_HASH_BITS table.
                dfast.set_hash_bits(if hinted {
                    dfast_hash_bits_for_window(max_window_size)
                } else {
                    DFAST_HASH_BITS
                });
                let vec_pool = &mut self.vec_pool;
                dfast.reset(|mut data| {
                    data.resize(data.capacity(), 0);
                    vec_pool.push(data);
                });
            }
            MatcherBackend::Row => {
                let row = self
                    .row_match_generator
                    .get_or_insert_with(|| RowMatchGenerator::new(max_window_size));
                row.max_window_size = max_window_size;
                row.lazy_depth = params.lazy_depth;
                row.configure(params.row);
                if hinted {
                    row.set_hash_bits(row_hash_bits_for_window(max_window_size));
                }
                let vec_pool = &mut self.vec_pool;
                row.reset(|mut data| {
                    data.resize(data.capacity(), 0);
                    vec_pool.push(data);
                });
            }
            MatcherBackend::HashChain => {
                let hc = self
                    .hc_match_generator
                    .get_or_insert_with(|| HcMatchGenerator::new(max_window_size));
                hc.max_window_size = max_window_size;
                hc.lazy_depth = params.lazy_depth;
                hc.configure(params.hc);
                let vec_pool = &mut self.vec_pool;
                hc.reset(|mut data| {
                    data.resize(data.capacity(), 0);
                    vec_pool.push(data);
                });
            }
        }
    }
551
    /// Seed the active backend with dictionary content and its repeat-offset
    /// history.
    ///
    /// The dictionary is committed slice-by-slice with matching skipped, and
    /// the backend's window is temporarily enlarged by the committed byte
    /// count so the dictionary stays addressable until real frame output
    /// displaces it (paid back later via `retire_dictionary_budget`). The
    /// frame-header window (`reported_window_size`) is deliberately left
    /// unchanged.
    fn prime_with_dictionary(&mut self, dict_content: &[u8], offset_hist: [u32; 3]) {
        match self.active_backend {
            MatcherBackend::Simple => self.match_generator.offset_hist = offset_hist,
            MatcherBackend::Dfast => self.dfast_matcher_mut().offset_hist = offset_hist,
            MatcherBackend::Row => self.row_matcher_mut().offset_hist = offset_hist,
            MatcherBackend::HashChain => self.hc_matcher_mut().offset_hist = offset_hist,
        }

        if dict_content.is_empty() {
            return;
        }

        // Dictionary bytes should stay addressable until produced frame output
        // itself exceeds the live window size.
        let retained_dict_budget = dict_content.len();
        match self.active_backend {
            MatcherBackend::Simple => {
                self.match_generator.max_window_size = self
                    .match_generator
                    .max_window_size
                    .saturating_add(retained_dict_budget);
            }
            MatcherBackend::Dfast => {
                let matcher = self.dfast_matcher_mut();
                matcher.max_window_size =
                    matcher.max_window_size.saturating_add(retained_dict_budget);
            }
            MatcherBackend::Row => {
                let matcher = self.row_matcher_mut();
                matcher.max_window_size =
                    matcher.max_window_size.saturating_add(retained_dict_budget);
            }
            MatcherBackend::HashChain => {
                let matcher = self.hc_matcher_mut();
                matcher.max_window_size =
                    matcher.max_window_size.saturating_add(retained_dict_budget);
            }
        }

        let mut start = 0usize;
        let mut committed_dict_budget = 0usize;
        // insert_position needs 4 bytes of lookahead for hashing;
        // backfill_boundary_positions re-visits tail positions once the
        // next slice extends history, but cannot hash <4 byte fragments.
        let min_primed_tail = match self.active_backend {
            MatcherBackend::Simple => MIN_MATCH_LEN,
            MatcherBackend::Dfast | MatcherBackend::Row | MatcherBackend::HashChain => 4,
        };
        while start < dict_content.len() {
            let end = (start + self.slice_size).min(dict_content.len());
            // A tail shorter than min_primed_tail cannot be indexed; stop
            // here — its budget is given back below.
            if end - start < min_primed_tail {
                break;
            }
            let mut space = self.get_next_space();
            space.clear();
            space.extend_from_slice(&dict_content[start..end]);
            self.commit_space(space);
            // NOTE(review): skip_matching() presumably indexes the committed
            // slice without emitting sequences — confirm in the Matcher trait.
            self.skip_matching();
            committed_dict_budget += end - start;
            start = end;
        }

        // Return the budget for any tail that was too short to commit.
        let uncommitted_tail_budget = retained_dict_budget.saturating_sub(committed_dict_budget);
        if uncommitted_tail_budget > 0 {
            match self.active_backend {
                MatcherBackend::Simple => {
                    self.match_generator.max_window_size = self
                        .match_generator
                        .max_window_size
                        .saturating_sub(uncommitted_tail_budget);
                }
                MatcherBackend::Dfast => {
                    let matcher = self.dfast_matcher_mut();
                    matcher.max_window_size = matcher
                        .max_window_size
                        .saturating_sub(uncommitted_tail_budget);
                }
                MatcherBackend::Row => {
                    let matcher = self.row_matcher_mut();
                    matcher.max_window_size = matcher
                        .max_window_size
                        .saturating_sub(uncommitted_tail_budget);
                }
                MatcherBackend::HashChain => {
                    let matcher = self.hc_matcher_mut();
                    matcher.max_window_size = matcher
                        .max_window_size
                        .saturating_sub(uncommitted_tail_budget);
                }
            }
        }
        if committed_dict_budget > 0 {
            self.dictionary_retained_budget = self
                .dictionary_retained_budget
                .saturating_add(committed_dict_budget);
        }
    }
649
    /// Window size advertised in the frame header. Stays at the configured
    /// live-window budget even while dictionary priming temporarily expands
    /// the internal matcher windows (see `prime_with_dictionary`).
    fn window_size(&self) -> u64 {
        self.reported_window_size as u64
    }
653
654    fn get_next_space(&mut self) -> Vec<u8> {
655        if let Some(mut space) = self.vec_pool.pop() {
656            if space.len() > self.slice_size {
657                space.truncate(self.slice_size);
658            }
659            if space.len() < self.slice_size {
660                space.resize(self.slice_size, 0);
661            }
662            return space;
663        }
664        alloc::vec![0; self.slice_size]
665    }
666
    /// Borrow the most recently committed input slice from the active
    /// backend. Panics if no slice has been committed yet (Simple backend
    /// unwraps its last window entry).
    fn get_last_space(&mut self) -> &[u8] {
        match self.active_backend {
            MatcherBackend::Simple => self.match_generator.window.last().unwrap().data.as_slice(),
            MatcherBackend::Dfast => self.dfast_matcher().get_last_space(),
            MatcherBackend::Row => self.row_matcher().get_last_space(),
            MatcherBackend::HashChain => self.hc_matcher().get_last_space(),
        }
    }
675
    /// Hand a filled buffer to the active backend's matching window.
    ///
    /// Each arm reborrows the pools into locals *before* calling `add_data`,
    /// so the eviction closure can push into them while the generator itself
    /// is mutably borrowed. Buffers (and, for the simple backend, suffix
    /// stores) evicted by `add_data` are restored to full capacity and
    /// recycled into those pools; the number of evicted bytes is then retired
    /// from the dictionary budget.
    fn commit_space(&mut self, space: Vec<u8>) {
        match self.active_backend {
            MatcherBackend::Simple => {
                let vec_pool = &mut self.vec_pool;
                let mut evicted_bytes = 0usize;
                // Reuse a pooled suffix store only when it is large enough
                // for this buffer; otherwise allocate one sized to fit.
                let suffixes = match self.suffix_pool.pop() {
                    Some(store) if store.slots.len() >= space.len() => store,
                    _ => SuffixStore::with_capacity(space.len()),
                };
                let suffix_pool = &mut self.suffix_pool;
                self.match_generator
                    .add_data(space, suffixes, |mut data, mut suffixes| {
                        evicted_bytes += data.len();
                        // Restore full capacity (zero-filling the tail) so the
                        // recycled buffer is ready for the next get_next_space().
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                        // Clear all slots so stale suffix indexes can never
                        // leak into the next window entry that reuses this store.
                        suffixes.slots.clear();
                        suffixes.slots.resize(suffixes.slots.capacity(), None);
                        suffix_pool.push(suffixes);
                    });
                self.retire_dictionary_budget(evicted_bytes);
                self.trim_after_budget_retire();
            }
            MatcherBackend::Dfast => {
                let vec_pool = &mut self.vec_pool;
                let mut evicted_bytes = 0usize;
                self.dfast_match_generator
                    .as_mut()
                    .expect("dfast backend must be initialized by reset() before use")
                    .add_data(space, |mut data| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                    });
                self.retire_dictionary_budget(evicted_bytes);
                self.trim_after_budget_retire();
            }
            MatcherBackend::Row => {
                let vec_pool = &mut self.vec_pool;
                let mut evicted_bytes = 0usize;
                self.row_match_generator
                    .as_mut()
                    .expect("row backend must be initialized by reset() before use")
                    .add_data(space, |mut data| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                    });
                self.retire_dictionary_budget(evicted_bytes);
                self.trim_after_budget_retire();
            }
            MatcherBackend::HashChain => {
                let vec_pool = &mut self.vec_pool;
                let mut evicted_bytes = 0usize;
                self.hc_match_generator
                    .as_mut()
                    .expect("hash chain backend must be initialized by reset() before use")
                    .add_data(space, |mut data| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                    });
                self.retire_dictionary_budget(evicted_bytes);
                self.trim_after_budget_retire();
            }
        }
    }
742
743    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
744        match self.active_backend {
745            MatcherBackend::Simple => {
746                while self.match_generator.next_sequence(&mut handle_sequence) {}
747            }
748            MatcherBackend::Dfast => self
749                .dfast_matcher_mut()
750                .start_matching(&mut handle_sequence),
751            MatcherBackend::Row => self.row_matcher_mut().start_matching(&mut handle_sequence),
752            MatcherBackend::HashChain => self.hc_matcher_mut().start_matching(&mut handle_sequence),
753        }
754    }
755    fn skip_matching(&mut self) {
756        match self.active_backend {
757            MatcherBackend::Simple => self.match_generator.skip_matching(),
758            MatcherBackend::Dfast => self.dfast_matcher_mut().skip_matching(),
759            MatcherBackend::Row => self.row_matcher_mut().skip_matching(),
760            MatcherBackend::HashChain => self.hc_matcher_mut().skip_matching(),
761        }
762    }
763}
764
/// This stores the index of a suffix of a string by hashing the first few bytes of that suffix
/// This means that collisions just overwrite and that you need to check validity after a get
struct SuffixStore {
    // We use NonZeroUsize to enable niche optimization here.
    // On store we do +1 and on get -1
    // This is ok since usize::MAX is never a valid offset
    slots: Vec<Option<NonZeroUsize>>,
    len_log: u32,
}

impl SuffixStore {
    /// Create a store with `capacity` slots.
    ///
    /// `len_log` is floor(log2(capacity)); together with the modulo in
    /// `key()` it keeps every computed key in bounds even when `capacity` is
    /// not a power of two. A zero capacity yields an empty store that must
    /// not be queried (empty data has no suffixes to index).
    fn with_capacity(capacity: usize) -> Self {
        Self {
            slots: alloc::vec![None; capacity],
            // checked_ilog2() avoids the ilog2() panic when capacity == 0.
            len_log: capacity.checked_ilog2().unwrap_or(0),
        }
    }

    /// Store `idx` in the bucket for `suffix`, overwriting any previous entry.
    #[inline(always)]
    fn insert(&mut self, suffix: &[u8], idx: usize) {
        let key = self.key(suffix);
        // +1 so the value fits NonZeroUsize; reversed by -1 in get().
        self.slots[key] = Some(NonZeroUsize::new(idx + 1).unwrap());
    }

    /// Whether the bucket for `suffix` is occupied (possibly by a collision).
    #[inline(always)]
    fn contains_key(&self, suffix: &[u8]) -> bool {
        let key = self.key(suffix);
        self.slots[key].is_some()
    }

    /// Look up the stored index in `suffix`'s bucket.
    ///
    /// A returned index may belong to a colliding suffix; callers must verify
    /// the candidate against the actual data.
    #[inline(always)]
    fn get(&self, suffix: &[u8]) -> Option<usize> {
        let key = self.key(suffix);
        self.slots[key].map(|x| x.get() - 1)
    }

    /// Hash the first five bytes of `suffix` into a slot index.
    #[inline(always)]
    fn key(&self, suffix: &[u8]) -> usize {
        // Capacity=1 yields len_log=0; shifting by 64 would panic.
        if self.len_log == 0 {
            return 0;
        }

        let s0 = suffix[0] as u64;
        let s1 = suffix[1] as u64;
        let s2 = suffix[2] as u64;
        let s3 = suffix[3] as u64;
        let s4 = suffix[4] as u64;

        const POLY: u64 = 0xCF3BCCDCABu64;

        let s0 = (s0 << 24).wrapping_mul(POLY);
        let s1 = (s1 << 32).wrapping_mul(POLY);
        let s2 = (s2 << 40).wrapping_mul(POLY);
        let s3 = (s3 << 48).wrapping_mul(POLY);
        let s4 = (s4 << 56).wrapping_mul(POLY);

        let index = s0 ^ s1 ^ s2 ^ s3 ^ s4;
        // Take the top len_log bits; the modulo guards non-power-of-two
        // capacities where 2^len_log < slots.len() does not hold exactly.
        let index = index >> (64 - self.len_log);
        index as usize % self.slots.len()
    }
}
827
828/// We keep a window of a few of these entries
829/// All of these are valid targets for a match to be generated for
830struct WindowEntry {
831    data: Vec<u8>,
832    /// Stores indexes into data
833    suffixes: SuffixStore,
834    /// Makes offset calculations efficient
835    base_offset: usize,
836}
837
pub(crate) struct MatchGenerator {
    /// Hard cap on the total number of bytes kept across all window entries.
    max_window_size: usize,
    /// Data window we are operating on to find matches
    /// The data we want to find matches for is in the last slice
    window: Vec<WindowEntry>,
    /// Sum of the lengths of all entries currently in `window`.
    window_size: usize,
    /// Debug-only contiguous copy of the window, used to cross-check
    /// computed offsets in next_sequence().
    #[cfg(debug_assertions)]
    concat_window: Vec<u8>,
    /// Index in the last slice that we already processed
    suffix_idx: usize,
    /// Gets updated when a new sequence is returned to point right behind that sequence
    last_idx_in_sequence: usize,
    /// Stride used when back-filling the suffix store over matched regions.
    hash_fill_step: usize,
    /// Recent offsets used as rep-code candidates; updated through
    /// `encode_offset_with_history` after each emitted match.
    offset_hist: [u32; 3],
}
853
impl MatchGenerator {
    /// Index of the first differing byte inside a XOR-ed machine word.
    /// On little-endian machines the lowest-addressed byte sits in the
    /// least significant bits, so count trailing zeros.
    #[inline(always)]
    #[cfg(target_endian = "little")]
    fn mismatch_byte_index(diff: usize) -> usize {
        diff.trailing_zeros() as usize / 8
    }

    /// Big-endian counterpart: the lowest-addressed byte is in the most
    /// significant bits, so count leading zeros.
    #[inline(always)]
    #[cfg(target_endian = "big")]
    fn mismatch_byte_index(diff: usize) -> usize {
        diff.leading_zeros() as usize / 8
    }

    /// max_size defines how many bytes will be used at most in the window used for matching
    fn new(max_size: usize) -> Self {
        Self {
            max_window_size: max_size,
            window: Vec::new(),
            window_size: 0,
            #[cfg(debug_assertions)]
            concat_window: Vec::new(),
            suffix_idx: 0,
            last_idx_in_sequence: 0,
            hash_fill_step: 1,
            // Initial rep-code history, same values used on reset().
            offset_hist: [1, 4, 8],
        }
    }

    /// Drop all window state and hand every buffer and suffix store back
    /// through `reuse_space` so the allocations can be recycled.
    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
        self.window_size = 0;
        #[cfg(debug_assertions)]
        self.concat_window.clear();
        self.suffix_idx = 0;
        self.last_idx_in_sequence = 0;
        self.offset_hist = [1, 4, 8];
        self.window.drain(..).for_each(|entry| {
            reuse_space(entry.data, entry.suffixes);
        });
    }

    /// Processes bytes in the current window until either a match is found or no more matches can be found
    /// * If a match is found handle_sequence is called with the Triple variant
    /// * If no more matches can be found but there are bytes still left handle_sequence is called with the Literals variant
    /// * If no more matches can be found and no more bytes are left this returns false
    fn next_sequence(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) -> bool {
        loop {
            let last_entry = self.window.last().unwrap();
            let data_slice = &last_entry.data;

            // We already reached the end of the window, check if we need to return a Literals{}
            if self.suffix_idx >= data_slice.len() {
                if self.last_idx_in_sequence != self.suffix_idx {
                    let literals = &data_slice[self.last_idx_in_sequence..];
                    self.last_idx_in_sequence = self.suffix_idx;
                    handle_sequence(Sequence::Literals { literals });
                    return true;
                } else {
                    return false;
                }
            }

            // If the remaining data is smaller than the minimum match length we can stop and return a Literals{}
            let data_slice = &data_slice[self.suffix_idx..];
            if data_slice.len() < MIN_MATCH_LEN {
                let last_idx_in_sequence = self.last_idx_in_sequence;
                self.last_idx_in_sequence = last_entry.data.len();
                self.suffix_idx = last_entry.data.len();
                handle_sequence(Sequence::Literals {
                    literals: &last_entry.data[last_idx_in_sequence..],
                });
                return true;
            }

            // This is the key we are looking to find a match for
            let key = &data_slice[..MIN_MATCH_LEN];
            let literals_len = self.suffix_idx - self.last_idx_in_sequence;

            // Look in each window entry
            // Rep-code offsets are tried first; hash-store hits below replace
            // them only when longer (or equally long but nearer).
            let mut candidate = self.repcode_candidate(data_slice, literals_len);
            for match_entry in self.window.iter() {
                if let Some(match_index) = match_entry.suffixes.get(key) {
                    let match_slice = &match_entry.data[match_index..];

                    // Check how long the common prefix actually is
                    let match_len = Self::common_prefix_len(match_slice, data_slice);

                    // Collisions in the suffix store might make this check fail
                    if match_len >= MIN_MATCH_LEN {
                        let offset = match_entry.base_offset + self.suffix_idx - match_index;

                        // If we are in debug/tests make sure the match we found is actually at the offset we calculated
                        #[cfg(debug_assertions)]
                        {
                            let unprocessed = last_entry.data.len() - self.suffix_idx;
                            let start = self.concat_window.len() - unprocessed - offset;
                            let end = start + match_len;
                            let check_slice = &self.concat_window[start..end];
                            debug_assert_eq!(check_slice, &match_slice[..match_len]);
                        }

                        // Prefer the longer match; on equal length prefer the
                        // smaller (cheaper to encode) offset.
                        if let Some((old_offset, old_match_len)) = candidate {
                            if match_len > old_match_len
                                || (match_len == old_match_len && offset < old_offset)
                            {
                                candidate = Some((offset, match_len));
                            }
                        } else {
                            candidate = Some((offset, match_len));
                        }
                    }
                }
            }

            if let Some((offset, match_len)) = candidate {
                // For each index in the match we found we do not need to look for another match
                // But we still want them registered in the suffix store
                self.add_suffixes_till(self.suffix_idx + match_len, self.hash_fill_step);

                // All literals that were not included between this match and the last are now included here
                let last_entry = self.window.last().unwrap();
                let literals = &last_entry.data[self.last_idx_in_sequence..self.suffix_idx];

                // Update the indexes, all indexes upto and including the current index have been included in a sequence now
                self.suffix_idx += match_len;
                self.last_idx_in_sequence = self.suffix_idx;
                // Only the side effect on the rep-code history matters here;
                // the encoded value itself is discarded.
                let _ = encode_offset_with_history(
                    offset as u32,
                    literals.len() as u32,
                    &mut self.offset_hist,
                );
                handle_sequence(Sequence::Triple {
                    literals,
                    offset,
                    match_len,
                });

                return true;
            }

            // No match at this position: remember this suffix (first writer
            // wins) and advance by one byte.
            let last_entry = self.window.last_mut().unwrap();
            let key = &last_entry.data[self.suffix_idx..self.suffix_idx + MIN_MATCH_LEN];
            if !last_entry.suffixes.contains_key(key) {
                last_entry.suffixes.insert(key, self.suffix_idx);
            }
            self.suffix_idx += 1;
        }
    }

    /// Find the common prefix length between two byte slices
    ///
    /// Compares word-at-a-time and pinpoints the first differing byte via
    /// `mismatch_byte_index`; the sub-word tail falls back to a byte loop.
    #[inline(always)]
    fn common_prefix_len(a: &[u8], b: &[u8]) -> usize {
        let max = a.len().min(b.len());
        let chunk = core::mem::size_of::<usize>();
        let mut off = 0usize;
        let lhs = a.as_ptr();
        let rhs = b.as_ptr();

        while off + chunk <= max {
            // SAFETY: the loop condition guarantees off + chunk <= max, and
            // max is the minimum of both slice lengths, so these unaligned
            // word reads stay fully in bounds for both slices.
            let lhs_word = unsafe { core::ptr::read_unaligned(lhs.add(off) as *const usize) };
            let rhs_word = unsafe { core::ptr::read_unaligned(rhs.add(off) as *const usize) };
            let diff = lhs_word ^ rhs_word;
            if diff != 0 {
                return off + Self::mismatch_byte_index(diff);
            }
            off += chunk;
        }

        off + core::iter::zip(&a[off..max], &b[off..max])
            .take_while(|(x, y)| x == y)
            .count()
    }

    /// Process bytes and add the suffixes to the suffix store up to a specific index
    #[inline(always)]
    fn add_suffixes_till(&mut self, idx: usize, fill_step: usize) {
        let start = self.suffix_idx;
        let last_entry = self.window.last_mut().unwrap();
        if last_entry.data.len() < MIN_MATCH_LEN {
            return;
        }
        // A suffix key needs MIN_MATCH_LEN bytes of lookahead, so the last
        // insertable position is idx - (MIN_MATCH_LEN - 1).
        let insert_limit = idx.saturating_sub(MIN_MATCH_LEN - 1);
        if insert_limit > start {
            let data = last_entry.data.as_slice();
            let suffixes = &mut last_entry.suffixes;
            if fill_step == FAST_HASH_FILL_STEP {
                Self::add_suffixes_interleaved_fast(data, suffixes, start, insert_limit);
            } else {
                let mut pos = start;
                while pos < insert_limit {
                    Self::insert_suffix_if_absent(data, suffixes, pos);
                    pos += fill_step;
                }
            }
        }

        // Regardless of the fill stride, also register the last full suffix
        // ending at idx so the tail of the range stays findable.
        if idx >= start + MIN_MATCH_LEN {
            let tail_start = idx - MIN_MATCH_LEN;
            let tail_key = &last_entry.data[tail_start..tail_start + MIN_MATCH_LEN];
            if !last_entry.suffixes.contains_key(tail_key) {
                last_entry.suffixes.insert(tail_key, tail_start);
            }
        }
    }

    /// Insert the suffix starting at `pos` unless its hash bucket is already
    /// occupied (first writer wins; collisions are resolved at lookup time).
    #[inline(always)]
    fn insert_suffix_if_absent(data: &[u8], suffixes: &mut SuffixStore, pos: usize) {
        debug_assert!(
            pos + MIN_MATCH_LEN <= data.len(),
            "insert_suffix_if_absent: pos {} + MIN_MATCH_LEN {} exceeds data.len() {}",
            pos,
            MIN_MATCH_LEN,
            data.len()
        );
        let key = &data[pos..pos + MIN_MATCH_LEN];
        if !suffixes.contains_key(key) {
            suffixes.insert(key, pos);
        }
    }

    /// Batched suffix fill used for the FAST_HASH_FILL_STEP stride.
    #[inline(always)]
    fn add_suffixes_interleaved_fast(
        data: &[u8],
        suffixes: &mut SuffixStore,
        start: usize,
        insert_limit: usize,
    ) {
        let lane = FAST_HASH_FILL_STEP;
        let mut pos = start;

        // Pipeline-ish fill: compute and retire several hash positions per loop
        // so the fastest path keeps multiple independent hash lookups in flight.
        while pos + lane * 3 < insert_limit {
            let p0 = pos;
            let p1 = pos + lane;
            let p2 = pos + lane * 2;
            let p3 = pos + lane * 3;

            Self::insert_suffix_if_absent(data, suffixes, p0);
            Self::insert_suffix_if_absent(data, suffixes, p1);
            Self::insert_suffix_if_absent(data, suffixes, p2);
            Self::insert_suffix_if_absent(data, suffixes, p3);

            pos += lane * 4;
        }

        // Remainder positions that did not fill a whole group of four.
        while pos < insert_limit {
            Self::insert_suffix_if_absent(data, suffixes, pos);
            pos += lane;
        }
    }

    /// Probe the rep-code offsets for a match at the current position.
    ///
    /// Only used when the previous sequence ended exactly here
    /// (`literals_len == 0`); the probed offsets are rep1, rep2 and rep0-1.
    /// Returns the best `(offset, match_len)` with at least MIN_MATCH_LEN.
    fn repcode_candidate(&self, data_slice: &[u8], literals_len: usize) -> Option<(usize, usize)> {
        if literals_len != 0 {
            return None;
        }

        let reps = [
            Some(self.offset_hist[1] as usize),
            Some(self.offset_hist[2] as usize),
            (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
        ];

        let mut best: Option<(usize, usize)> = None;
        // Track offsets already probed so duplicates in the history are only
        // evaluated once.
        let mut seen = [0usize; 3];
        let mut seen_len = 0usize;
        for offset in reps.into_iter().flatten() {
            if offset == 0 {
                continue;
            }
            if seen[..seen_len].contains(&offset) {
                continue;
            }
            seen[seen_len] = offset;
            seen_len += 1;

            let Some(match_len) = self.offset_match_len(offset, data_slice) else {
                continue;
            };
            if match_len < MIN_MATCH_LEN {
                continue;
            }

            if best.is_none_or(|(old_offset, old_len)| {
                match_len > old_len || (match_len == old_len && offset < old_offset)
            }) {
                best = Some((offset, match_len));
            }
        }
        best
    }

    /// Length of the common prefix between the current suffix and the data
    /// `offset` bytes back, or None when the offset reaches outside the
    /// searchable part of the window.
    fn offset_match_len(&self, offset: usize, data_slice: &[u8]) -> Option<usize> {
        if offset == 0 {
            return None;
        }

        let last_idx = self.window.len().checked_sub(1)?;
        let last_entry = &self.window[last_idx];
        // Bytes at or after suffix_idx are not yet searchable history.
        let searchable_prefix = self.window_size - (last_entry.data.len() - self.suffix_idx);
        if offset > searchable_prefix {
            return None;
        }

        // Map the backwards offset onto the window entry that contains it,
        // walking older entries when it reaches past the newest one.
        let mut remaining = offset;
        let (entry_idx, match_index) = if remaining <= self.suffix_idx {
            (last_idx, self.suffix_idx - remaining)
        } else {
            remaining -= self.suffix_idx;
            let mut found = None;
            for entry_idx in (0..last_idx).rev() {
                let len = self.window[entry_idx].data.len();
                if remaining <= len {
                    found = Some((entry_idx, len - remaining));
                    break;
                }
                remaining -= len;
            }
            found?
        };

        let match_entry = &self.window[entry_idx];
        let match_slice = &match_entry.data[match_index..];

        Some(Self::common_prefix_len(match_slice, data_slice))
    }

    /// Skip matching for the whole current window entry
    /// (suffixes are still registered so later blocks can match into it).
    fn skip_matching(&mut self) {
        let len = self.window.last().unwrap().data.len();
        self.add_suffixes_till(len, 1);
        self.suffix_idx = len;
        self.last_idx_in_sequence = len;
    }

    /// Add a new window entry. Will panic if the last window entry hasn't been processed properly.
    /// If any resources are released by pushing the new entry they are returned via the callback
    fn add_data(
        &mut self,
        data: Vec<u8>,
        suffixes: SuffixStore,
        reuse_space: impl FnMut(Vec<u8>, SuffixStore),
    ) {
        assert!(
            self.window.is_empty() || self.suffix_idx == self.window.last().unwrap().data.len()
        );
        self.reserve(data.len(), reuse_space);
        #[cfg(debug_assertions)]
        self.concat_window.extend_from_slice(&data);

        // Every older entry moves further away from the new newest entry by
        // the length of the entry that was newest until now.
        if let Some(last_len) = self.window.last().map(|last| last.data.len()) {
            for entry in self.window.iter_mut() {
                entry.base_offset += last_len;
            }
        }

        let len = data.len();
        self.window.push(WindowEntry {
            data,
            suffixes,
            base_offset: 0,
        });
        self.window_size += len;
        self.suffix_idx = 0;
        self.last_idx_in_sequence = 0;
    }

    /// Reserve space for a new window entry
    /// If any resources are released by pushing the new entry they are returned via the callback
    fn reserve(&mut self, amount: usize, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
        assert!(self.max_window_size >= amount);
        // Evict the oldest entries until the new data fits in the window.
        while self.window_size + amount > self.max_window_size {
            let removed = self.window.remove(0);
            self.window_size -= removed.data.len();
            #[cfg(debug_assertions)]
            self.concat_window.drain(0..removed.data.len());

            let WindowEntry {
                suffixes,
                data: leaked_vec,
                base_offset: _,
            } = removed;
            reuse_space(leaked_vec, suffixes);
        }
    }
}
1239
/// Matcher backed by two hash tables over a contiguous history buffer,
/// with a small lazy-match lookahead (see `lazy_depth`).
struct DfastMatchGenerator {
    /// Hard cap on the number of bytes retained for matching.
    max_window_size: usize,
    /// The individual data blocks whose bytes currently make up the window.
    window: VecDeque<Vec<u8>>,
    /// Sum of the lengths of all blocks in `window`.
    window_size: usize,
    // We keep a contiguous searchable history to avoid rebuilding and reseeding
    // the matcher state from disjoint block buffers on every block.
    history: Vec<u8>,
    /// Bytes evicted from the front of the window that `history` may still
    /// physically hold. NOTE(review): presumably reconciled by
    /// compact_history() — its body is outside this view; confirm.
    history_start: usize,
    /// Absolute stream position of the first byte still inside the window.
    history_abs_start: usize,
    /// Recent offsets used as rep-code match candidates.
    offset_hist: [u32; 3],
    /// Candidate-position buckets for the short hash; DFAST_EMPTY_SLOT marks
    /// unused slots (see reset()).
    short_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
    /// Candidate-position buckets for the long hash.
    long_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
    /// Hash width in bits; clamped and applied via set_hash_bits().
    hash_bits: usize,
    // Lazy match lookahead depth (internal tuning parameter).
    lazy_depth: u8,
}
1256
/// A match found in the history: where it starts in the absolute stream, how
/// far back its source lies, and how many bytes it covers.
#[derive(Copy, Clone)]
struct MatchCandidate {
    start: usize,
    offset: usize,
    match_len: usize,
}

/// Merge two optional candidates: the longer match wins; on equal length the
/// smaller offset wins; an exact tie keeps `lhs`.
fn best_len_offset_candidate(
    lhs: Option<MatchCandidate>,
    rhs: Option<MatchCandidate>,
) -> Option<MatchCandidate> {
    let Some(a) = lhs else { return rhs };
    let Some(b) = rhs else { return lhs };
    // Rank by (length ascending, offset descending) so a plain tuple
    // comparison expresses "longer wins, then nearer wins".
    let rank = |c: &MatchCandidate| (c.match_len, core::cmp::Reverse(c.offset));
    Some(if rank(&b) > rank(&a) { b } else { a })
}
1281
1282fn extend_backwards_shared(
1283    concat: &[u8],
1284    history_abs_start: usize,
1285    mut candidate_pos: usize,
1286    mut abs_pos: usize,
1287    mut match_len: usize,
1288    lit_len: usize,
1289) -> MatchCandidate {
1290    let min_abs_pos = abs_pos - lit_len;
1291    while abs_pos > min_abs_pos
1292        && candidate_pos > history_abs_start
1293        && concat[candidate_pos - history_abs_start - 1] == concat[abs_pos - history_abs_start - 1]
1294    {
1295        candidate_pos -= 1;
1296        abs_pos -= 1;
1297        match_len += 1;
1298    }
1299    MatchCandidate {
1300        start: abs_pos,
1301        offset: abs_pos - candidate_pos,
1302        match_len,
1303    }
1304}
1305
1306fn repcode_candidate_shared(
1307    concat: &[u8],
1308    history_abs_start: usize,
1309    offset_hist: [u32; 3],
1310    abs_pos: usize,
1311    lit_len: usize,
1312    min_match_len: usize,
1313) -> Option<MatchCandidate> {
1314    let reps = if lit_len == 0 {
1315        [
1316            Some(offset_hist[1] as usize),
1317            Some(offset_hist[2] as usize),
1318            (offset_hist[0] > 1).then_some((offset_hist[0] - 1) as usize),
1319        ]
1320    } else {
1321        [
1322            Some(offset_hist[0] as usize),
1323            Some(offset_hist[1] as usize),
1324            Some(offset_hist[2] as usize),
1325        ]
1326    };
1327
1328    let current_idx = abs_pos - history_abs_start;
1329    if current_idx + min_match_len > concat.len() {
1330        return None;
1331    }
1332
1333    let mut best = None;
1334    for rep in reps.into_iter().flatten() {
1335        if rep == 0 || rep > abs_pos {
1336            continue;
1337        }
1338        let candidate_pos = abs_pos - rep;
1339        if candidate_pos < history_abs_start {
1340            continue;
1341        }
1342        let candidate_idx = candidate_pos - history_abs_start;
1343        let match_len =
1344            MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
1345        if match_len >= min_match_len {
1346            let candidate = extend_backwards_shared(
1347                concat,
1348                history_abs_start,
1349                candidate_pos,
1350                abs_pos,
1351                match_len,
1352                lit_len,
1353            );
1354            best = best_len_offset_candidate(best, Some(candidate));
1355        }
1356    }
1357    best
1358}
1359
/// Parameters steering the lazy-match lookahead in `pick_lazy_match_shared`.
#[derive(Copy, Clone)]
struct LazyMatchConfig {
    /// A match at least this long is accepted immediately, without lookahead.
    target_len: usize,
    /// Minimum length a candidate must reach to count as a match.
    min_match_len: usize,
    /// Lookahead depth: one position ahead is always probed; a second
    /// position is probed when this is >= 2.
    lazy_depth: u8,
    /// Absolute stream position bounding how far probes may look ahead.
    history_abs_end: usize,
}
1367
1368fn pick_lazy_match_shared(
1369    abs_pos: usize,
1370    lit_len: usize,
1371    best: Option<MatchCandidate>,
1372    config: LazyMatchConfig,
1373    mut best_match_at: impl FnMut(usize, usize) -> Option<MatchCandidate>,
1374) -> Option<MatchCandidate> {
1375    let best = best?;
1376    if best.match_len >= config.target_len
1377        || abs_pos + 1 + config.min_match_len > config.history_abs_end
1378    {
1379        return Some(best);
1380    }
1381
1382    let next = best_match_at(abs_pos + 1, lit_len + 1);
1383    if let Some(next) = next
1384        && (next.match_len > best.match_len
1385            || (next.match_len == best.match_len && next.offset < best.offset))
1386    {
1387        return None;
1388    }
1389
1390    if config.lazy_depth >= 2 && abs_pos + 2 + config.min_match_len <= config.history_abs_end {
1391        let next2 = best_match_at(abs_pos + 2, lit_len + 2);
1392        if let Some(next2) = next2
1393            && next2.match_len > best.match_len + 1
1394        {
1395            return None;
1396        }
1397    }
1398
1399    Some(best)
1400}
1401
1402impl DfastMatchGenerator {
    /// Create a matcher whose retained history never exceeds `max_window_size`.
    fn new(max_window_size: usize) -> Self {
        Self {
            max_window_size,
            window: VecDeque::new(),
            window_size: 0,
            history: Vec::new(),
            history_start: 0,
            history_abs_start: 0,
            // Same initial rep-code history the other matchers use.
            offset_hist: [1, 4, 8],
            // Hash tables are allocated lazily; empty means "not built yet".
            short_hash: Vec::new(),
            long_hash: Vec::new(),
            hash_bits: DFAST_HASH_BITS,
            lazy_depth: 1,
        }
    }
1418
1419    fn set_hash_bits(&mut self, bits: usize) {
1420        let clamped = bits.clamp(MIN_WINDOW_LOG as usize, DFAST_HASH_BITS);
1421        if self.hash_bits != clamped {
1422            self.hash_bits = clamped;
1423            self.short_hash = Vec::new();
1424            self.long_hash = Vec::new();
1425        }
1426    }
1427
1428    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
1429        self.window_size = 0;
1430        self.history.clear();
1431        self.history_start = 0;
1432        self.history_abs_start = 0;
1433        self.offset_hist = [1, 4, 8];
1434        if !self.short_hash.is_empty() {
1435            self.short_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
1436            self.long_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
1437        }
1438        for mut data in self.window.drain(..) {
1439            data.resize(data.capacity(), 0);
1440            reuse_space(data);
1441        }
1442    }
1443
1444    fn get_last_space(&self) -> &[u8] {
1445        self.window.back().unwrap().as_slice()
1446    }
1447
1448    fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
1449        assert!(data.len() <= self.max_window_size);
1450        while self.window_size + data.len() > self.max_window_size {
1451            let removed = self.window.pop_front().unwrap();
1452            self.window_size -= removed.len();
1453            self.history_start += removed.len();
1454            self.history_abs_start += removed.len();
1455            reuse_space(removed);
1456        }
1457        self.compact_history();
1458        self.history.extend_from_slice(&data);
1459        self.window_size += data.len();
1460        self.window.push_back(data);
1461    }
1462
1463    fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
1464        while self.window_size > self.max_window_size {
1465            let removed = self.window.pop_front().unwrap();
1466            self.window_size -= removed.len();
1467            self.history_start += removed.len();
1468            self.history_abs_start += removed.len();
1469            reuse_space(removed);
1470        }
1471    }
1472
1473    fn skip_matching(&mut self) {
1474        self.ensure_hash_tables();
1475        let current_len = self.window.back().unwrap().len();
1476        let current_abs_start = self.history_abs_start + self.window_size - current_len;
1477        self.insert_positions(current_abs_start, current_abs_start + current_len);
1478    }
1479
    /// Run the dfast match search over the most recently added slice,
    /// emitting `Sequence::Triple` for each accepted match and a trailing
    /// `Sequence::Literals` for any unmatched tail.
    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
        self.ensure_hash_tables();

        let current_len = self.window.back().unwrap().len();
        if current_len == 0 {
            return;
        }

        // Absolute position of the first byte of the slice being encoded.
        let current_abs_start = self.history_abs_start + self.window_size - current_len;

        let mut pos = 0usize;
        let mut literals_start = 0usize;
        while pos + DFAST_MIN_MATCH_LEN <= current_len {
            let abs_pos = current_abs_start + pos;
            let lit_len = pos - literals_start;

            let best = self.best_match(abs_pos, lit_len);
            if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
                // Seed the hash tables with every position the match covers
                // before emitting the sequence.
                self.insert_positions(abs_pos, candidate.start + candidate.match_len);
                let current = self.window.back().unwrap().as_slice();
                // candidate.start may lie before abs_pos (backward extension);
                // the pending literals end where the match actually starts.
                let start = candidate.start - current_abs_start;
                let literals = &current[literals_start..start];
                handle_sequence(Sequence::Triple {
                    literals,
                    offset: candidate.offset,
                    match_len: candidate.match_len,
                });
                // The encoded offset value is irrelevant here; we only need the
                // side effect on offset history for future rep-code matching.
                let _ = encode_offset_with_history(
                    candidate.offset as u32,
                    literals.len() as u32,
                    &mut self.offset_hist,
                );
                pos = start + candidate.match_len;
                literals_start = pos;
            } else {
                // No acceptable match here: record the position and move on.
                self.insert_position(abs_pos);
                pos += 1;
            }
        }

        if literals_start < current_len {
            // We stop inserting once fewer than DFAST_MIN_MATCH_LEN bytes remain.
            // The last boundary-spanning start that can seed a dfast hash is
            // still inserted by the loop above; `dfast_inserts_tail_positions_
            // for_next_block_matching()` asserts that the next block can match
            // immediately at the boundary without eagerly seeding the final
            // DFAST_MIN_MATCH_LEN - 1 bytes here.
            let current = self.window.back().unwrap().as_slice();
            handle_sequence(Sequence::Literals {
                literals: &current[literals_start..],
            });
        }
    }
1535
1536    fn ensure_hash_tables(&mut self) {
1537        let table_len = 1usize << self.hash_bits;
1538        if self.short_hash.len() != table_len {
1539            // This is intentionally lazy so Fastest/Uncompressed never pay the
1540            // ~dfast-level memory cost. The current size tracks the issue's
1541            // zstd level-3 style parameters rather than a generic low-memory preset.
1542            self.short_hash = alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; table_len];
1543            self.long_hash = alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; table_len];
1544        }
1545    }
1546
1547    fn compact_history(&mut self) {
1548        if self.history_start == 0 {
1549            return;
1550        }
1551        if self.history_start >= self.max_window_size
1552            || self.history_start * 2 >= self.history.len()
1553        {
1554            self.history.drain(..self.history_start);
1555            self.history_start = 0;
1556        }
1557    }
1558
1559    fn live_history(&self) -> &[u8] {
1560        &self.history[self.history_start..]
1561    }
1562
1563    fn history_abs_end(&self) -> usize {
1564        self.history_abs_start + self.live_history().len()
1565    }
1566
1567    fn best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1568        let rep = self.repcode_candidate(abs_pos, lit_len);
1569        let hash = self.hash_candidate(abs_pos, lit_len);
1570        best_len_offset_candidate(rep, hash)
1571    }
1572
1573    fn pick_lazy_match(
1574        &self,
1575        abs_pos: usize,
1576        lit_len: usize,
1577        best: Option<MatchCandidate>,
1578    ) -> Option<MatchCandidate> {
1579        pick_lazy_match_shared(
1580            abs_pos,
1581            lit_len,
1582            best,
1583            LazyMatchConfig {
1584                target_len: DFAST_TARGET_LEN,
1585                min_match_len: DFAST_MIN_MATCH_LEN,
1586                lazy_depth: self.lazy_depth,
1587                history_abs_end: self.history_abs_end(),
1588            },
1589            |next_pos, next_lit_len| self.best_match(next_pos, next_lit_len),
1590        )
1591    }
1592
1593    fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1594        repcode_candidate_shared(
1595            self.live_history(),
1596            self.history_abs_start,
1597            self.offset_hist,
1598            abs_pos,
1599            lit_len,
1600            DFAST_MIN_MATCH_LEN,
1601        )
1602    }
1603
1604    fn hash_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1605        let concat = self.live_history();
1606        let current_idx = abs_pos - self.history_abs_start;
1607        let mut best = None;
1608        for candidate_pos in self.long_candidates(abs_pos) {
1609            if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos {
1610                continue;
1611            }
1612            let candidate_idx = candidate_pos - self.history_abs_start;
1613            let match_len =
1614                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
1615            if match_len >= DFAST_MIN_MATCH_LEN {
1616                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
1617                best = best_len_offset_candidate(best, Some(candidate));
1618                if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) {
1619                    return best;
1620                }
1621            }
1622        }
1623
1624        for candidate_pos in self.short_candidates(abs_pos) {
1625            if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos {
1626                continue;
1627            }
1628            let candidate_idx = candidate_pos - self.history_abs_start;
1629            let match_len =
1630                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
1631            if match_len >= DFAST_MIN_MATCH_LEN {
1632                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
1633                best = best_len_offset_candidate(best, Some(candidate));
1634                if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) {
1635                    return best;
1636                }
1637            }
1638        }
1639        best
1640    }
1641
1642    fn extend_backwards(
1643        &self,
1644        candidate_pos: usize,
1645        abs_pos: usize,
1646        match_len: usize,
1647        lit_len: usize,
1648    ) -> MatchCandidate {
1649        extend_backwards_shared(
1650            self.live_history(),
1651            self.history_abs_start,
1652            candidate_pos,
1653            abs_pos,
1654            match_len,
1655            lit_len,
1656        )
1657    }
1658
1659    fn insert_positions(&mut self, start: usize, end: usize) {
1660        for pos in start..end {
1661            self.insert_position(pos);
1662        }
1663    }
1664
1665    fn insert_position(&mut self, pos: usize) {
1666        let idx = pos - self.history_abs_start;
1667        let short = {
1668            let concat = self.live_history();
1669            (idx + 4 <= concat.len()).then(|| self.hash4(&concat[idx..]))
1670        };
1671        if let Some(short) = short {
1672            let bucket = &mut self.short_hash[short];
1673            if bucket[0] != pos {
1674                bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
1675                bucket[0] = pos;
1676            }
1677        }
1678
1679        let long = {
1680            let concat = self.live_history();
1681            (idx + 8 <= concat.len()).then(|| self.hash8(&concat[idx..]))
1682        };
1683        if let Some(long) = long {
1684            let bucket = &mut self.long_hash[long];
1685            if bucket[0] != pos {
1686                bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
1687                bucket[0] = pos;
1688            }
1689        }
1690    }
1691
1692    fn short_candidates(&self, pos: usize) -> impl Iterator<Item = usize> + '_ {
1693        let concat = self.live_history();
1694        let idx = pos - self.history_abs_start;
1695        (idx + 4 <= concat.len())
1696            .then(|| self.short_hash[self.hash4(&concat[idx..])])
1697            .into_iter()
1698            .flatten()
1699            .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
1700    }
1701
1702    fn long_candidates(&self, pos: usize) -> impl Iterator<Item = usize> + '_ {
1703        let concat = self.live_history();
1704        let idx = pos - self.history_abs_start;
1705        (idx + 8 <= concat.len())
1706            .then(|| self.long_hash[self.hash8(&concat[idx..])])
1707            .into_iter()
1708            .flatten()
1709            .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
1710    }
1711
1712    fn hash4(&self, data: &[u8]) -> usize {
1713        let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64;
1714        self.hash_index(value)
1715    }
1716
1717    fn hash8(&self, data: &[u8]) -> usize {
1718        let value = u64::from_le_bytes(data[..8].try_into().unwrap());
1719        self.hash_index(value)
1720    }
1721
1722    fn hash_index(&self, value: u64) -> usize {
1723        const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
1724        ((value.wrapping_mul(PRIME)) >> (64 - self.hash_bits)) as usize
1725    }
1726}
1727
/// Match finder backed by a row-style hash table: each hash row is a small
/// ring of positions plus one-byte tags used to reject candidates cheaply.
struct RowMatchGenerator {
    // Upper bound on the total bytes retained in `window`.
    max_window_size: usize,
    // Owned input slices, oldest first; the back slice is the one being encoded.
    window: VecDeque<Vec<u8>>,
    // Sum of the lengths of all slices in `window`.
    window_size: usize,
    // Contiguous copy of the window data for O(1) slicing during match search.
    history: Vec<u8>,
    // Bytes at the front of `history` that are logically evicted but not yet
    // physically removed (see compact_history()).
    history_start: usize,
    // Absolute position of the first live history byte.
    history_abs_start: usize,
    // Recent offsets for rep-code matching.
    offset_hist: [u32; 3],
    // log2 of the number of rows; total table bits = row_hash_log + row_log.
    row_hash_log: usize,
    // log2 of the number of entries per row.
    row_log: usize,
    // Maximum candidates examined per row walk.
    search_depth: usize,
    // Match length at which the search stops early.
    target_len: usize,
    // Lazy-matching lookahead depth passed to the shared lazy logic.
    lazy_depth: u8,
    // Per-row index of the newest entry (ring head), one byte per row.
    row_heads: Vec<u8>,
    // Flat row-major storage of candidate positions (ROW_EMPTY_SLOT = empty).
    row_positions: Vec<usize>,
    // Flat row-major storage of one-byte hash tags, parallel to row_positions.
    row_tags: Vec<u8>,
}
1745
impl RowMatchGenerator {
    /// Create an unconfigured generator with default (ROW_*) tuning and no
    /// tables allocated yet (see ensure_tables()).
    fn new(max_window_size: usize) -> Self {
        Self {
            max_window_size,
            window: VecDeque::new(),
            window_size: 0,
            history: Vec::new(),
            history_start: 0,
            history_abs_start: 0,
            offset_hist: [1, 4, 8],
            row_hash_log: ROW_HASH_BITS - ROW_LOG,
            row_log: ROW_LOG,
            search_depth: ROW_SEARCH_DEPTH,
            target_len: ROW_TARGET_LEN,
            lazy_depth: 1,
            row_heads: Vec::new(),
            row_positions: Vec::new(),
            row_tags: Vec::new(),
        }
    }

    /// Update the total hash bits; clears the tables when the row count
    /// changes so ensure_tables() reallocates at the new geometry.
    fn set_hash_bits(&mut self, bits: usize) {
        let clamped = bits.clamp(self.row_log + 1, ROW_HASH_BITS);
        let row_hash_log = clamped.saturating_sub(self.row_log);
        if self.row_hash_log != row_hash_log {
            self.row_hash_log = row_hash_log;
            self.row_heads.clear();
            self.row_positions.clear();
            self.row_tags.clear();
        }
    }

    /// Apply level-dependent tuning. row_log is clamped to [4, 6] so a ring
    /// index always fits in the u8 row_heads entries.
    fn configure(&mut self, config: RowConfig) {
        self.row_log = config.row_log.clamp(4, 6);
        self.search_depth = config.search_depth;
        self.target_len = config.target_len;
        self.set_hash_bits(config.hash_bits.max(self.row_log + 1));
    }

    /// Clear all state but keep table allocations; window slices are zero-
    /// extended to capacity and handed back to `reuse_space`.
    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
        self.window_size = 0;
        self.history.clear();
        self.history_start = 0;
        self.history_abs_start = 0;
        self.offset_hist = [1, 4, 8];
        self.row_heads.fill(0);
        self.row_positions.fill(ROW_EMPTY_SLOT);
        self.row_tags.fill(0);
        for mut data in self.window.drain(..) {
            data.resize(data.capacity(), 0);
            reuse_space(data);
        }
    }

    fn get_last_space(&self) -> &[u8] {
        self.window.back().unwrap().as_slice()
    }

    /// Append a slice, evicting the oldest slices first so the window budget
    /// is never exceeded. The data is mirrored into `history` for contiguous
    /// access during match search.
    fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
        assert!(data.len() <= self.max_window_size);
        while self.window_size + data.len() > self.max_window_size {
            let removed = self.window.pop_front().unwrap();
            self.window_size -= removed.len();
            self.history_start += removed.len();
            self.history_abs_start += removed.len();
            reuse_space(removed);
        }
        self.compact_history();
        self.history.extend_from_slice(&data);
        self.window_size += data.len();
        self.window.push_back(data);
    }

    /// Evict oldest slices until the retained data fits the window budget.
    fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
        while self.window_size > self.max_window_size {
            let removed = self.window.pop_front().unwrap();
            self.window_size -= removed.len();
            self.history_start += removed.len();
            self.history_abs_start += removed.len();
            reuse_space(removed);
        }
    }

    /// Index the current slice (plus boundary backfill) without emitting any
    /// sequences, so later blocks can still match against these bytes.
    fn skip_matching(&mut self) {
        self.ensure_tables();
        let current_len = self.window.back().unwrap().len();
        let current_abs_start = self.history_abs_start + self.window_size - current_len;
        let backfill_start = self.backfill_start(current_abs_start);
        if backfill_start < current_abs_start {
            self.insert_positions(backfill_start, current_abs_start);
        }
        self.insert_positions(current_abs_start, current_abs_start + current_len);
    }

    /// Run the row-hash match search over the most recently added slice,
    /// emitting `Sequence::Triple` per accepted match and a trailing
    /// `Sequence::Literals` for any unmatched tail.
    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
        self.ensure_tables();

        let current_len = self.window.back().unwrap().len();
        if current_len == 0 {
            return;
        }
        let current_abs_start = self.history_abs_start + self.window_size - current_len;
        // Re-insert tail positions of the previous slice that lacked the
        // ROW_HASH_KEY_LEN bytes of lookahead when it was processed.
        let backfill_start = self.backfill_start(current_abs_start);
        if backfill_start < current_abs_start {
            self.insert_positions(backfill_start, current_abs_start);
        }

        let mut pos = 0usize;
        let mut literals_start = 0usize;
        while pos + ROW_MIN_MATCH_LEN <= current_len {
            let abs_pos = current_abs_start + pos;
            let lit_len = pos - literals_start;

            let best = self.best_match(abs_pos, lit_len);
            if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
                // Index every position the match covers before emitting it.
                self.insert_positions(abs_pos, candidate.start + candidate.match_len);
                let current = self.window.back().unwrap().as_slice();
                // candidate.start may lie before abs_pos (backward extension).
                let start = candidate.start - current_abs_start;
                let literals = &current[literals_start..start];
                handle_sequence(Sequence::Triple {
                    literals,
                    offset: candidate.offset,
                    match_len: candidate.match_len,
                });
                // Only the side effect on offset history matters here; the
                // encoded value itself is discarded.
                let _ = encode_offset_with_history(
                    candidate.offset as u32,
                    literals.len() as u32,
                    &mut self.offset_hist,
                );
                pos = start + candidate.match_len;
                literals_start = pos;
            } else {
                self.insert_position(abs_pos);
                pos += 1;
            }
        }

        // The matching loop stops at ROW_MIN_MATCH_LEN, but hashing only
        // needs ROW_HASH_KEY_LEN bytes — index the remaining tail positions.
        while pos + ROW_HASH_KEY_LEN <= current_len {
            self.insert_position(current_abs_start + pos);
            pos += 1;
        }

        if literals_start < current_len {
            let current = self.window.back().unwrap().as_slice();
            handle_sequence(Sequence::Literals {
                literals: &current[literals_start..],
            });
        }
    }

    /// Lazily (re)allocate the row tables at the current geometry.
    fn ensure_tables(&mut self) {
        let row_count = 1usize << self.row_hash_log;
        let row_entries = 1usize << self.row_log;
        let total = row_count * row_entries;
        if self.row_positions.len() != total {
            self.row_heads = alloc::vec![0; row_count];
            self.row_positions = alloc::vec![ROW_EMPTY_SLOT; total];
            self.row_tags = alloc::vec![0; total];
        }
    }

    /// Physically drop the evicted prefix of `history` once it is big enough
    /// to be worth the memmove.
    fn compact_history(&mut self) {
        if self.history_start == 0 {
            return;
        }
        if self.history_start >= self.max_window_size
            || self.history_start * 2 >= self.history.len()
        {
            self.history.drain(..self.history_start);
            self.history_start = 0;
        }
    }

    fn live_history(&self) -> &[u8] {
        &self.history[self.history_start..]
    }

    fn history_abs_end(&self) -> usize {
        self.history_abs_start + self.live_history().len()
    }

    /// Derive (row index, tag byte) from the 4-byte key at `abs_pos`, or
    /// `None` when fewer than ROW_HASH_KEY_LEN bytes of lookahead remain.
    fn hash_and_row(&self, abs_pos: usize) -> Option<(usize, u8)> {
        let idx = abs_pos - self.history_abs_start;
        let concat = self.live_history();
        if idx + ROW_HASH_KEY_LEN > concat.len() {
            return None;
        }
        let value =
            u32::from_le_bytes(concat[idx..idx + ROW_HASH_KEY_LEN].try_into().unwrap()) as u64;
        const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
        let hash = value.wrapping_mul(PRIME);
        // Take the top (row_hash_log + ROW_TAG_BITS) bits: the upper part
        // selects the row, the low ROW_TAG_BITS byte becomes the tag.
        let total_bits = self.row_hash_log + ROW_TAG_BITS;
        let combined = hash >> (u64::BITS as usize - total_bits);
        let row_mask = (1usize << self.row_hash_log) - 1;
        let row = ((combined >> ROW_TAG_BITS) as usize) & row_mask;
        let tag = combined as u8;
        Some((row, tag))
    }

    /// First boundary position to backfill: up to ROW_HASH_KEY_LEN - 1 bytes
    /// before the current slice, clamped to the start of live history.
    fn backfill_start(&self, current_abs_start: usize) -> usize {
        current_abs_start
            .saturating_sub(ROW_HASH_KEY_LEN - 1)
            .max(self.history_abs_start)
    }

    /// Best of the repeat-offset candidate and the row-table candidate,
    /// compared by (length, offset).
    fn best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        let rep = self.repcode_candidate(abs_pos, lit_len);
        let row = self.row_candidate(abs_pos, lit_len);
        best_len_offset_candidate(rep, row)
    }

    fn pick_lazy_match(
        &self,
        abs_pos: usize,
        lit_len: usize,
        best: Option<MatchCandidate>,
    ) -> Option<MatchCandidate> {
        pick_lazy_match_shared(
            abs_pos,
            lit_len,
            best,
            LazyMatchConfig {
                target_len: self.target_len,
                min_match_len: ROW_MIN_MATCH_LEN,
                lazy_depth: self.lazy_depth,
                history_abs_end: self.history_abs_end(),
            },
            |next_pos, next_lit_len| self.best_match(next_pos, next_lit_len),
        )
    }

    fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        repcode_candidate_shared(
            self.live_history(),
            self.history_abs_start,
            self.offset_hist,
            abs_pos,
            lit_len,
            ROW_MIN_MATCH_LEN,
        )
    }

    /// Walk the row for `abs_pos` (newest entry first), verify candidates
    /// whose tag matches, and return the best match found, stopping early
    /// once `target_len` is reached.
    fn row_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        let concat = self.live_history();
        let current_idx = abs_pos - self.history_abs_start;
        if current_idx + ROW_MIN_MATCH_LEN > concat.len() {
            return None;
        }

        let (row, tag) = self.hash_and_row(abs_pos)?;
        let row_entries = 1usize << self.row_log;
        let row_mask = row_entries - 1;
        let row_base = row << self.row_log;
        let head = self.row_heads[row] as usize;
        let max_walk = self.search_depth.min(row_entries);

        let mut best = None;
        for i in 0..max_walk {
            // Entries are stored newest-first starting at the ring head.
            let slot = (head + i) & row_mask;
            let idx = row_base + slot;
            // Cheap one-byte filter before touching the data.
            if self.row_tags[idx] != tag {
                continue;
            }
            let candidate_pos = self.row_positions[idx];
            // Reject empty, evicted, or not-strictly-past entries.
            if candidate_pos == ROW_EMPTY_SLOT
                || candidate_pos < self.history_abs_start
                || candidate_pos >= abs_pos
            {
                continue;
            }
            let candidate_idx = candidate_pos - self.history_abs_start;
            let match_len =
                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
            if match_len >= ROW_MIN_MATCH_LEN {
                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
                best = best_len_offset_candidate(best, Some(candidate));
                if best.is_some_and(|best| best.match_len >= self.target_len) {
                    return best;
                }
            }
        }
        best
    }

    fn extend_backwards(
        &self,
        candidate_pos: usize,
        abs_pos: usize,
        match_len: usize,
        lit_len: usize,
    ) -> MatchCandidate {
        extend_backwards_shared(
            self.live_history(),
            self.history_abs_start,
            candidate_pos,
            abs_pos,
            match_len,
            lit_len,
        )
    }

    fn insert_positions(&mut self, start: usize, end: usize) {
        for pos in start..end {
            self.insert_position(pos);
        }
    }

    /// Insert `abs_pos` as the new head of its ring row, overwriting the
    /// oldest entry; no-op when there is too little lookahead to hash.
    fn insert_position(&mut self, abs_pos: usize) {
        let Some((row, tag)) = self.hash_and_row(abs_pos) else {
            return;
        };
        let row_entries = 1usize << self.row_log;
        let row_mask = row_entries - 1;
        let row_base = row << self.row_log;
        let head = self.row_heads[row] as usize;
        // The ring head moves backwards; the slot before the current head
        // (wrapping) receives the newest entry.
        let next = head.wrapping_sub(1) & row_mask;
        self.row_heads[row] = next as u8;
        self.row_tags[row_base + next] = tag;
        self.row_positions[row_base + next] = abs_pos;
    }
}
2067
/// Match finder backed by a hash table plus a chain table (hash-chain style):
/// hash_table holds the newest position per hash, chain_table links each
/// position to the previous one with the same hash.
struct HcMatchGenerator {
    // Upper bound on the total bytes retained in `window`.
    max_window_size: usize,
    // Owned input slices, oldest first; the back slice is the one being encoded.
    window: VecDeque<Vec<u8>>,
    // Sum of the lengths of all slices in `window`.
    window_size: usize,
    // Contiguous copy of the window data for O(1) slicing during match search.
    history: Vec<u8>,
    // Bytes at the front of `history` that are logically evicted but not yet
    // physically removed (see compact_history()).
    history_start: usize,
    // Absolute position of the first live history byte.
    history_abs_start: usize,
    // Base subtracted from absolute positions so table entries fit in u32
    // (see relative_position() / maybe_rebase_positions()).
    position_base: usize,
    // Recent offsets for rep-code matching.
    offset_hist: [u32; 3],
    // hash -> newest stored position (as relative + 1; HC_EMPTY = empty).
    hash_table: Vec<u32>,
    // position -> previous stored position with the same hash.
    chain_table: Vec<u32>,
    // Lazy-matching lookahead depth passed to the shared lazy logic.
    lazy_depth: u8,
    // log2 sizes of the two tables.
    hash_log: usize,
    chain_log: usize,
    // Maximum chain walk length (capped at MAX_HC_SEARCH_DEPTH).
    search_depth: usize,
    // Match length at which the search stops early.
    target_len: usize,
}
2085
2086impl HcMatchGenerator {
2087    fn new(max_window_size: usize) -> Self {
2088        Self {
2089            max_window_size,
2090            window: VecDeque::new(),
2091            window_size: 0,
2092            history: Vec::new(),
2093            history_start: 0,
2094            history_abs_start: 0,
2095            position_base: 0,
2096            offset_hist: [1, 4, 8],
2097            hash_table: Vec::new(),
2098            chain_table: Vec::new(),
2099            lazy_depth: 2,
2100            hash_log: HC_HASH_LOG,
2101            chain_log: HC_CHAIN_LOG,
2102            search_depth: HC_SEARCH_DEPTH,
2103            target_len: HC_TARGET_LEN,
2104        }
2105    }
2106
2107    fn configure(&mut self, config: HcConfig) {
2108        let resize = self.hash_log != config.hash_log || self.chain_log != config.chain_log;
2109        self.hash_log = config.hash_log;
2110        self.chain_log = config.chain_log;
2111        self.search_depth = config.search_depth.min(MAX_HC_SEARCH_DEPTH);
2112        self.target_len = config.target_len;
2113        if resize && !self.hash_table.is_empty() {
2114            // Force reallocation on next ensure_tables() call.
2115            self.hash_table.clear();
2116            self.chain_table.clear();
2117        }
2118    }
2119
2120    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
2121        self.window_size = 0;
2122        self.history.clear();
2123        self.history_start = 0;
2124        self.history_abs_start = 0;
2125        self.position_base = 0;
2126        self.offset_hist = [1, 4, 8];
2127        if !self.hash_table.is_empty() {
2128            self.hash_table.fill(HC_EMPTY);
2129            self.chain_table.fill(HC_EMPTY);
2130        }
2131        for mut data in self.window.drain(..) {
2132            data.resize(data.capacity(), 0);
2133            reuse_space(data);
2134        }
2135    }
2136
2137    fn get_last_space(&self) -> &[u8] {
2138        self.window.back().unwrap().as_slice()
2139    }
2140
2141    // History duplicates window data for O(1) contiguous access during match
2142    // finding (common_prefix_len, extend_backwards). Same pattern as
2143    // DfastMatchGenerator. Peak: ~2x window size for data buffers + 6 MB tables.
2144    fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
2145        assert!(data.len() <= self.max_window_size);
2146        while self.window_size + data.len() > self.max_window_size {
2147            let removed = self.window.pop_front().unwrap();
2148            self.window_size -= removed.len();
2149            self.history_start += removed.len();
2150            self.history_abs_start += removed.len();
2151            reuse_space(removed);
2152        }
2153        self.compact_history();
2154        self.history.extend_from_slice(&data);
2155        self.window_size += data.len();
2156        self.window.push_back(data);
2157    }
2158
2159    fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
2160        while self.window_size > self.max_window_size {
2161            let removed = self.window.pop_front().unwrap();
2162            self.window_size -= removed.len();
2163            self.history_start += removed.len();
2164            self.history_abs_start += removed.len();
2165            reuse_space(removed);
2166        }
2167    }
2168
2169    /// Backfill positions from the tail of the previous slice that couldn't be
2170    /// hashed at the time (insert_position needs 4 bytes of lookahead).
2171    fn backfill_boundary_positions(&mut self, current_abs_start: usize) {
2172        let backfill_start = current_abs_start
2173            .saturating_sub(3)
2174            .max(self.history_abs_start);
2175        if backfill_start < current_abs_start {
2176            self.insert_positions(backfill_start, current_abs_start);
2177        }
2178    }
2179
2180    fn skip_matching(&mut self) {
2181        self.ensure_tables();
2182        let current_len = self.window.back().unwrap().len();
2183        let current_abs_start = self.history_abs_start + self.window_size - current_len;
2184        self.backfill_boundary_positions(current_abs_start);
2185        self.insert_positions(current_abs_start, current_abs_start + current_len);
2186    }
2187
    /// Run the hash-chain match search over the most recently added slice,
    /// emitting `Sequence::Triple` for each accepted match and a trailing
    /// `Sequence::Literals` for any unmatched tail.
    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
        self.ensure_tables();

        let current_len = self.window.back().unwrap().len();
        if current_len == 0 {
            return;
        }

        // Absolute position of the first byte of the slice being encoded.
        let current_abs_start = self.history_abs_start + self.window_size - current_len;
        self.backfill_boundary_positions(current_abs_start);

        let mut pos = 0usize;
        let mut literals_start = 0usize;
        while pos + HC_MIN_MATCH_LEN <= current_len {
            let abs_pos = current_abs_start + pos;
            let lit_len = pos - literals_start;

            let best = self.find_best_match(abs_pos, lit_len);
            if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
                // Index every position the match covers before emitting it.
                self.insert_positions(abs_pos, candidate.start + candidate.match_len);
                let current = self.window.back().unwrap().as_slice();
                // candidate.start may lie before abs_pos (backward extension);
                // the pending literals end where the match actually starts.
                let start = candidate.start - current_abs_start;
                let literals = &current[literals_start..start];
                handle_sequence(Sequence::Triple {
                    literals,
                    offset: candidate.offset,
                    match_len: candidate.match_len,
                });
                // Only the side effect on offset history matters here; the
                // encoded value itself is discarded.
                let _ = encode_offset_with_history(
                    candidate.offset as u32,
                    literals.len() as u32,
                    &mut self.offset_hist,
                );
                pos = start + candidate.match_len;
                literals_start = pos;
            } else {
                self.insert_position(abs_pos);
                pos += 1;
            }
        }

        // Insert remaining hashable positions in the tail (the matching loop
        // stops at HC_MIN_MATCH_LEN but insert_position only needs 4 bytes).
        while pos + 4 <= current_len {
            self.insert_position(current_abs_start + pos);
            pos += 1;
        }

        if literals_start < current_len {
            let current = self.window.back().unwrap().as_slice();
            handle_sequence(Sequence::Literals {
                literals: &current[literals_start..],
            });
        }
    }
2243
2244    fn ensure_tables(&mut self) {
2245        if self.hash_table.is_empty() {
2246            self.hash_table = alloc::vec![HC_EMPTY; 1 << self.hash_log];
2247            self.chain_table = alloc::vec![HC_EMPTY; 1 << self.chain_log];
2248        }
2249    }
2250
2251    fn compact_history(&mut self) {
2252        if self.history_start == 0 {
2253            return;
2254        }
2255        if self.history_start >= self.max_window_size
2256            || self.history_start * 2 >= self.history.len()
2257        {
2258            self.history.drain(..self.history_start);
2259            self.history_start = 0;
2260        }
2261    }
2262
2263    fn live_history(&self) -> &[u8] {
2264        &self.history[self.history_start..]
2265    }
2266
2267    fn history_abs_end(&self) -> usize {
2268        self.history_abs_start + self.live_history().len()
2269    }
2270
2271    fn hash_position(&self, data: &[u8]) -> usize {
2272        let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64;
2273        const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
2274        ((value.wrapping_mul(PRIME)) >> (64 - self.hash_log)) as usize
2275    }
2276
2277    fn relative_position(&self, abs_pos: usize) -> Option<u32> {
2278        let rel = abs_pos.checked_sub(self.position_base)?;
2279        let rel_u32 = u32::try_from(rel).ok()?;
2280        // Positions are stored as (relative_pos + 1), with 0 reserved as the
2281        // empty sentinel. So the raw relative position itself must stay
2282        // strictly below u32::MAX.
2283        (rel_u32 < u32::MAX).then_some(rel_u32)
2284    }
2285
2286    fn maybe_rebase_positions(&mut self, abs_pos: usize) {
2287        let needs_rebase = self
2288            .relative_position(abs_pos)
2289            .is_none_or(|relative| relative >= u32::MAX - 1);
2290        if !needs_rebase {
2291            return;
2292        }
2293
2294        // Keep all live history addressable after rebase.
2295        self.position_base = self.history_abs_start;
2296        self.hash_table.fill(HC_EMPTY);
2297        self.chain_table.fill(HC_EMPTY);
2298
2299        let history_start = self.history_abs_start;
2300        // Rebuild only the already-inserted prefix. The caller inserts abs_pos
2301        // immediately after this, and later positions are added in-order.
2302        for pos in history_start..abs_pos {
2303            self.insert_position_no_rebase(pos);
2304        }
2305    }
2306
    /// Insert `abs_pos` into the hash/chain tables, first rebasing
    /// `position_base` (and rebuilding the tables) if the u32 relative
    /// encoding would otherwise overflow.
    fn insert_position(&mut self, abs_pos: usize) {
        self.maybe_rebase_positions(abs_pos);
        self.insert_position_no_rebase(abs_pos);
    }
2311
2312    fn insert_position_no_rebase(&mut self, abs_pos: usize) {
2313        let idx = abs_pos - self.history_abs_start;
2314        let concat = self.live_history();
2315        if idx + 4 > concat.len() {
2316            return;
2317        }
2318        let hash = self.hash_position(&concat[idx..]);
2319        let Some(relative_pos) = self.relative_position(abs_pos) else {
2320            return;
2321        };
2322        let stored = relative_pos + 1;
2323        let chain_idx = relative_pos as usize & ((1 << self.chain_log) - 1);
2324        let prev = self.hash_table[hash];
2325        self.chain_table[chain_idx] = prev;
2326        self.hash_table[hash] = stored;
2327    }
2328
2329    fn insert_positions(&mut self, start: usize, end: usize) {
2330        for pos in start..end {
2331            self.insert_position(pos);
2332        }
2333    }
2334
    // Fixed-size stack array is intentional: it avoids heap allocation on
    // the hot path and the sentinel loop exits at self.search_depth.
    //
    // Returns a sentinel-terminated list of absolute candidate positions
    // sharing abs_pos's 4-byte hash; unused slots remain usize::MAX and
    // consumers stop at the first sentinel.
    fn chain_candidates(&self, abs_pos: usize) -> [usize; MAX_HC_SEARCH_DEPTH] {
        let mut buf = [usize::MAX; MAX_HC_SEARCH_DEPTH];
        let idx = abs_pos - self.history_abs_start;
        let concat = self.live_history();
        // A position needs a full 4-byte hash key; near the end of history
        // there is nothing to look up.
        if idx + 4 > concat.len() {
            return buf;
        }
        let hash = self.hash_position(&concat[idx..]);
        let chain_mask = (1 << self.chain_log) - 1;

        let mut cur = self.hash_table[hash];
        let mut filled = 0;
        // Follow chain up to search_depth valid candidates, skipping stale
        // entries (evicted from window) instead of stopping at them.
        // Stored values are (relative_pos + 1); decode with wrapping_sub(1)
        // and recover absolute position via position_base + relative.
        // Break on self-loops (masked chain_idx collision at periodicity).
        // Cap total steps at 4x search depth to bound time spent skipping
        // stale entries while still finding valid candidates deeper in chain.
        let mut steps = 0;
        let max_chain_steps = self.search_depth * 4;
        while filled < self.search_depth && steps < max_chain_steps {
            if cur == HC_EMPTY {
                break;
            }
            let candidate_rel = cur.wrapping_sub(1) as usize;
            let candidate_abs = self.position_base + candidate_rel;
            let next = self.chain_table[candidate_rel & chain_mask];
            steps += 1;
            if next == cur {
                // Self-loop: two positions share chain_idx, stop to avoid
                // spinning on the same candidate forever.
                // The looping candidate itself may still be usable; keep it.
                // Not bumping `filled` is fine — we break immediately and
                // the rest of buf is already sentinel-filled.
                if candidate_abs >= self.history_abs_start && candidate_abs < abs_pos {
                    buf[filled] = candidate_abs;
                }
                break;
            }
            cur = next;
            // Only in-window candidates strictly before abs_pos count toward
            // `filled`; stale ones are skipped but still cost a step.
            if candidate_abs < self.history_abs_start || candidate_abs >= abs_pos {
                continue;
            }
            buf[filled] = candidate_abs;
            filled += 1;
        }
        buf
    }
2383
2384    fn find_best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
2385        let rep = self.repcode_candidate(abs_pos, lit_len);
2386        let hash = self.hash_chain_candidate(abs_pos, lit_len);
2387        Self::better_candidate(rep, hash)
2388    }
2389
2390    fn hash_chain_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
2391        let concat = self.live_history();
2392        let current_idx = abs_pos - self.history_abs_start;
2393        if current_idx + HC_MIN_MATCH_LEN > concat.len() {
2394            return None;
2395        }
2396
2397        let mut best: Option<MatchCandidate> = None;
2398        for candidate_abs in self.chain_candidates(abs_pos) {
2399            if candidate_abs == usize::MAX {
2400                break;
2401            }
2402            let candidate_idx = candidate_abs - self.history_abs_start;
2403            let match_len =
2404                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
2405            if match_len >= HC_MIN_MATCH_LEN {
2406                let candidate = self.extend_backwards(candidate_abs, abs_pos, match_len, lit_len);
2407                best = Self::better_candidate(best, Some(candidate));
2408                if best.is_some_and(|b| b.match_len >= self.target_len) {
2409                    return best;
2410                }
2411            }
2412        }
2413        best
2414    }
2415
2416    fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
2417        let reps = if lit_len == 0 {
2418            [
2419                Some(self.offset_hist[1] as usize),
2420                Some(self.offset_hist[2] as usize),
2421                (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
2422            ]
2423        } else {
2424            [
2425                Some(self.offset_hist[0] as usize),
2426                Some(self.offset_hist[1] as usize),
2427                Some(self.offset_hist[2] as usize),
2428            ]
2429        };
2430
2431        let concat = self.live_history();
2432        let current_idx = abs_pos - self.history_abs_start;
2433        if current_idx + HC_MIN_MATCH_LEN > concat.len() {
2434            return None;
2435        }
2436
2437        let mut best = None;
2438        for rep in reps.into_iter().flatten() {
2439            if rep == 0 || rep > abs_pos {
2440                continue;
2441            }
2442            let candidate_pos = abs_pos - rep;
2443            if candidate_pos < self.history_abs_start {
2444                continue;
2445            }
2446            let candidate_idx = candidate_pos - self.history_abs_start;
2447            let match_len =
2448                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
2449            if match_len >= HC_MIN_MATCH_LEN {
2450                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
2451                best = Self::better_candidate(best, Some(candidate));
2452            }
2453        }
2454        best
2455    }
2456
2457    fn extend_backwards(
2458        &self,
2459        mut candidate_pos: usize,
2460        mut abs_pos: usize,
2461        mut match_len: usize,
2462        lit_len: usize,
2463    ) -> MatchCandidate {
2464        let concat = self.live_history();
2465        let min_abs_pos = abs_pos - lit_len;
2466        while abs_pos > min_abs_pos
2467            && candidate_pos > self.history_abs_start
2468            && concat[candidate_pos - self.history_abs_start - 1]
2469                == concat[abs_pos - self.history_abs_start - 1]
2470        {
2471            candidate_pos -= 1;
2472            abs_pos -= 1;
2473            match_len += 1;
2474        }
2475        MatchCandidate {
2476            start: abs_pos,
2477            offset: abs_pos - candidate_pos,
2478            match_len,
2479        }
2480    }
2481
2482    fn better_candidate(
2483        lhs: Option<MatchCandidate>,
2484        rhs: Option<MatchCandidate>,
2485    ) -> Option<MatchCandidate> {
2486        match (lhs, rhs) {
2487            (None, other) | (other, None) => other,
2488            (Some(lhs), Some(rhs)) => {
2489                let lhs_gain = Self::match_gain(lhs.match_len, lhs.offset);
2490                let rhs_gain = Self::match_gain(rhs.match_len, rhs.offset);
2491                if rhs_gain > lhs_gain {
2492                    Some(rhs)
2493                } else {
2494                    Some(lhs)
2495                }
2496            }
2497        }
2498    }
2499
2500    fn match_gain(match_len: usize, offset: usize) -> i32 {
2501        debug_assert!(
2502            offset > 0,
2503            "zstd offsets are 1-indexed, offset=0 is invalid"
2504        );
2505        let offset_bits = 32 - (offset as u32).leading_zeros() as i32;
2506        (match_len as i32) * 4 - offset_bits
2507    }
2508
2509    // Lazy lookahead queries pos+1/pos+2 before they are inserted into hash
2510    // tables — matching C zstd behavior. Seeding before comparing would let a
2511    // position match against itself, changing semantics.
2512    fn pick_lazy_match(
2513        &self,
2514        abs_pos: usize,
2515        lit_len: usize,
2516        best: Option<MatchCandidate>,
2517    ) -> Option<MatchCandidate> {
2518        let best = best?;
2519        if best.match_len >= self.target_len
2520            || abs_pos + 1 + HC_MIN_MATCH_LEN > self.history_abs_end()
2521        {
2522            return Some(best);
2523        }
2524
2525        let current_gain = Self::match_gain(best.match_len, best.offset) + 4;
2526
2527        // Lazy check: evaluate pos+1
2528        let next = self.find_best_match(abs_pos + 1, lit_len + 1);
2529        if let Some(next) = next {
2530            let next_gain = Self::match_gain(next.match_len, next.offset);
2531            if next_gain > current_gain {
2532                return None;
2533            }
2534        }
2535
2536        // Lazy2 check: also evaluate pos+2
2537        if self.lazy_depth >= 2 && abs_pos + 2 + HC_MIN_MATCH_LEN <= self.history_abs_end() {
2538            let next2 = self.find_best_match(abs_pos + 2, lit_len + 2);
2539            if let Some(next2) = next2 {
2540                let next2_gain = Self::match_gain(next2.match_len, next2.offset);
2541                // Must beat current gain + extra literal cost
2542                if next2_gain > current_gain + 4 {
2543                    return None;
2544                }
2545            }
2546        }
2547
2548        Some(best)
2549    }
2550}
2551
#[test]
fn matches() {
    // End-to-end check of the simple backend: feed blocks, replay every
    // emitted sequence like a decoder would, and verify the replayed bytes
    // equal the original input. The exact number of next_sequence calls per
    // block is part of what this test pins down.
    let mut matcher = MatchGenerator::new(1000);
    let mut original_data = Vec::new();
    let mut reconstructed = Vec::new();

    // Decoder stand-in: literals are appended verbatim; a triple appends its
    // literals, then copies match_len bytes starting `offset` back.
    // Byte-by-byte copying is required for overlapping matches
    // (offset < match_len).
    let replay_sequence = |seq: Sequence<'_>, reconstructed: &mut Vec<u8>| match seq {
        Sequence::Literals { literals } => {
            assert!(!literals.is_empty());
            reconstructed.extend_from_slice(literals);
        }
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            assert!(offset > 0);
            assert!(match_len >= MIN_MATCH_LEN);
            reconstructed.extend_from_slice(literals);
            assert!(offset <= reconstructed.len());
            let start = reconstructed.len() - offset;
            for i in 0..match_len {
                let byte = reconstructed[start + i];
                reconstructed.push(byte);
            }
        }
    };

    // Block 1: all zeroes — one sequence covers the whole block.
    matcher.add_data(
        alloc::vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));

    // next_sequence returns false once the block is exhausted.
    assert!(!matcher.next_sequence(|_| {}));

    // Block 2: a repeating 6-byte pattern plus a zero run that can match the
    // previous block — consumed in exactly three sequences.
    matcher.add_data(
        alloc::vec![
            1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
        ],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[
        1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
    ]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Block 3: partially-matchable prefix, new bytes, then a zero run.
    matcher.add_data(
        alloc::vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Block 4: fully matchable against earlier zero runs.
    matcher.add_data(
        alloc::vec![0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Block 5: matches the tail of block 3.
    matcher.add_data(
        alloc::vec![7, 8, 9, 10, 11],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[7, 8, 9, 10, 11]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Block 6: skip_matching consumes the block without emitting sequences,
    // so the replay buffer is updated by hand here.
    matcher.add_data(
        alloc::vec![1, 3, 5, 7, 9],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    matcher.skip_matching();
    original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
    reconstructed.extend_from_slice(&[1, 3, 5, 7, 9]);
    assert!(!matcher.next_sequence(|_| {}));

    // Block 7: identical to block 6 — skipped data must still be matchable.
    matcher.add_data(
        alloc::vec![1, 3, 5, 7, 9],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[1, 3, 5, 7, 9]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Block 8: an intra-block repeat with a fresh tail — two sequences.
    matcher.add_data(
        alloc::vec![0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Replaying every sequence must reproduce the input exactly.
    assert_eq!(reconstructed, original_data);
}
2671
#[test]
fn dfast_matches_roundtrip_multi_block_pattern() {
    // Two identical 128 KiB blocks built from a repeating 10-byte pattern:
    // the second block must be reconstructible from matches into the first
    // block's history.
    let pattern = [9, 21, 44, 184, 19, 96, 171, 109, 141, 251];
    let first_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
    let second_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();

    let mut matcher = DfastMatchGenerator::new(1 << 22);
    // Decoder stand-in; byte-by-byte copy handles overlapping matches.
    let replay = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            decoded.extend_from_slice(literals);
            let from = decoded.len() - offset;
            for i in from..from + match_len {
                let byte = decoded[i];
                decoded.push(byte);
            }
        }
    };

    let mut decoded = Vec::new();
    matcher.add_data(first_block.clone(), |_| {});
    matcher.start_matching(|seq| replay(&mut decoded, seq));
    assert_eq!(decoded, first_block);

    matcher.add_data(second_block.clone(), |_| {});
    let first_len = decoded.len();
    matcher.start_matching(|seq| replay(&mut decoded, seq));
    assert_eq!(&decoded[first_len..], second_block.as_slice());
}
2706
#[test]
fn driver_switches_backends_and_initializes_dfast_via_reset() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // The default level maps to the dfast backend with a 4 MiB window.
    driver.reset(CompressionLevel::Default);
    assert_eq!(driver.active_backend, MatcherBackend::Dfast);
    assert_eq!(driver.window_size(), (1u64 << 22));

    // The first block is skipped, so it only seeds history.
    let mut block = driver.get_next_space();
    block[..12].copy_from_slice(b"abcabcabcabc");
    block.truncate(12);
    driver.commit_space(block);
    assert_eq!(driver.get_last_space(), b"abcabcabcabc");
    driver.skip_matching();

    // A second, identical block must match against that history.
    let mut block = driver.get_next_space();
    block[..12].copy_from_slice(b"abcabcabcabc");
    block.truncate(12);
    driver.commit_space(block);

    let mut reconstructed = b"abcabcabcabc".to_vec();
    driver.start_matching(|seq| match seq {
        Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            reconstructed.extend_from_slice(literals);
            let from = reconstructed.len() - offset;
            for i in from..from + match_len {
                let byte = reconstructed[i];
                reconstructed.push(byte);
            }
        }
    });
    assert_eq!(reconstructed, b"abcabcabcabcabcabcabcabc");

    // Resetting to another level must re-derive the window size.
    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.window_size(), (1u64 << 17));
}
2748
#[test]
fn driver_level4_selects_row_backend() {
    // Level 4 is served by the row-based matcher backend.
    let mut driver = MatchGeneratorDriver::new(32, 2);
    driver.reset(CompressionLevel::Level(4));
    assert_eq!(driver.active_backend, MatcherBackend::Row);
}
2755
#[test]
fn driver_small_source_hint_shrinks_dfast_hash_tables() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Without a source-size hint the dfast table has its full default size.
    driver.reset(CompressionLevel::Level(2));
    let mut block = driver.get_next_space();
    block[..12].copy_from_slice(b"abcabcabcabc");
    block.truncate(12);
    driver.commit_space(block);
    driver.skip_matching();
    let unhinted_len = driver.dfast_matcher().short_hash.len();
    assert_eq!(unhinted_len, 1 << DFAST_HASH_BITS);

    // A tiny hint clamps both the window and the table footprint.
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Level(2));
    let mut block = driver.get_next_space();
    block[..12].copy_from_slice(b"xyzxyzxyzxyz");
    block.truncate(12);
    driver.commit_space(block);
    driver.skip_matching();
    let hinted_len = driver.dfast_matcher().short_hash.len();

    assert_eq!(driver.window_size(), 1 << MIN_HINTED_WINDOW_LOG);
    assert_eq!(hinted_len, 1 << MIN_HINTED_WINDOW_LOG);
    assert!(
        hinted_len < unhinted_len,
        "tiny source hint should reduce dfast table footprint"
    );
}
2785
#[test]
fn driver_small_source_hint_shrinks_row_hash_tables() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Unhinted Level(4) allocates the full-size row-head table.
    driver.reset(CompressionLevel::Level(4));
    let mut block = driver.get_next_space();
    block[..12].copy_from_slice(b"abcabcabcabc");
    block.truncate(12);
    driver.commit_space(block);
    driver.skip_matching();
    let unhinted_rows = driver.row_matcher().row_heads.len();
    assert_eq!(unhinted_rows, 1 << (ROW_HASH_BITS - ROW_LOG));

    // A tiny hint shrinks both the window and the row-head table.
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Level(4));
    let mut block = driver.get_next_space();
    block[..12].copy_from_slice(b"xyzxyzxyzxyz");
    block.truncate(12);
    driver.commit_space(block);
    driver.skip_matching();
    let hinted_rows = driver.row_matcher().row_heads.len();

    assert_eq!(driver.window_size(), 1 << MIN_HINTED_WINDOW_LOG);
    assert_eq!(
        hinted_rows,
        1 << ((MIN_HINTED_WINDOW_LOG as usize) - ROW_LOG)
    );
    assert!(
        hinted_rows < unhinted_rows,
        "tiny source hint should reduce row hash table footprint"
    );
}
2818
#[test]
fn row_matches_roundtrip_multi_block_pattern() {
    let pattern = [7, 13, 44, 184, 19, 96, 171, 109, 141, 251];
    let first_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
    let second_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();

    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.ensure_tables();
    // Decoder stand-in; byte-by-byte copy handles overlapping matches.
    let replay = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            decoded.extend_from_slice(literals);
            let from = decoded.len() - offset;
            for i in from..from + match_len {
                let byte = decoded[i];
                decoded.push(byte);
            }
        }
    };

    let mut history = Vec::new();
    matcher.add_data(first_block.clone(), |_| {});
    matcher.start_matching(|seq| replay(&mut history, seq));
    assert_eq!(history, first_block);

    // The second block must be reconstructible via cross-block matches.
    matcher.add_data(second_block.clone(), |_| {});
    let second_start = history.len();
    matcher.start_matching(|seq| replay(&mut history, seq));
    assert_eq!(&history[second_start..], second_block.as_slice());

    // Force a literals-only pass so the Sequence::Literals arm is exercised.
    let third_block: Vec<u8> = (0u8..=255).collect();
    matcher.add_data(third_block.clone(), |_| {});
    let third_start = history.len();
    matcher.start_matching(|seq| replay(&mut history, seq));
    assert_eq!(&history[third_start..], third_block.as_slice());
}
2862
#[test]
fn row_short_block_emits_literals_only() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);

    // Five bytes is below the minimum match length: only literals can
    // come out.
    matcher.add_data(b"abcde".to_vec(), |_| {});

    let mut saw_triple = false;
    let mut reconstructed = Vec::new();
    matcher.start_matching(|seq| match seq {
        Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
        Sequence::Triple { .. } => saw_triple = true,
    });

    assert!(
        !saw_triple,
        "row backend must not emit triples for short blocks"
    );
    assert_eq!(reconstructed, b"abcde");

    // Then feed a clearly matchable block and ensure the Triple arm is reachable.
    saw_triple = false;
    matcher.add_data(b"abcdeabcde".to_vec(), |_| {});
    matcher.start_matching(|seq| {
        saw_triple |= matches!(seq, Sequence::Triple { .. });
    });
    assert!(
        saw_triple,
        "row backend should emit triples on repeated data"
    );
}
2896
#[test]
fn row_pick_lazy_returns_best_when_lookahead_is_out_of_bounds() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.add_data(b"abcabc".to_vec(), |_| {});

    // History is too short to evaluate pos+1, so the lazy heuristic must
    // hand back the existing candidate untouched.
    let best = MatchCandidate {
        start: 0,
        offset: 1,
        match_len: ROW_MIN_MATCH_LEN,
    };
    let picked = matcher
        .pick_lazy_match(0, 0, Some(best))
        .expect("best candidate must survive");

    assert_eq!(
        (picked.start, picked.offset, picked.match_len),
        (best.start, best.offset, best.match_len)
    );
}
2916
2917#[test]
2918fn row_backfills_previous_block_tail_for_cross_boundary_match() {
2919    let mut matcher = RowMatchGenerator::new(1 << 22);
2920    matcher.configure(ROW_CONFIG);
2921
2922    let mut first_block = alloc::vec![0xA5; 64];
2923    first_block.extend_from_slice(b"XYZ");
2924    let second_block = b"XYZXYZtail".to_vec();
2925
2926    let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
2927        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
2928        Sequence::Triple {
2929            literals,
2930            offset,
2931            match_len,
2932        } => {
2933            decoded.extend_from_slice(literals);
2934            let start = decoded.len() - offset;
2935            for i in 0..match_len {
2936                let byte = decoded[start + i];
2937                decoded.push(byte);
2938            }
2939        }
2940    };
2941
2942    matcher.add_data(first_block.clone(), |_| {});
2943    let mut reconstructed = Vec::new();
2944    matcher.start_matching(|seq| replay_sequence(&mut reconstructed, seq));
2945    assert_eq!(reconstructed, first_block);
2946
2947    matcher.add_data(second_block.clone(), |_| {});
2948    let mut saw_cross_boundary = false;
2949    let prefix_len = reconstructed.len();
2950    matcher.start_matching(|seq| {
2951        if let Sequence::Triple {
2952            literals,
2953            offset,
2954            match_len,
2955        } = seq
2956            && literals.is_empty()
2957            && offset == 3
2958            && match_len >= ROW_MIN_MATCH_LEN
2959        {
2960            saw_cross_boundary = true;
2961        }
2962        replay_sequence(&mut reconstructed, seq);
2963    });
2964
2965    assert!(
2966        saw_cross_boundary,
2967        "row matcher should reuse the 3-byte previous-block tail"
2968    );
2969    assert_eq!(&reconstructed[prefix_len..], second_block.as_slice());
2970}
2971
#[test]
fn driver_unhinted_level2_keeps_default_dfast_hash_table_size() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    driver.reset(CompressionLevel::Level(2));
    let mut block = driver.get_next_space();
    block[..12].copy_from_slice(b"abcabcabcabc");
    block.truncate(12);
    driver.commit_space(block);
    driver.skip_matching();

    // No source-size hint was given, so the table keeps its default size.
    assert_eq!(
        driver.dfast_matcher().short_hash.len(),
        1 << DFAST_HASH_BITS,
        "unhinted Level(2) should keep default dfast table size"
    );
}
2990
#[test]
fn simple_backend_rejects_undersized_pooled_suffix_store() {
    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
    driver.reset(CompressionLevel::Fastest);

    // Seed the pool with a store that is far too small for the next block.
    driver.suffix_pool.push(SuffixStore::with_capacity(1024));

    let mut block = driver.get_next_space();
    block.clear();
    block.resize(4096, 0xAB);
    driver.commit_space(block);

    let window_entry = driver
        .match_generator
        .window
        .last()
        .expect("window entry must exist after commit");
    assert!(
        window_entry.suffixes.slots.len() >= 4096,
        "undersized pooled suffix store must not be reused for larger blocks"
    );
}
3016
#[test]
fn source_hint_clamps_driver_slice_size_to_window() {
    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Default);

    // The hinted window bounds both the reported size and the slice size,
    // and freshly handed-out spaces match it exactly.
    let window = driver.window_size() as usize;
    assert_eq!(window, 1 << MIN_HINTED_WINDOW_LOG);
    assert_eq!(driver.slice_size, window);

    let space = driver.get_next_space();
    assert_eq!(space.len(), window);
    driver.commit_space(space);
}
3031
#[test]
fn pooled_space_keeps_capacity_when_slice_size_shrinks() {
    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
    driver.reset(CompressionLevel::Default);

    // Allocate a full-window buffer and return it to the pool.
    let large = driver.get_next_space();
    let large_capacity = large.capacity();
    assert!(large_capacity >= 128 * 1024);
    driver.commit_space(large);

    // Shrinking the slice size must not shrink the pooled allocation.
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Default);

    let small = driver.get_next_space();
    assert_eq!(small.len(), 1 << MIN_HINTED_WINDOW_LOG);
    assert!(
        small.capacity() >= large_capacity,
        "pooled buffer capacity should be preserved to avoid shrink/grow churn"
    );
}
3052
#[test]
fn driver_best_to_fastest_releases_oversized_hc_tables() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Initialize at Best — allocates large HC tables (2M hash, 1M chain).
    driver.reset(CompressionLevel::Best);
    assert_eq!(driver.window_size(), (1u64 << 24));

    // Feed data so tables are actually allocated via ensure_tables().
    let mut block = driver.get_next_space();
    block[..12].copy_from_slice(b"abcabcabcabc");
    block.truncate(12);
    driver.commit_space(block);
    driver.skip_matching();

    // Switch to Fastest — must release HC tables.
    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.window_size(), (1u64 << 17));

    // The HC matcher should hold no table memory after the backend switch.
    let hc = driver.hc_match_generator.as_ref().unwrap();
    assert!(
        hc.hash_table.is_empty(),
        "HC hash_table should be released after switching away from Best"
    );
    assert!(
        hc.chain_table.is_empty(),
        "HC chain_table should be released after switching away from Best"
    );
}
3083
#[test]
fn driver_better_to_best_resizes_hc_tables() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Initialize at Better — allocates small HC tables (1M hash, 512K chain).
    driver.reset(CompressionLevel::Better);
    assert_eq!(driver.window_size(), (1u64 << 23));

    let mut block = driver.get_next_space();
    block[..12].copy_from_slice(b"abcabcabcabc");
    block.truncate(12);
    driver.commit_space(block);
    driver.skip_matching();

    // Record the Better-level table sizes for comparison.
    let hc = driver.hc_match_generator.as_ref().unwrap();
    let better_hash_len = hc.hash_table.len();
    let better_chain_len = hc.chain_table.len();

    // Switch to Best — must resize to larger tables.
    driver.reset(CompressionLevel::Best);
    assert_eq!(driver.window_size(), (1u64 << 24));

    // Feed data to trigger ensure_tables with new sizes.
    let mut block = driver.get_next_space();
    block[..12].copy_from_slice(b"xyzxyzxyzxyz");
    block.truncate(12);
    driver.commit_space(block);
    driver.skip_matching();

    let hc = driver.hc_match_generator.as_ref().unwrap();
    assert!(
        hc.hash_table.len() > better_hash_len,
        "Best hash_table ({}) should be larger than Better ({})",
        hc.hash_table.len(),
        better_hash_len
    );
    assert!(
        hc.chain_table.len() > better_chain_len,
        "Best chain_table ({}) should be larger than Better ({})",
        hc.chain_table.len(),
        better_chain_len
    );
}
3127
/// A dictionary primed before the first block must stay addressable so the
/// first full block can reference it (zero literals, back-offset 8).
#[test]
fn prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    // Commit a block that repeats the dictionary content verbatim.
    let mut block = driver.get_next_space();
    block.clear();
    block.extend_from_slice(b"abcdefgh");
    driver.commit_space(block);

    let mut matched = false;
    driver.start_matching(|seq| {
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
        {
            if literals.is_empty() && offset == 8 && match_len >= MIN_MATCH_LEN {
                matched = true;
            }
        }
    });

    assert!(
        matched,
        "first full block should still match dictionary-primed history"
    );
}
3160
/// Early dictionary bytes (24 positions back) must remain addressable until
/// the frame's own output has filled the live window.
#[test]
fn prime_with_large_dictionary_preserves_early_history_until_first_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);

    // The block repeats the oldest 8 dictionary bytes, 24 positions back.
    let mut block = driver.get_next_space();
    block.clear();
    block.extend_from_slice(b"abcdefgh");
    driver.commit_space(block);

    let mut matched = false;
    driver.start_matching(|seq| {
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
        {
            if literals.is_empty() && offset == 24 && match_len >= MIN_MATCH_LEN {
                matched = true;
            }
        }
    });

    assert!(
        matched,
        "dictionary bytes should remain addressable until frame output exceeds the live window"
    );
}
3193
/// Even with no dictionary content, the provided repeat-offset history must
/// be installed on the simple matcher.
#[test]
fn prime_with_dictionary_applies_offset_history_even_when_content_is_empty() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    // Empty content: only the offset history [11, 7, 3] should take effect.
    driver.prime_with_dictionary(&[], [11, 7, 3]);
    assert_eq!(driver.match_generator.offset_hist, [11, 7, 3]);
}
3203
/// The dfast backend (level 2) must also see dictionary-primed history when
/// the first full block arrives.
#[test]
fn dfast_prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(2));

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    // Commit a block that repeats the dictionary content verbatim.
    let mut block = driver.get_next_space();
    block.clear();
    block.extend_from_slice(b"abcdefgh");
    driver.commit_space(block);

    let mut matched = false;
    driver.start_matching(|seq| {
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
        {
            if literals.is_empty() && offset == 8 && match_len >= DFAST_MIN_MATCH_LEN {
                matched = true;
            }
        }
    });

    assert!(
        matched,
        "dfast backend should match dictionary-primed history in first full block"
    );
}
3236
/// Priming a dictionary grows the matcher's retention budget, but it must
/// not change the window size reported for the frame header.
#[test]
fn prime_with_dictionary_does_not_inflate_reported_window_size() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    let reported_before = driver.window_size();
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    let reported_after = driver.window_size();

    assert_eq!(
        reported_after, reported_before,
        "dictionary retention budget must not change reported frame window size"
    );
}
3251
/// A dictionary whose final chunk is a single byte yields a capacity-1 suffix
/// table; such a tail must never be committed to the matcher window.
#[test]
fn prime_with_dictionary_does_not_reuse_tiny_suffix_store() {
    let mut driver = MatchGeneratorDriver::new(8, 2);
    driver.reset(CompressionLevel::Fastest);

    // 9 bytes: one full slice plus a 1-byte tail chunk.
    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);

    let all_long_enough = driver
        .match_generator
        .window
        .iter()
        .all(|entry| entry.data.len() >= MIN_MATCH_LEN);
    assert!(
        all_long_enough,
        "dictionary priming must not commit tails shorter than MIN_MATCH_LEN"
    );
}
3270
/// Retention budget must grow only by the dictionary bytes that actually made
/// it into history, excluding an uncommittable 1-byte tail.
#[test]
fn prime_with_dictionary_counts_only_committed_tail_budget() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    let budget_before = driver.match_generator.max_window_size;
    // 9 bytes: one full slice plus a 1-byte tail that cannot be committed.
    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);

    assert_eq!(
        driver.match_generator.max_window_size,
        budget_before + 8,
        "retention budget must account only for dictionary bytes actually committed to history"
    );
}
3286
/// A 4-byte dictionary tail is still usable by dfast via short-hash overlap
/// into the next block, so it must be counted in the retained budget.
#[test]
fn dfast_prime_with_dictionary_counts_four_byte_tail_budget() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(2));

    let budget_before = driver.dfast_matcher().max_window_size;
    // 12 bytes: one full 8-byte slice plus a 4-byte tail.
    driver.prime_with_dictionary(b"abcdefghijkl", [1, 4, 8]);

    assert_eq!(
        driver.dfast_matcher().max_window_size,
        budget_before + 12,
        "dfast retention budget should include 4-byte dictionary tails"
    );
}
3303
/// The row backend (level 4) must also see dictionary-primed history when
/// the first full block arrives.
#[test]
fn row_prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    // Commit a block that repeats the dictionary content verbatim.
    let mut block = driver.get_next_space();
    block.clear();
    block.extend_from_slice(b"abcdefgh");
    driver.commit_space(block);

    let mut matched = false;
    driver.start_matching(|seq| {
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
        {
            if literals.is_empty() && offset == 8 && match_len >= ROW_MIN_MATCH_LEN {
                matched = true;
            }
        }
    });

    assert!(
        matched,
        "row backend should match dictionary-primed history in first full block"
    );
}
3336
/// For the row backend, a 1-byte dictionary tail (shorter than the 4-byte
/// hash key) cannot be committed and must be excluded from the retained
/// window budget.
#[test]
fn row_prime_with_dictionary_subtracts_uncommitted_tail_budget() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));

    let budget_before = driver.row_matcher().max_window_size;
    // Slice size is 8; the trailing ninth byte cannot be committed.
    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);

    assert_eq!(
        driver.row_matcher().max_window_size,
        budget_before + 8,
        "row retained window must exclude uncommitted 1-byte tail"
    );
}
3353
/// Once dictionary-primed slices fall out of a tiny live window on the row
/// backend, the retained budget must be retired rather than leaking into
/// headroom for live history.
#[test]
fn prime_with_dictionary_budget_shrinks_after_row_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));
    // Shrink the live window so primed slices get evicted quickly.
    driver.row_matcher_mut().max_window_size = 8;
    driver.reported_window_size = 8;

    let budget_before = driver.row_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(driver.row_matcher().max_window_size, budget_before + 24);

    // Two full blocks push every dictionary slice out of the window.
    for block in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut buf = driver.get_next_space();
        buf.clear();
        buf.extend_from_slice(block);
        driver.commit_space(buf);
        driver.skip_matching();
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.row_matcher().max_window_size,
        budget_before,
        "retired dictionary budget must not remain reusable for live history"
    );
}
3384
/// get_last_space must return the most recent committed block on the row
/// backend, and a subsequent reset to Fastest must clear the row window.
#[test]
fn row_get_last_space_and_reset_to_fastest_clears_window() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));

    let mut buf = driver.get_next_space();
    buf.clear();
    buf.extend_from_slice(b"row-data");
    driver.commit_space(buf);

    assert_eq!(driver.get_last_space(), b"row-data");

    // Switching back to the simple backend must leave no row history behind.
    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.active_backend, MatcherBackend::Simple);
    assert!(driver.row_matcher().window.is_empty());
}
3401
/// Ensures switching from Row to Simple returns pooled buffers and row tables.
#[test]
fn driver_reset_from_row_backend_reclaims_row_buffer_pool() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));
    assert_eq!(driver.active_backend, MatcherBackend::Row);

    // Touch the row matcher so reset() executes the Row backend retirement
    // path, then commit some data to populate its history.
    let _ = driver.row_matcher();
    let mut buf = driver.get_next_space();
    buf.extend_from_slice(b"row-data-to-recycle");
    driver.commit_space(buf);

    let pool_len_before = driver.vec_pool.len();
    driver.reset(CompressionLevel::Fastest);

    assert_eq!(driver.active_backend, MatcherBackend::Simple);
    let row = driver
        .row_match_generator
        .as_ref()
        .expect("row matcher should remain allocated after switch");
    // Row tables must be released...
    assert!(row.row_heads.is_empty());
    assert!(row.row_positions.is_empty());
    assert!(row.row_tags.is_empty());
    // ...and the history buffers returned to the pool, not dropped.
    assert!(
        driver.vec_pool.len() >= pool_len_before,
        "row reset should recycle row history buffers"
    );
}
3432
/// Guards the optional row backend retirement path when no row matcher was allocated.
#[test]
fn driver_reset_from_row_backend_tolerates_missing_row_matcher() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    // Force the inconsistent state: Row backend active but no matcher.
    driver.active_backend = MatcherBackend::Row;
    driver.row_match_generator = None;

    // Must not panic despite the missing matcher instance.
    driver.reset(CompressionLevel::Fastest);

    assert_eq!(driver.active_backend, MatcherBackend::Simple);
}
3444
/// A zero source-size hint must clamp the window log down to the minimum
/// hinted floor instead of keeping the level's default.
#[test]
fn adjust_params_for_zero_source_size_uses_min_hinted_window_floor() {
    let mut params = resolve_level_params(CompressionLevel::Level(4), None);
    params.window_log = 22;

    let tuned = adjust_params_for_source_size(params, 0);
    assert_eq!(tuned.window_log, MIN_HINTED_WINDOW_LOG);
}
3452
/// Depth-1 lazy search: with a candidate of minimal length supplied at
/// `abs_pos`, a run of identical bytes lets the search find a better match
/// one position later, so the picker must defer (return None).
#[test]
fn row_pick_lazy_returns_none_when_next_is_better() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    // 64 identical bytes give the lazy probe far better candidates than the
    // minimal one handed in below.
    matcher.add_data(alloc::vec![b'a'; 64], |_| {});
    matcher.ensure_tables();

    // Hand the picker a minimal-length candidate at an interior position.
    let abs_pos = matcher.history_abs_start + 16;
    let best = MatchCandidate {
        start: abs_pos,
        offset: 8,
        match_len: ROW_MIN_MATCH_LEN,
    };
    assert!(
        matcher.pick_lazy_match(abs_pos, 0, Some(best)).is_none(),
        "lazy picker should defer when next position is clearly better"
    );
}
3471
/// Depth-2 lazy search: when the candidate two positions ahead is longer by
/// more than one byte, deferring wins, so the picker must return None.
#[test]
fn row_pick_lazy_depth2_returns_none_when_next2_significantly_better() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.lazy_depth = 2;
    // search_depth = 0: rely on repcode candidates only (see the "baseline
    // repcode match" expectation below) to keep the scenario deterministic.
    matcher.search_depth = 0;
    matcher.offset_hist = [6, 9, 1];

    // Crafted pattern where the anchor at +2 extends noticeably further than
    // the baseline match at abs_pos.
    let mut data = alloc::vec![b'x'; 40];
    data[11..30].copy_from_slice(b"EFABCABCAEFABCAEFAB");
    matcher.add_data(data, |_| {});
    matcher.ensure_tables();

    let abs_pos = matcher.history_abs_start + 20;
    let best = matcher
        .best_match(abs_pos, 0)
        .expect("expected baseline repcode match");
    assert_eq!(best.offset, 9);
    assert_eq!(best.match_len, ROW_MIN_MATCH_LEN);

    // Sanity-check: the +1 candidate must not already beat the baseline,
    // otherwise the depth-1 path (not depth-2) would explain the deferral.
    if let Some(next) = matcher.best_match(abs_pos + 1, 1) {
        assert!(next.match_len <= best.match_len);
    }

    let next2 = matcher
        .best_match(abs_pos + 2, 2)
        .expect("expected +2 candidate");
    assert!(
        next2.match_len > best.match_len + 1,
        "+2 candidate must be significantly better for depth-2 lazy skip"
    );
    assert!(
        matcher.pick_lazy_match(abs_pos, 0, Some(best)).is_none(),
        "lazy picker should defer when +2 candidate is significantly better"
    );
}
3508
/// Depth-2 lazy counterpart: when the +2 candidate is only one byte longer,
/// the picker must keep the current best match instead of deferring.
#[test]
fn row_pick_lazy_depth2_keeps_best_when_next2_is_only_one_byte_better() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.lazy_depth = 2;
    // search_depth = 0: rely on repcode candidates only (see the "baseline
    // repcode match" expectation below) to keep the scenario deterministic.
    matcher.search_depth = 0;
    matcher.offset_hist = [6, 9, 1];

    // Same pattern as the "significantly better" test, except the trailing
    // 'Z' caps the +2 candidate at exactly one byte over the baseline.
    let mut data = alloc::vec![b'x'; 40];
    data[11..30].copy_from_slice(b"EFABCABCAEFABCAEFAZ");
    matcher.add_data(data, |_| {});
    matcher.ensure_tables();

    let abs_pos = matcher.history_abs_start + 20;
    let best = matcher
        .best_match(abs_pos, 0)
        .expect("expected baseline repcode match");
    assert_eq!(best.offset, 9);
    assert_eq!(best.match_len, ROW_MIN_MATCH_LEN);

    // Sanity-check the scenario: +2 is longer by exactly one byte.
    let next2 = matcher
        .best_match(abs_pos + 2, 2)
        .expect("expected +2 candidate");
    assert_eq!(next2.match_len, best.match_len + 1);
    let chosen = matcher
        .pick_lazy_match(abs_pos, 0, Some(best))
        .expect("lazy picker should keep current best");
    assert_eq!(chosen.start, best.start);
    assert_eq!(chosen.offset, best.offset);
    assert_eq!(chosen.match_len, best.match_len);
}
3540
/// Verifies row/tag extraction uses the high bits of the multiplicative hash.
#[test]
fn row_hash_and_row_extracts_high_bits() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    // Bytes 0-3 and 8-11 share a 3-byte prefix but differ in the fourth byte,
    // so the two 4-byte hash keys are distinct.
    matcher.add_data(
        alloc::vec![
            0xAA, 0xBB, 0xCC, 0x11, 0x10, 0x20, 0x30, 0x40, 0xAA, 0xBB, 0xCC, 0x22, 0x50, 0x60,
            0x70, 0x80,
        ],
        |_| {},
    );
    matcher.ensure_tables();

    let pos = matcher.history_abs_start + 8;
    let (row, tag) = matcher
        .hash_and_row(pos)
        .expect("row hash should be available");

    // Recompute the expected row/tag independently: multiply the
    // little-endian 4-byte key by the prime, keep the top
    // (row_hash_log + ROW_TAG_BITS) bits; the tag is the low 8 of those and
    // the row is the remaining high bits.
    let idx = pos - matcher.history_abs_start;
    let concat = matcher.live_history();
    let value = u32::from_le_bytes(concat[idx..idx + ROW_HASH_KEY_LEN].try_into().unwrap()) as u64;
    const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
    let hash = value.wrapping_mul(PRIME);
    let total_bits = matcher.row_hash_log + ROW_TAG_BITS;
    let combined = hash >> (u64::BITS as usize - total_bits);
    let expected_row =
        ((combined >> ROW_TAG_BITS) as usize) & ((1usize << matcher.row_hash_log) - 1);
    let expected_tag = combined as u8;

    assert_eq!(row, expected_row);
    assert_eq!(tag, expected_tag);
}
3574
/// A repcode whose target lands before the start of live history must be
/// rejected instead of reading out-of-range data.
#[test]
fn row_repcode_skips_candidate_before_history_start() {
    let mut row = RowMatchGenerator::new(1 << 22);
    row.configure(ROW_CONFIG);
    // Live history begins at absolute position 10; rep offset 3 from
    // position 12 would point before it.
    row.history = alloc::vec![b'a'; 20];
    row.history_abs_start = 10;
    row.history_start = 0;
    row.offset_hist = [3, 0, 0];

    assert!(row.repcode_candidate(12, 1).is_none());
}
3586
/// Repcode probing near the end of history must bail out rather than read
/// past the available bytes.
#[test]
fn row_repcode_returns_none_when_position_too_close_to_history_end() {
    let mut row = RowMatchGenerator::new(1 << 22);
    row.configure(ROW_CONFIG);
    row.history = b"abcde".to_vec();
    row.history_abs_start = 0;
    row.history_start = 0;
    row.offset_hist = [1, 0, 0];

    // Position 4 is the last byte of a 5-byte history.
    assert!(row.repcode_candidate(4, 1).is_none());
}
3598
/// row_candidate must yield None when abs_pos sits too close to the end of
/// history for a candidate to be formed.
#[test]
fn row_candidate_returns_none_when_abs_pos_near_end_of_history() {
    let mut row = RowMatchGenerator::new(1 << 22);
    row.configure(ROW_CONFIG);
    row.history_abs_start = 0;
    row.history_start = 0;
    row.history = b"abcde".to_vec();

    assert!(row.row_candidate(0, 0).is_none());
}
3609
/// With a history too short to hash a suffix, chain lookup must yield only
/// the usize::MAX sentinel entries.
#[test]
fn hc_chain_candidates_returns_sentinels_for_short_suffix() {
    let mut hc = HcMatchGenerator::new(32);
    hc.history = b"abc".to_vec();
    hc.history_abs_start = 0;
    hc.history_start = 0;
    hc.ensure_tables();

    let candidates = hc.chain_candidates(0);
    assert!(candidates.iter().all(|&pos| pos == usize::MAX));
}
3621
/// reset() must refill already-allocated HC tables with the empty sentinel
/// instead of leaving stale entries behind.
#[test]
fn hc_reset_refills_existing_tables_with_empty_sentinel() {
    let mut hc = HcMatchGenerator::new(32);
    hc.add_data(b"abcdeabcde".to_vec(), |_| {});
    hc.ensure_tables();
    assert!(!hc.hash_table.is_empty());
    assert!(!hc.chain_table.is_empty());
    // Poison both tables so any surviving entry is detectable.
    hc.hash_table.fill(123);
    hc.chain_table.fill(456);

    hc.reset(|_| {});

    for &slot in &hc.hash_table {
        assert_eq!(slot, HC_EMPTY);
    }
    for &slot in &hc.chain_table {
        assert_eq!(slot, HC_EMPTY);
    }
}
3637
/// An empty current block must not invoke the sequence callback at all.
#[test]
fn hc_start_matching_returns_early_for_empty_current_block() {
    let mut hc = HcMatchGenerator::new(32);
    hc.add_data(Vec::new(), |_| {});

    let mut invoked = false;
    hc.start_matching(|_| invoked = true);
    assert!(!invoked, "empty current block should not emit sequences");
}
3646
/// Crossing the compaction threshold must drain the consumed prefix and
/// rebase history_start back to zero.
#[test]
fn hc_compact_history_drains_when_threshold_crossed() {
    let mut hc = HcMatchGenerator::new(8);
    hc.history_start = 16;
    hc.history = b"abcdefghijklmnopqrstuvwxyz".to_vec();

    hc.compact_history();

    assert_eq!(hc.history_start, 0);
    assert_eq!(hc.history, b"qrstuvwxyz");
}
3656
/// When position_base is ahead of the requested position, no relative index
/// exists and the insert must leave both tables untouched.
#[test]
fn hc_insert_position_no_rebase_returns_when_relative_pos_unavailable() {
    let mut hc = HcMatchGenerator::new(32);
    hc.history = b"abcdefghijklmnop".to_vec();
    hc.history_abs_start = 0;
    hc.position_base = 1;
    hc.ensure_tables();
    // Snapshot both tables so any mutation is observable.
    let snapshot_hash = hc.hash_table.clone();
    let snapshot_chain = hc.chain_table.clone();

    hc.insert_position_no_rebase(0);

    assert_eq!(hc.hash_table, snapshot_hash);
    assert_eq!(hc.chain_table, snapshot_chain);
}
3672
/// Simple-backend twin of the eviction regression: once dictionary-primed
/// slices leave a tiny live window, the retained budget must be retired.
#[test]
fn prime_with_dictionary_budget_shrinks_after_simple_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);
    // Shrink the live window so primed slices get evicted quickly and budget
    // retirement can be asserted deterministically.
    driver.match_generator.max_window_size = 8;
    driver.reported_window_size = 8;

    let budget_before = driver.match_generator.max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(driver.match_generator.max_window_size, budget_before + 24);

    // Two full blocks push every dictionary slice out of the window.
    for block in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut buf = driver.get_next_space();
        buf.clear();
        buf.extend_from_slice(block);
        driver.commit_space(buf);
        driver.skip_matching();
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.match_generator.max_window_size, budget_before,
        "retired dictionary budget must not remain reusable for live history"
    );
}
3703
/// Dfast twin of the eviction regression: once dictionary-primed slices
/// leave a tiny live window, the retained budget must be retired.
#[test]
fn prime_with_dictionary_budget_shrinks_after_dfast_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(2));
    // Shrink the live window so primed slices get evicted quickly and budget
    // retirement can be asserted deterministically.
    driver.dfast_matcher_mut().max_window_size = 8;
    driver.reported_window_size = 8;

    let budget_before = driver.dfast_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(driver.dfast_matcher().max_window_size, budget_before + 24);

    // Two full blocks push every dictionary slice out of the window.
    for block in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut buf = driver.get_next_space();
        buf.clear();
        buf.extend_from_slice(block);
        driver.commit_space(buf);
        driver.skip_matching();
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.dfast_matcher().max_window_size,
        budget_before,
        "retired dictionary budget must not remain reusable for live history"
    );
}
3735
/// The hash-chain backend (Better) must also see dictionary-primed history
/// when the first full block arrives.
#[test]
fn hc_prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Better);

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    // Repeat the dictionary content so the HC matcher can find it.
    // HC_MIN_MATCH_LEN is 5, so an 8-byte match is well above threshold.
    let mut block = driver.get_next_space();
    block.clear();
    block.extend_from_slice(b"abcdefgh");
    driver.commit_space(block);

    let mut matched = false;
    driver.start_matching(|seq| {
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
        {
            if literals.is_empty() && offset == 8 && match_len >= HC_MIN_MATCH_LEN {
                matched = true;
            }
        }
    });

    assert!(
        matched,
        "hash-chain backend should match dictionary-primed history in first full block"
    );
}
3770
/// HC twin of the eviction regression: once dictionary-primed slices leave a
/// tiny live window, the retained budget must be retired.
#[test]
fn prime_with_dictionary_budget_shrinks_after_hc_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Better);
    // Shrink the live window so primed slices get evicted quickly.
    driver.hc_matcher_mut().max_window_size = 8;
    driver.reported_window_size = 8;

    let budget_before = driver.hc_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(driver.hc_matcher().max_window_size, budget_before + 24);

    // Two full blocks push every dictionary slice out of the window.
    for block in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut buf = driver.get_next_space();
        buf.clear();
        buf.extend_from_slice(block);
        driver.commit_space(buf);
        driver.skip_matching();
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.hc_matcher().max_window_size,
        budget_before,
        "retired dictionary budget must not remain reusable for live history"
    );
}
3801
/// Regression test for #51: HC inserts must keep working after absolute
/// history positions grow past the range a u32 table entry can address.
#[test]
fn hc_rebases_positions_after_u32_boundary() {
    let mut matcher = HcMatchGenerator::new(64);
    matcher.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
    matcher.ensure_tables();
    matcher.position_base = 0;
    // On targets where this value does not fit in usize the scenario cannot
    // occur, so the test simply bails out.
    let history_abs_start: usize = match (u64::from(u32::MAX) + 64).try_into() {
        Ok(value) => value,
        Err(_) => return,
    };
    // Simulate a long-running stream where absolute history positions crossed
    // the u32 range. Before #51 this disabled HC inserts entirely.
    matcher.history_abs_start = history_abs_start;
    matcher.skip_matching();
    assert_eq!(
        matcher.position_base, matcher.history_abs_start,
        "rebase should anchor to the oldest live absolute position"
    );

    assert!(
        matcher.hash_table.iter().any(|entry| *entry != HC_EMPTY),
        "HC hash table should still be populated after crossing u32 boundary"
    );

    // Verify rebasing preserves candidate lookup, not just table population.
    let abs_pos = matcher.history_abs_start + 10;
    let candidates = matcher.chain_candidates(abs_pos);
    assert!(
        candidates.iter().any(|candidate| *candidate != usize::MAX),
        "chain_candidates should return valid matches after rebase"
    );
}
3834
/// A rebase triggered at abs_pos must rebuild table state only for positions
/// already inserted (history_abs_start..abs_pos), never pre-seeding future
/// positions into the chains.
#[test]
fn hc_rebase_rebuilds_only_inserted_prefix() {
    let mut matcher = HcMatchGenerator::new(64);
    matcher.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
    matcher.ensure_tables();
    matcher.position_base = 0;
    // Bail out on targets where the post-u32 position does not fit in usize.
    let history_abs_start: usize = match (u64::from(u32::MAX) + 64).try_into() {
        Ok(value) => value,
        Err(_) => return,
    };
    matcher.history_abs_start = history_abs_start;
    let abs_pos = matcher.history_abs_start + 6;

    // Build the expected table state from scratch: same data, rebased
    // position_base, tables cleared, then inserts for only the prefix that
    // had been processed before abs_pos.
    let mut expected = HcMatchGenerator::new(64);
    expected.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
    expected.ensure_tables();
    expected.history_abs_start = history_abs_start;
    expected.position_base = expected.history_abs_start;
    expected.hash_table.fill(HC_EMPTY);
    expected.chain_table.fill(HC_EMPTY);
    for pos in expected.history_abs_start..abs_pos {
        expected.insert_position_no_rebase(pos);
    }

    matcher.maybe_rebase_positions(abs_pos);

    assert_eq!(
        matcher.position_base, matcher.history_abs_start,
        "rebase should still anchor to the oldest live absolute position"
    );
    assert_eq!(
        matcher.hash_table, expected.hash_table,
        "rebase must rebuild only positions already inserted before abs_pos"
    );
    assert_eq!(
        matcher.chain_table, expected.chain_table,
        "future positions must not be pre-seeded into HC chains during rebase"
    );
}
3874
/// A capacity-1 suffix store must still insert, key, and retrieve without
/// panicking.
#[test]
fn suffix_store_with_single_slot_does_not_panic_on_keying() {
    let mut store = SuffixStore::with_capacity(1);
    store.insert(b"abcde", 0);

    assert!(store.contains_key(b"abcde"));
    assert_eq!(store.get(b"abcde"), Some(0));
}
3882
/// Each reset must install the level's matcher configuration: Uncompressed
/// seeds every position, Fastest uses the interleaved fill step, and Better
/// switches to the HashChain backend.
#[test]
fn fastest_reset_uses_interleaved_hash_fill_step() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Uncompressed: seed every position (step 1).
    driver.reset(CompressionLevel::Uncompressed);
    assert_eq!(driver.match_generator.hash_fill_step, 1);

    // Fastest: interleaved fill step.
    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.match_generator.hash_fill_step, FAST_HASH_FILL_STEP);

    // Better uses the HashChain backend with lazy2; verify that the backend switch
    // happened and the lazy_depth is configured correctly.
    driver.reset(CompressionLevel::Better);
    assert_eq!(driver.active_backend, MatcherBackend::HashChain);
    assert_eq!(driver.window_size(), (1u64 << 23));
    assert_eq!(driver.hc_matcher().lazy_depth, 2);
}
3900
/// Emitting a match must rotate its offset into the repcode history.
#[test]
fn simple_matcher_updates_offset_history_after_emitting_match() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdeabcdeabcde".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    let produced = matcher.next_sequence(|seq| {
        let expected = Sequence::Triple {
            literals: b"abcde",
            offset: 5,
            match_len: 10,
        };
        assert_eq!(seq, expected);
    });
    assert!(produced);
    // Offset 5 has been rotated to the front of the repcode history.
    assert_eq!(matcher.offset_hist, [5, 1, 4]);
}
3922
/// With zero pending literals, rep1 must be probed before any hash lookup.
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep1_before_hash_lookup() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghijabcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    matcher.suffix_idx = 10;
    matcher.last_idx_in_sequence = 10;
    // rep0=99 cannot match at idx 10; rep1=10 does.
    matcher.offset_hist = [99, 10, 4];

    let found = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
    assert_eq!(found, Some((10, 10)));
}
3939
/// Repcodes must be able to reach back into the previous window entry, not
/// only the block currently being matched.
#[test]
fn simple_matcher_repcode_can_target_previous_window_entry() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.skip_matching();
    matcher.add_data(
        b"abcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    // rep1=10 points back at the identical previous entry.
    matcher.offset_hist = [99, 10, 4];

    let found = matcher.repcode_candidate(&matcher.window.last().unwrap().data, 0);
    assert_eq!(found, Some((10, 10)));
}
3960
/// When rep1 fails at the anchor, rep2 must be probed next.
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep2() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghijabcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    matcher.suffix_idx = 10;
    matcher.last_idx_in_sequence = 10;
    // rep1=4 does not match at idx 10, rep2=10 does.
    matcher.offset_hist = [99, 4, 10];

    let found = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
    assert_eq!(found, Some((10, 10)));
}
3977
/// With rep1 and rep2 exhausted, the rep0-1 variant must still be probed.
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep0_minus1() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghijabcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    matcher.suffix_idx = 10;
    matcher.last_idx_in_sequence = 10;
    // rep1=4 and rep2=99 do not match; rep0-1 == 10 does.
    matcher.offset_hist = [11, 4, 99];

    let found = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
    assert_eq!(found, Some((10, 10)));
}
3994
/// offset_match_len must reject offsets that reach back past the searchable
/// prefix of accumulated history.
#[test]
fn simple_matcher_repcode_rejects_offsets_beyond_searchable_prefix() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.skip_matching();
    matcher.add_data(
        b"klmnopqrst".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.suffix_idx = 3;

    // Offset 14 reaches beyond the searchable prefix and must be refused.
    let found = matcher.offset_match_len(14, &matcher.window.last().unwrap().data[3..]);
    assert_eq!(found, None);
}
4014
#[test]
fn simple_matcher_skip_matching_seeds_every_position_even_with_fast_step() {
    let mut gen = MatchGenerator::new(64);
    gen.hash_fill_step = FAST_HASH_FILL_STEP;
    gen.add_data(
        b"abcdefghijklmnop".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    gen.skip_matching();
    gen.add_data(b"bcdef".to_vec(), SuffixStore::with_capacity(64), |_, _| {});

    // If skip_matching only seeded every third position, "bcdef" (which
    // starts at position 1 of the skipped block) would be invisible.
    let produced = gen.next_sequence(|seq| {
        let expected = Sequence::Triple {
            literals: b"",
            offset: 15,
            match_len: 5,
        };
        assert_eq!(seq, expected);
    });
    assert!(produced);
    assert!(!gen.next_sequence(|_| {}));
}
4039
#[test]
fn simple_matcher_add_suffixes_till_backfills_last_searchable_anchor() {
    let mut gen = MatchGenerator::new(64);
    gen.hash_fill_step = FAST_HASH_FILL_STEP;
    gen.add_data(
        b"01234abcde".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    gen.add_suffixes_till(10, FAST_HASH_FILL_STEP);

    // The last searchable anchor (position 5) must be registered even
    // though stepping by FAST_HASH_FILL_STEP alone would skip over it.
    let entry = gen.window.last().unwrap();
    let anchor_key = &entry.data[5..10];
    assert_eq!(entry.suffixes.get(anchor_key), Some(5));
}
4055
#[test]
fn simple_matcher_add_suffixes_till_skips_when_idx_below_min_match_len() {
    let mut gen = MatchGenerator::new(128);
    gen.hash_fill_step = FAST_HASH_FILL_STEP;
    gen.add_data(
        b"abcdefghijklmnopqrstuvwxyz".to_vec(),
        SuffixStore::with_capacity(1 << 16),
        |_, _| {},
    );

    // An end index below the minimum match length cannot yield a single
    // registrable key, so the store must stay empty for the first key.
    gen.add_suffixes_till(MIN_MATCH_LEN - 1, FAST_HASH_FILL_STEP);

    let entry = gen.window.last().unwrap();
    let key = &entry.data[..MIN_MATCH_LEN];
    assert_eq!(entry.suffixes.get(key), None);
}
4072
#[test]
fn simple_matcher_add_suffixes_till_fast_step_registers_interleaved_positions() {
    let mut gen = MatchGenerator::new(128);
    gen.hash_fill_step = FAST_HASH_FILL_STEP;
    gen.add_data(
        b"abcdefghijklmnopqrstuvwxyz".to_vec(),
        SuffixStore::with_capacity(1 << 16),
        |_, _| {},
    );

    gen.add_suffixes_till(17, FAST_HASH_FILL_STEP);

    // With the fast fill step of 3 the seeded anchors are 0, 3, 6, 9, 12.
    let entry = gen.window.last().unwrap();
    for pos in (0..=12usize).step_by(FAST_HASH_FILL_STEP) {
        let key = &entry.data[pos..pos + MIN_MATCH_LEN];
        assert_eq!(
            entry.suffixes.get(key),
            Some(pos),
            "expected interleaved suffix registration at pos {pos}"
        );
    }
}
4095
#[test]
fn dfast_skip_matching_handles_window_eviction() {
    let mut gen = DfastMatchGenerator::new(16);

    gen.add_data(alloc::vec![1, 2, 3, 4, 5, 6], |_| {});
    gen.skip_matching();
    gen.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});
    gen.skip_matching();
    gen.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});

    // Decode the emitted sequences on top of the skipped-over history and
    // verify the final block round-trips despite earlier window eviction.
    let mut decoded = alloc::vec![7, 8, 9, 10, 11, 12];
    gen.start_matching(|seq| match seq {
        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            decoded.extend_from_slice(literals);
            let src = decoded.len() - offset;
            // Copy byte by byte so overlapping matches resolve correctly.
            for i in 0..match_len {
                let b = decoded[src + i];
                decoded.push(b);
            }
        }
    });

    assert_eq!(decoded, [7, 8, 9, 10, 11, 12, 7, 8, 9, 10, 11, 12]);
}
4125
#[test]
fn dfast_add_data_callback_reports_evicted_len_not_capacity() {
    let mut gen = DfastMatchGenerator::new(8);

    // Deliberately over-allocate so len (8) != capacity (64) on the buffer
    // that will be evicted; this catches a len/capacity mix-up.
    let mut first = Vec::with_capacity(64);
    first.extend_from_slice(b"abcdefgh");
    gen.add_data(first, |_| {});

    let mut second = Vec::with_capacity(64);
    second.extend_from_slice(b"ijklmnop");

    let mut reported = None;
    gen.add_data(second, |evicted| reported = Some(evicted.len()));

    assert_eq!(
        reported,
        Some(8),
        "eviction callback must report evicted byte length, not backing capacity"
    );
}
4148
#[test]
fn dfast_trim_to_window_callback_reports_evicted_len_not_capacity() {
    let mut gen = DfastMatchGenerator::new(16);

    // Deliberately over-allocate so len (8) != capacity (64) on the buffer
    // that will be trimmed out; this catches a len/capacity mix-up.
    let mut first = Vec::with_capacity(64);
    first.extend_from_slice(b"abcdefgh");
    gen.add_data(first, |_| {});

    let mut second = Vec::with_capacity(64);
    second.extend_from_slice(b"ijklmnop");
    gen.add_data(second, |_| {});

    // Shrink the window so the first buffer no longer fits and must go.
    gen.max_window_size = 8;

    let mut reported = None;
    gen.trim_to_window(|evicted| reported = Some(evicted.len()));

    assert_eq!(
        reported,
        Some(8),
        "trim callback must report evicted byte length, not backing capacity"
    );
}
4174
#[test]
fn dfast_inserts_tail_positions_for_next_block_matching() {
    let mut gen = DfastMatchGenerator::new(1 << 22);

    // The first block contains no repeats, so it must decode to literals.
    gen.add_data(b"012345bcdea".to_vec(), |_| {});
    let mut history = Vec::new();
    gen.start_matching(|seq| match seq {
        Sequence::Literals { literals } => history.extend_from_slice(literals),
        Sequence::Triple { .. } => unreachable!("first block should not match history"),
    });
    assert_eq!(history, b"012345bcdea");

    // The second block opens with "bcdea", which can only match if the
    // tail positions of the first block were inserted into the table.
    gen.add_data(b"bcdeabcdeab".to_vec(), |_| {});
    let mut sequences_seen = 0usize;
    gen.start_matching(|seq| {
        sequences_seen += 1;
        assert_eq!(sequences_seen, 1, "expected a single cross-block match");
        match seq {
            Sequence::Literals { .. } => {
                panic!("expected tail-anchored cross-block match before any literals")
            }
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => {
                assert_eq!(literals, b"");
                assert_eq!(offset, 5);
                assert_eq!(match_len, 11);
                let src = history.len() - offset;
                // Byte-by-byte copy: the match overlaps itself (len > offset).
                for i in 0..match_len {
                    let b = history[src + i];
                    history.push(b);
                }
            }
        }
    });

    assert_eq!(sequences_seen, 1, "expected tail-anchored cross-block match");
    assert_eq!(history, b"012345bcdeabcdeabcdeab");
}
4219
#[test]
fn fastest_hint_iteration_23_sequences_reconstruct_source() {
    // Deterministic pseudo-random byte stream via a 64-bit LCG.
    fn generate_data(seed: u64, len: usize) -> Vec<u8> {
        let mut state = seed;
        (0..len)
            .map(|_| {
                state = state
                    .wrapping_mul(6364136223846793005)
                    .wrapping_add(1442695040888963407);
                (state >> 33) as u8
            })
            .collect()
    }

    let iteration = 23u64;
    let len = (iteration * 89 % 16384) as usize;
    let mut data = generate_data(iteration, len);
    // Append a repeated slice so the fixture deterministically exercises
    // the match path (Sequence::Triple) instead of only literals.
    let repeat = data[128..256].to_vec();
    data.extend_from_slice(&repeat);
    data.extend_from_slice(&repeat);

    let mut driver = MatchGeneratorDriver::new(1024 * 128, 1);
    driver.set_source_size_hint(data.len() as u64);
    driver.reset(CompressionLevel::Fastest);
    let mut space = driver.get_next_space();
    space[..data.len()].copy_from_slice(&data);
    space.truncate(data.len());
    driver.commit_space(space);

    // Replay the emitted sequences and check they rebuild the input.
    let mut decoded = Vec::with_capacity(data.len());
    let mut matched_once = false;
    driver.start_matching(|seq| match seq {
        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            matched_once = true;
            decoded.extend_from_slice(literals);
            assert!(offset > 0, "offset must be non-zero");
            assert!(
                offset <= decoded.len(),
                "offset must reference already-produced bytes: offset={} produced={}",
                offset,
                decoded.len()
            );
            let src = decoded.len() - offset;
            // Copy byte by byte so overlapping matches resolve correctly.
            for i in 0..match_len {
                let b = decoded[src + i];
                decoded.push(b);
            }
        }
    });

    assert!(matched_once, "fixture must emit at least one match");
    assert_eq!(decoded, data);
}