// structured_zstd/encoding/match_generator.rs
//! Matching algorithm used to find repeated parts in the original data
//!
//! The Zstd format relies on finding repeated sequences of data and compressing these sequences as instructions to the decoder.
//! A sequence basically tells the decoder "Go back X bytes and copy Y bytes to the end of your decode buffer".
//!
//! The task here is to efficiently find matches in the already encoded data for the current suffix of the not yet encoded data.
7
8use alloc::collections::VecDeque;
9use alloc::vec::Vec;
10#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
11use core::arch::aarch64::{uint8x16_t, vceqq_u8, vgetq_lane_u64, vld1q_u8, vreinterpretq_u64_u8};
12#[cfg(target_arch = "x86")]
13use core::arch::x86::{
14    __m128i, __m256i, _mm_cmpeq_epi8, _mm_loadu_si128, _mm_movemask_epi8, _mm256_cmpeq_epi8,
15    _mm256_loadu_si256, _mm256_movemask_epi8,
16};
17#[cfg(target_arch = "x86_64")]
18use core::arch::x86_64::{
19    __m128i, __m256i, _mm_cmpeq_epi8, _mm_loadu_si128, _mm_movemask_epi8, _mm256_cmpeq_epi8,
20    _mm256_loadu_si256, _mm256_movemask_epi8,
21};
22use core::convert::TryInto;
23use core::num::NonZeroUsize;
24
25use super::BETTER_WINDOW_LOG;
26use super::CompressionLevel;
27use super::Matcher;
28use super::Sequence;
29use super::blocks::encode_offset_with_history;
30use super::incompressible::{block_looks_incompressible, block_looks_incompressible_strict};
31#[cfg(all(feature = "std", target_arch = "aarch64", target_endian = "little"))]
32use std::arch::is_aarch64_feature_detected;
33#[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
34use std::arch::is_x86_feature_detected;
35#[cfg(feature = "std")]
36use std::sync::OnceLock;
37
// Tuning constants. The consuming code lives in the individual matcher
// implementations further down the file; the notes here restate intent from
// the names and visible values — confirm details against each matcher.

// --- Simple/fast matcher ---
/// Minimum match length for the fast matcher.
const MIN_MATCH_LEN: usize = 5;
/// Stride between hashed positions when filling the fast hash table.
const FAST_HASH_FILL_STEP: usize = 3;
/// Skip stride once a block looks incompressible.
const INCOMPRESSIBLE_SKIP_STEP: usize = 8;

// --- Dfast (double-fast) and row matchers ---
/// Minimum match length for the dfast matcher.
const DFAST_MIN_MATCH_LEN: usize = 6;
/// Lookahead (bytes) for the short-hash probe.
const DFAST_SHORT_HASH_LOOKAHEAD: usize = 4;
/// Minimum match length for the row matcher.
const ROW_MIN_MATCH_LEN: usize = 6;
/// Match length considered "good enough" to stop the dfast search.
const DFAST_TARGET_LEN: usize = 48;
// Keep these aligned with the issue's zstd level-3/dfast target unless ratio
// measurements show we can shrink them without regressing acceptance tests.
const DFAST_HASH_BITS: usize = 20;
const DFAST_SEARCH_DEPTH: usize = 4;
/// Sentinel marking an unused dfast hash-table slot.
const DFAST_EMPTY_SLOT: usize = usize::MAX;
/// log2 of how often the skip step may grow while no match is found.
const DFAST_SKIP_SEARCH_STRENGTH: usize = 6;
const DFAST_SKIP_STEP_GROWTH_INTERVAL: usize = 1 << DFAST_SKIP_SEARCH_STRENGTH;
const DFAST_LOCAL_SKIP_TRIGGER: usize = 256;
/// Upper bound on the adaptive skip step.
const DFAST_MAX_SKIP_STEP: usize = 8;
const DFAST_INCOMPRESSIBLE_SKIP_STEP: usize = 16;
const ROW_HASH_BITS: usize = 20;
/// log2 of entries per row in the row matcher.
const ROW_LOG: usize = 5;
const ROW_SEARCH_DEPTH: usize = 16;
const ROW_TARGET_LEN: usize = 48;
/// Width of the per-entry tag used to prefilter row candidates.
const ROW_TAG_BITS: usize = 8;
/// Sentinel marking an unused row slot.
const ROW_EMPTY_SLOT: usize = usize::MAX;
/// Bytes hashed to form a row-table key.
const ROW_HASH_KEY_LEN: usize = 4;

// --- Hash-chain (HC) matcher defaults (see HC_CONFIG / LEVEL_TABLE) ---
const HC_HASH_LOG: usize = 20;
const HC_CHAIN_LOG: usize = 19;
const HC_SEARCH_DEPTH: usize = 16;
const HC_MIN_MATCH_LEN: usize = 5;
const HC_TARGET_LEN: usize = 48;
// Positions are stored as (relative_pos + 1) so that 0 is a safe empty
// sentinel that can never collide with any valid position.
const HC_EMPTY: u32 = 0;

// Maximum search depth across all HC-based levels. Used to size the
// fixed-length candidate array returned by chain_candidates().
// Must stay >= every HcConfig::search_depth in LEVEL_TABLE (currently 32).
const MAX_HC_SEARCH_DEPTH: usize = 32;
75
/// SIMD kernel selected for match-prefix comparison.
///
/// Variants exist only on architectures whose intrinsics are imported above;
/// `Scalar` is the universal fallback. (Selection logic is not in this view —
/// presumably runtime feature detection via the `is_*_feature_detected!`
/// imports; confirm at the dispatch site.)
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
enum PrefixKernel {
    /// Portable byte-wise comparison, always available.
    Scalar,
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    X86Sse2,
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    X86Avx2,
    #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
    Aarch64Neon,
}
86
/// Bundled tuning knobs for the hash-chain matcher. Using a typed config
/// instead of positional `usize` args eliminates parameter-order hazards.
#[derive(Copy, Clone)]
struct HcConfig {
    /// log2 of the hash-table size.
    hash_log: usize,
    /// log2 of the chain-table size.
    chain_log: usize,
    /// Maximum chain links followed per search position.
    search_depth: usize,
    /// Match length considered good enough to stop searching early
    /// (NOTE(review): confirm against the HC search loop).
    target_len: usize,
}
96
/// Bundled tuning knobs for the row-based matcher (see [`HcConfig`] for the
/// rationale of using a typed config).
#[derive(Copy, Clone)]
struct RowConfig {
    /// log2 of the row hash-table size.
    hash_bits: usize,
    /// log2 of entries per row.
    row_log: usize,
    /// Maximum candidates examined per search position.
    search_depth: usize,
    /// Match length considered good enough to stop searching early.
    target_len: usize,
}
104
/// Baseline hash-chain tuning; also used as the placeholder `hc` value on
/// LEVEL_TABLE rows whose backend is not HashChain.
const HC_CONFIG: HcConfig = HcConfig {
    hash_log: HC_HASH_LOG,
    chain_log: HC_CHAIN_LOG,
    search_depth: HC_SEARCH_DEPTH,
    target_len: HC_TARGET_LEN,
};

/// Best-level: deeper search, larger tables, higher target length.
/// Referenced by LEVEL_TABLE entry 11.
const BEST_HC_CONFIG: HcConfig = HcConfig {
    hash_log: 21,
    chain_log: 20,
    search_depth: 32,
    target_len: 128,
};

/// Baseline row-matcher tuning; also the placeholder `row` value on
/// LEVEL_TABLE rows whose backend is not Row.
const ROW_CONFIG: RowConfig = RowConfig {
    hash_bits: ROW_HASH_BITS,
    row_log: ROW_LOG,
    search_depth: ROW_SEARCH_DEPTH,
    target_len: ROW_TARGET_LEN,
};
126
/// Resolved tuning parameters for a compression level.
#[derive(Copy, Clone)]
struct LevelParams {
    /// Which matcher implementation services this level.
    backend: MatcherBackend,
    /// log2 of the live window size in bytes.
    window_log: u8,
    /// Stride between indexed positions (Simple backend; larger steps index
    /// fewer positions and run faster).
    hash_fill_step: usize,
    /// Lazy-matching depth; 0 means greedy.
    lazy_depth: u8,
    /// Hash-chain tuning; meaningful only when `backend` is `HashChain`.
    hc: HcConfig,
    /// Row-matcher tuning; meaningful only when `backend` is `Row`.
    row: RowConfig,
}
137
138fn dfast_hash_bits_for_window(max_window_size: usize) -> usize {
139    let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
140    window_log.clamp(MIN_WINDOW_LOG as usize, DFAST_HASH_BITS)
141}
142
143fn row_hash_bits_for_window(max_window_size: usize) -> usize {
144    let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
145    window_log.clamp(MIN_WINDOW_LOG as usize, ROW_HASH_BITS)
146}
147
/// Parameter table for numeric compression levels 1–22.
///
/// Each entry maps a zstd compression level to the best-available matcher
/// backend and tuning knobs.  Levels that require strategies this crate does
/// not implement (greedy, btopt, btultra) are approximated with the closest
/// available backend.  Levels 17–22 currently share level-17's tuning for
/// that reason; they are placeholders until stronger strategies exist.
///
/// Index 0 = level 1, index 21 = level 22.
#[rustfmt::skip]
const LEVEL_TABLE: [LevelParams; 22] = [
    // Lvl  Strategy       wlog  step  lazy  HC config                                   row config
    // ---  -------------- ----  ----  ----  ------------------------------------------  ----------
    /* 1 */ LevelParams { backend: MatcherBackend::Simple,    window_log: 17, hash_fill_step: 3, lazy_depth: 0, hc: HC_CONFIG, row: ROW_CONFIG },
    /* 2 */ LevelParams { backend: MatcherBackend::Dfast,     window_log: 19, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
    /* 3 */ LevelParams { backend: MatcherBackend::Dfast,     window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
    /* 4 */ LevelParams { backend: MatcherBackend::Row,       window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
    /* 5 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HcConfig { hash_log: 18, chain_log: 17, search_depth: 4,  target_len: 32  }, row: ROW_CONFIG },
    /* 6 */ LevelParams { backend: MatcherBackend::HashChain, window_log: BETTER_WINDOW_LOG, hash_fill_step: 1, lazy_depth: 1, hc: HcConfig { hash_log: 19, chain_log: 18, search_depth: 8,  target_len: 48  }, row: ROW_CONFIG },
    /* 7 */ LevelParams { backend: MatcherBackend::HashChain, window_log: BETTER_WINDOW_LOG, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 16, target_len: 48  }, row: ROW_CONFIG },
    /* 8 */ LevelParams { backend: MatcherBackend::HashChain, window_log: BETTER_WINDOW_LOG, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 24, target_len: 64  }, row: ROW_CONFIG },
    /* 9 */ LevelParams { backend: MatcherBackend::HashChain, window_log: BETTER_WINDOW_LOG, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 24, target_len: 64  }, row: ROW_CONFIG },
    /*10 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 24, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 28, target_len: 96  }, row: ROW_CONFIG },
    /*11 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 24, hash_fill_step: 1, lazy_depth: 2, hc: BEST_HC_CONFIG, row: ROW_CONFIG },
    /*12 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 128 }, row: ROW_CONFIG },
    /*13 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 160 }, row: ROW_CONFIG },
    /*14 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 22, search_depth: 32, target_len: 192 }, row: ROW_CONFIG },
    /*15 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 22, search_depth: 32, target_len: 192 }, row: ROW_CONFIG },
    /*16 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 22, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    /*17 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    /*18 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    /*19 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    /*20 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    /*21 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    /*22 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
];
183
/// Smallest window_log the encoder will use regardless of source size
/// (2^10 = 1 KiB).
const MIN_WINDOW_LOG: u8 = 10;
/// Conservative floor for source-size-hinted window tuning (2^14 = 16 KiB).
///
/// Hinted windows below 16 KiB (`window_log < 14`) currently regress C-FFI
/// interoperability on certain compressed-block patterns. Keep hinted
/// windows at 16 KiB or larger until that compatibility gap is closed.
const MIN_HINTED_WINDOW_LOG: u8 = 14;
192
193/// Adjust level parameters for a known source size.
194///
195/// This derives a cap from `ceil(log2(src_size))`, then clamps it to
196/// [`MIN_HINTED_WINDOW_LOG`] (16 KiB). A zero-byte size hint is treated as
197/// [`MIN_WINDOW_LOG`] for the raw ceil-log step and then promoted to the hinted
198/// floor. This keeps tables bounded for small inputs while preserving the
199/// encoder's baseline minimum supported window.
200/// For the HC backend, `hash_log` and `chain_log` are reduced
201/// proportionally.
202fn adjust_params_for_source_size(mut params: LevelParams, src_size: u64) -> LevelParams {
203    // Derive a source-size-based cap from ceil(log2(src_size)), then
204    // clamp first to MIN_WINDOW_LOG (baseline encoder minimum) and then to
205    // MIN_HINTED_WINDOW_LOG (16 KiB hinted floor). For tiny or zero hints we
206    // therefore keep a 16 KiB effective minimum window in hinted mode.
207    let src_log = if src_size == 0 {
208        MIN_WINDOW_LOG
209    } else {
210        (64 - (src_size - 1).leading_zeros()) as u8 // ceil_log2
211    };
212    let src_log = src_log.max(MIN_WINDOW_LOG).max(MIN_HINTED_WINDOW_LOG);
213    if src_log < params.window_log {
214        params.window_log = src_log;
215    }
216    // For HC backend: also cap hash_log and chain_log so tables are
217    // proportional to the source, avoiding multi-MB allocations for
218    // tiny inputs.
219    if params.backend == MatcherBackend::HashChain {
220        if (src_log + 2) < params.hc.hash_log as u8 {
221            params.hc.hash_log = (src_log + 2) as usize;
222        }
223        if (src_log + 1) < params.hc.chain_log as u8 {
224            params.hc.chain_log = (src_log + 1) as usize;
225        }
226    } else if params.backend == MatcherBackend::Row {
227        let max_window_size = 1usize << params.window_log;
228        params.row.hash_bits = row_hash_bits_for_window(max_window_size);
229    }
230    params
231}
232
233/// Resolve a [`CompressionLevel`] to internal tuning parameters,
234/// optionally adjusted for a known source size.
235fn resolve_level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
236    let params = match level {
237        CompressionLevel::Uncompressed => LevelParams {
238            backend: MatcherBackend::Simple,
239            window_log: 17,
240            hash_fill_step: 1,
241            lazy_depth: 0,
242            hc: HC_CONFIG,
243            row: ROW_CONFIG,
244        },
245        CompressionLevel::Fastest => LEVEL_TABLE[0],
246        CompressionLevel::Default => LEVEL_TABLE[2],
247        CompressionLevel::Better => LEVEL_TABLE[6],
248        CompressionLevel::Best => LEVEL_TABLE[10],
249        CompressionLevel::Level(n) => {
250            if n > 0 {
251                let idx = (n as usize).min(CompressionLevel::MAX_LEVEL as usize) - 1;
252                LEVEL_TABLE[idx]
253            } else if n == 0 {
254                // Level 0 = default, matching C zstd semantics.
255                LEVEL_TABLE[CompressionLevel::DEFAULT_LEVEL as usize - 1]
256            } else {
257                // Negative levels: ultra-fast with the Simple backend.
258                // Acceleration grows with magnitude, expressed as larger
259                // hash_fill_step (fewer positions indexed).
260                let acceleration =
261                    (n.saturating_abs() as usize).min((-CompressionLevel::MIN_LEVEL) as usize);
262                let step = (acceleration + 3).min(128);
263                LevelParams {
264                    backend: MatcherBackend::Simple,
265                    window_log: 17,
266                    hash_fill_step: step,
267                    lazy_depth: 0,
268                    hc: HC_CONFIG,
269                    row: ROW_CONFIG,
270                }
271            }
272        }
273    };
274    if let Some(size) = source_size {
275        adjust_params_for_source_size(params, size)
276    } else {
277        params
278    }
279}
280
/// Matcher strategy backing a compression level (see [`LEVEL_TABLE`]).
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum MatcherBackend {
    /// Baseline matcher (`MatchGenerator`); level 1 and negative levels.
    Simple,
    /// Double-fast matcher; levels 2–3.
    Dfast,
    /// Row-based matcher; level 4.
    Row,
    /// Hash-chain matcher; levels 5 and up.
    HashChain,
}
288
/// This is the default implementation of the `Matcher` trait. It allocates and reuses the buffers when possible.
pub struct MatchGeneratorDriver {
    // Recycled data buffers shared by all backends.
    vec_pool: Vec<Vec<u8>>,
    // Recycled suffix stores (used by the Simple backend only).
    suffix_pool: Vec<SuffixStore>,
    // Always-constructed baseline backend.
    match_generator: MatchGenerator,
    // The remaining backends are constructed lazily on the first reset()
    // that selects them.
    dfast_match_generator: Option<DfastMatchGenerator>,
    row_match_generator: Option<RowMatchGenerator>,
    hc_match_generator: Option<HcMatchGenerator>,
    // Which of the four matchers is currently servicing requests.
    active_backend: MatcherBackend,
    // Current per-slice allocation size (capped at the live window by reset()).
    slice_size: usize,
    // Configured slice size before window capping; reset() restores from this.
    base_slice_size: usize,
    // Frame header window size must stay at the configured live-window budget.
    // Dictionary retention expands internal matcher capacity only.
    reported_window_size: usize,
    // Tracks currently retained bytes that originated from primed dictionary
    // history and have not been evicted yet.
    dictionary_retained_budget: usize,
    // Source size hint for next frame (set via set_source_size_hint, cleared on reset).
    source_size_hint: Option<u64>,
}
309
310impl MatchGeneratorDriver {
311    /// `slice_size` sets the base block allocation size used for matcher input chunks.
312    /// `max_slices_in_window` determines the initial window capacity at construction
313    /// time. Effective window sizing is recalculated on every [`reset`](Self::reset)
314    /// from the resolved compression level and optional source-size hint.
315    pub(crate) fn new(slice_size: usize, max_slices_in_window: usize) -> Self {
316        let max_window_size = max_slices_in_window * slice_size;
317        Self {
318            vec_pool: Vec::new(),
319            suffix_pool: Vec::new(),
320            match_generator: MatchGenerator::new(max_window_size),
321            dfast_match_generator: None,
322            row_match_generator: None,
323            hc_match_generator: None,
324            active_backend: MatcherBackend::Simple,
325            slice_size,
326            base_slice_size: slice_size,
327            reported_window_size: max_window_size,
328            dictionary_retained_budget: 0,
329            source_size_hint: None,
330        }
331    }
332
    /// Resolve tuning parameters for `level`, optionally adjusted for a
    /// known source size (thin wrapper over [`resolve_level_params`]).
    fn level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
        resolve_level_params(level, source_size)
    }
336
    /// Shared access to the dfast backend.
    ///
    /// # Panics
    /// If the backend was never initialized by `reset()`.
    fn dfast_matcher(&self) -> &DfastMatchGenerator {
        self.dfast_match_generator
            .as_ref()
            .expect("dfast backend must be initialized by reset() before use")
    }

    /// Mutable access to the dfast backend; panics if uninitialized.
    fn dfast_matcher_mut(&mut self) -> &mut DfastMatchGenerator {
        self.dfast_match_generator
            .as_mut()
            .expect("dfast backend must be initialized by reset() before use")
    }

    /// Shared access to the row backend; panics if uninitialized.
    fn row_matcher(&self) -> &RowMatchGenerator {
        self.row_match_generator
            .as_ref()
            .expect("row backend must be initialized by reset() before use")
    }

    /// Mutable access to the row backend; panics if uninitialized.
    fn row_matcher_mut(&mut self) -> &mut RowMatchGenerator {
        self.row_match_generator
            .as_mut()
            .expect("row backend must be initialized by reset() before use")
    }

    /// Shared access to the hash-chain backend; panics if uninitialized.
    fn hc_matcher(&self) -> &HcMatchGenerator {
        self.hc_match_generator
            .as_ref()
            .expect("hash chain backend must be initialized by reset() before use")
    }

    /// Mutable access to the hash-chain backend; panics if uninitialized.
    fn hc_matcher_mut(&mut self) -> &mut HcMatchGenerator {
        self.hc_match_generator
            .as_mut()
            .expect("hash chain backend must be initialized by reset() before use")
    }
372
373    fn retire_dictionary_budget(&mut self, evicted_bytes: usize) {
374        let reclaimed = evicted_bytes.min(self.dictionary_retained_budget);
375        if reclaimed == 0 {
376            return;
377        }
378        self.dictionary_retained_budget -= reclaimed;
379        match self.active_backend {
380            MatcherBackend::Simple => {
381                self.match_generator.max_window_size = self
382                    .match_generator
383                    .max_window_size
384                    .saturating_sub(reclaimed);
385            }
386            MatcherBackend::Dfast => {
387                let matcher = self.dfast_matcher_mut();
388                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
389            }
390            MatcherBackend::Row => {
391                let matcher = self.row_matcher_mut();
392                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
393            }
394            MatcherBackend::HashChain => {
395                let matcher = self.hc_matcher_mut();
396                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
397            }
398        }
399    }
400
    /// Evict out-of-window data from the active backend, recycle the evicted
    /// buffers into the pools, and retire the matching dictionary budget.
    ///
    /// Runs in a loop because retiring budget shrinks the effective window,
    /// which can make further data evictable; stops once a pass evicts
    /// nothing.
    fn trim_after_budget_retire(&mut self) {
        loop {
            let mut evicted_bytes = 0usize;
            match self.active_backend {
                MatcherBackend::Simple => {
                    let vec_pool = &mut self.vec_pool;
                    let suffix_pool = &mut self.suffix_pool;
                    // reserve(0) is used purely to trigger eviction of
                    // out-of-window slices (NOTE(review): confirm reserve(0)
                    // semantics against MatchGenerator::reserve).
                    self.match_generator.reserve(0, |mut data, mut suffixes| {
                        evicted_bytes += data.len();
                        // Pool buffers at full capacity so the next consumer
                        // sees their usable size via len().
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                        suffixes.slots.clear();
                        suffixes.slots.resize(suffixes.slots.capacity(), None);
                        suffix_pool.push(suffixes);
                    });
                }
                MatcherBackend::Dfast => {
                    // Collect first, pool afterwards: the trim closure runs
                    // while the matcher (and thus `self`) is mutably
                    // borrowed, so it cannot push into self.vec_pool directly.
                    let mut retired = Vec::new();
                    self.dfast_matcher_mut().trim_to_window(|data| {
                        evicted_bytes += data.len();
                        retired.push(data);
                    });
                    for mut data in retired {
                        data.resize(data.capacity(), 0);
                        self.vec_pool.push(data);
                    }
                }
                MatcherBackend::Row => {
                    // Same collect-then-pool pattern as the Dfast arm.
                    let mut retired = Vec::new();
                    self.row_matcher_mut().trim_to_window(|data| {
                        evicted_bytes += data.len();
                        retired.push(data);
                    });
                    for mut data in retired {
                        data.resize(data.capacity(), 0);
                        self.vec_pool.push(data);
                    }
                }
                MatcherBackend::HashChain => {
                    // Same collect-then-pool pattern as the Dfast arm.
                    let mut retired = Vec::new();
                    self.hc_matcher_mut().trim_to_window(|data| {
                        evicted_bytes += data.len();
                        retired.push(data);
                    });
                    for mut data in retired {
                        data.resize(data.capacity(), 0);
                        self.vec_pool.push(data);
                    }
                }
            }
            if evicted_bytes == 0 {
                break;
            }
            self.retire_dictionary_budget(evicted_bytes);
        }
    }
457
    /// Ask the active backend to skip matching on the current slice while
    /// priming dictionary content (see `prime_with_dictionary`).
    ///
    /// NOTE(review): the `Some(false)` hint presumably marks the data as
    /// "not incompressible" so primed bytes are still indexed, and Dfast's
    /// dense variant is assumed equivalent for that purpose — confirm
    /// against the backends' `skip_matching*` implementations.
    fn skip_matching_for_dictionary_priming(&mut self) {
        match self.active_backend {
            MatcherBackend::Simple => self.match_generator.skip_matching_with_hint(Some(false)),
            MatcherBackend::Dfast => self.dfast_matcher_mut().skip_matching_dense(),
            MatcherBackend::Row => self.row_matcher_mut().skip_matching_with_hint(Some(false)),
            MatcherBackend::HashChain => self.hc_matcher_mut().skip_matching(Some(false)),
        }
    }
466}
467
468impl Matcher for MatchGeneratorDriver {
    /// This driver can ingest dictionary content via `prime_with_dictionary`.
    fn supports_dictionary_priming(&self) -> bool {
        true
    }

    /// Record a source-size hint for the next frame. It is consumed (and
    /// cleared) by the next `reset()`.
    fn set_source_size_hint(&mut self, size: u64) {
        self.source_size_hint = Some(size);
    }
476
    /// Re-arm the driver for a new frame at `level`.
    ///
    /// Consumes any pending source-size hint, resolves the level's tuning
    /// parameters, recycles the outgoing backend's buffers into the pools
    /// when the backend changes, and then configures and clears the newly
    /// active backend.
    fn reset(&mut self, level: CompressionLevel) {
        // A hint applies to exactly one frame: take() consumes it so the next
        // reset() sizes tables without it unless re-armed.
        let hint = self.source_size_hint.take();
        let hinted = hint.is_some();
        let params = Self::level_params(level, hint);
        let max_window_size = 1usize << params.window_log;
        self.dictionary_retained_budget = 0;
        // Backend switch: drain the outgoing backend's buffers into the
        // shared pools so the incoming backend can reuse the allocations.
        if self.active_backend != params.backend {
            match self.active_backend {
                MatcherBackend::Simple => {
                    let vec_pool = &mut self.vec_pool;
                    let suffix_pool = &mut self.suffix_pool;
                    self.match_generator.reset(|mut data, mut suffixes| {
                        // Pool buffers at full capacity so the next consumer
                        // sees their usable size via len().
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                        suffixes.slots.clear();
                        suffixes.slots.resize(suffixes.slots.capacity(), None);
                        suffix_pool.push(suffixes);
                    });
                }
                MatcherBackend::Dfast => {
                    if let Some(dfast) = self.dfast_match_generator.as_mut() {
                        let vec_pool = &mut self.vec_pool;
                        dfast.reset(|mut data| {
                            data.resize(data.capacity(), 0);
                            vec_pool.push(data);
                        });
                    }
                }
                MatcherBackend::Row => {
                    if let Some(row) = self.row_match_generator.as_mut() {
                        // Drop the row tables outright so their memory is
                        // released while another backend is active
                        // (NOTE(review): confirm configure()/reset() rebuilds
                        // them at the right size on the next Row activation).
                        row.row_heads = Vec::new();
                        row.row_positions = Vec::new();
                        row.row_tags = Vec::new();
                        let vec_pool = &mut self.vec_pool;
                        row.reset(|mut data| {
                            data.resize(data.capacity(), 0);
                            vec_pool.push(data);
                        });
                    }
                }
                MatcherBackend::HashChain => {
                    if let Some(hc) = self.hc_match_generator.as_mut() {
                        // Release oversized tables when switching away from
                        // HashChain so Best's larger allocations don't persist.
                        hc.hash_table = Vec::new();
                        hc.chain_table = Vec::new();
                        let vec_pool = &mut self.vec_pool;
                        hc.reset(|mut data| {
                            data.resize(data.capacity(), 0);
                            vec_pool.push(data);
                        });
                    }
                }
            }
        }

        self.active_backend = params.backend;
        // Slices never exceed the live window.
        self.slice_size = self.base_slice_size.min(max_window_size);
        self.reported_window_size = max_window_size;
        // Configure the now-active backend (lazily constructing it on first
        // use) and clear its state for the new frame.
        match self.active_backend {
            MatcherBackend::Simple => {
                let vec_pool = &mut self.vec_pool;
                let suffix_pool = &mut self.suffix_pool;
                self.match_generator.max_window_size = max_window_size;
                self.match_generator.hash_fill_step = params.hash_fill_step;
                self.match_generator.reset(|mut data, mut suffixes| {
                    data.resize(data.capacity(), 0);
                    vec_pool.push(data);
                    suffixes.slots.clear();
                    suffixes.slots.resize(suffixes.slots.capacity(), None);
                    suffix_pool.push(suffixes);
                });
            }
            MatcherBackend::Dfast => {
                let dfast = self
                    .dfast_match_generator
                    .get_or_insert_with(|| DfastMatchGenerator::new(max_window_size));
                dfast.max_window_size = max_window_size;
                dfast.lazy_depth = params.lazy_depth;
                // The specialized fast loop is enabled only for the level-3
                // aliases it was tuned for.
                dfast.use_fast_loop = matches!(
                    level,
                    CompressionLevel::Default
                        | CompressionLevel::Level(0)
                        | CompressionLevel::Level(3)
                );
                // With a size hint, shrink the hash table to fit the window;
                // otherwise keep the full default width.
                dfast.set_hash_bits(if hinted {
                    dfast_hash_bits_for_window(max_window_size)
                } else {
                    DFAST_HASH_BITS
                });
                let vec_pool = &mut self.vec_pool;
                dfast.reset(|mut data| {
                    data.resize(data.capacity(), 0);
                    vec_pool.push(data);
                });
            }
            MatcherBackend::Row => {
                let row = self
                    .row_match_generator
                    .get_or_insert_with(|| RowMatchGenerator::new(max_window_size));
                row.max_window_size = max_window_size;
                row.lazy_depth = params.lazy_depth;
                row.configure(params.row);
                // Only hinted frames narrow the hash width (mirrors Dfast).
                if hinted {
                    row.set_hash_bits(row_hash_bits_for_window(max_window_size));
                }
                let vec_pool = &mut self.vec_pool;
                row.reset(|mut data| {
                    data.resize(data.capacity(), 0);
                    vec_pool.push(data);
                });
            }
            MatcherBackend::HashChain => {
                let hc = self
                    .hc_match_generator
                    .get_or_insert_with(|| HcMatchGenerator::new(max_window_size));
                hc.max_window_size = max_window_size;
                hc.lazy_depth = params.lazy_depth;
                hc.configure(params.hc);
                let vec_pool = &mut self.vec_pool;
                hc.reset(|mut data| {
                    data.resize(data.capacity(), 0);
                    vec_pool.push(data);
                });
            }
        }
    }
604
    /// Seed the active backend with dictionary content and an initial
    /// repeat-offset history.
    ///
    /// The dictionary is committed slice-by-slice with matching skipped, so
    /// its bytes become referenceable history without emitting sequences.
    /// The backend's internal `max_window_size` is expanded by the retained
    /// dictionary size; `reported_window_size` (the frame-header value) is
    /// deliberately left untouched.
    fn prime_with_dictionary(&mut self, dict_content: &[u8], offset_hist: [u32; 3]) {
        // Install the dictionary's repeat-offset history regardless of
        // whether any content gets committed below.
        match self.active_backend {
            MatcherBackend::Simple => self.match_generator.offset_hist = offset_hist,
            MatcherBackend::Dfast => self.dfast_matcher_mut().offset_hist = offset_hist,
            MatcherBackend::Row => self.row_matcher_mut().offset_hist = offset_hist,
            MatcherBackend::HashChain => self.hc_matcher_mut().offset_hist = offset_hist,
        }

        if dict_content.is_empty() {
            return;
        }

        // Dictionary bytes should stay addressable until produced frame output
        // itself exceeds the live window size, so optimistically grow the
        // internal window by the full dictionary length (trimmed back below
        // if part of the tail cannot be committed).
        let retained_dict_budget = dict_content.len();
        match self.active_backend {
            MatcherBackend::Simple => {
                self.match_generator.max_window_size = self
                    .match_generator
                    .max_window_size
                    .saturating_add(retained_dict_budget);
            }
            MatcherBackend::Dfast => {
                let matcher = self.dfast_matcher_mut();
                matcher.max_window_size =
                    matcher.max_window_size.saturating_add(retained_dict_budget);
            }
            MatcherBackend::Row => {
                let matcher = self.row_matcher_mut();
                matcher.max_window_size =
                    matcher.max_window_size.saturating_add(retained_dict_budget);
            }
            MatcherBackend::HashChain => {
                let matcher = self.hc_matcher_mut();
                matcher.max_window_size =
                    matcher.max_window_size.saturating_add(retained_dict_budget);
            }
        }

        let mut start = 0usize;
        let mut committed_dict_budget = 0usize;
        // insert_position needs 4 bytes of lookahead for hashing;
        // backfill_boundary_positions re-visits tail positions once the
        // next slice extends history, but cannot hash <4 byte fragments.
        let min_primed_tail = match self.active_backend {
            MatcherBackend::Simple => MIN_MATCH_LEN,
            MatcherBackend::Dfast | MatcherBackend::Row | MatcherBackend::HashChain => 4,
        };
        // Commit the dictionary in slice_size chunks, indexing each chunk
        // without emitting sequences.
        while start < dict_content.len() {
            let end = (start + self.slice_size).min(dict_content.len());
            if end - start < min_primed_tail {
                // Tail too short to hash: leave it uncommitted.
                break;
            }
            let mut space = self.get_next_space();
            space.clear();
            space.extend_from_slice(&dict_content[start..end]);
            self.commit_space(space);
            self.skip_matching_for_dictionary_priming();
            committed_dict_budget += end - start;
            start = end;
        }

        // Give back the window budget reserved for any tail that was never
        // committed above.
        let uncommitted_tail_budget = retained_dict_budget.saturating_sub(committed_dict_budget);
        if uncommitted_tail_budget > 0 {
            match self.active_backend {
                MatcherBackend::Simple => {
                    self.match_generator.max_window_size = self
                        .match_generator
                        .max_window_size
                        .saturating_sub(uncommitted_tail_budget);
                }
                MatcherBackend::Dfast => {
                    let matcher = self.dfast_matcher_mut();
                    matcher.max_window_size = matcher
                        .max_window_size
                        .saturating_sub(uncommitted_tail_budget);
                }
                MatcherBackend::Row => {
                    let matcher = self.row_matcher_mut();
                    matcher.max_window_size = matcher
                        .max_window_size
                        .saturating_sub(uncommitted_tail_budget);
                }
                MatcherBackend::HashChain => {
                    let matcher = self.hc_matcher_mut();
                    matcher.max_window_size = matcher
                        .max_window_size
                        .saturating_sub(uncommitted_tail_budget);
                }
            }
        }
        // Track committed bytes so later eviction can shrink the window back
        // via retire_dictionary_budget().
        if committed_dict_budget > 0 {
            self.dictionary_retained_budget = self
                .dictionary_retained_budget
                .saturating_add(committed_dict_budget);
        }
    }
702
    /// Window size (in bytes) advertised to the caller, as recorded in
    /// `reported_window_size`, widened to the `u64` the frame header uses.
    fn window_size(&self) -> u64 {
        self.reported_window_size as u64
    }
706
707    fn get_next_space(&mut self) -> Vec<u8> {
708        if let Some(mut space) = self.vec_pool.pop() {
709            if space.len() > self.slice_size {
710                space.truncate(self.slice_size);
711            }
712            if space.len() < self.slice_size {
713                space.resize(self.slice_size, 0);
714            }
715            return space;
716        }
717        alloc::vec![0; self.slice_size]
718    }
719
    /// Borrow the most recently committed data slice from whichever backend
    /// is currently active.
    fn get_last_space(&mut self) -> &[u8] {
        match self.active_backend {
            // The simple backend keeps its slices in `window`; the final
            // entry is the one currently being filled/matched.
            MatcherBackend::Simple => self.match_generator.window.last().unwrap().data.as_slice(),
            MatcherBackend::Dfast => self.dfast_matcher().get_last_space(),
            MatcherBackend::Row => self.row_matcher().get_last_space(),
            MatcherBackend::HashChain => self.hc_matcher().get_last_space(),
        }
    }
728
    /// Hand a filled slice to the active backend's match generator.
    ///
    /// Any buffers (and, for the simple backend, suffix stores) evicted from
    /// the matcher's window during the insert are recycled into
    /// `vec_pool`/`suffix_pool`, and the evicted byte count is reported via
    /// `retire_dictionary_budget` so retained dictionary accounting shrinks
    /// accordingly.
    fn commit_space(&mut self, space: Vec<u8>) {
        match self.active_backend {
            MatcherBackend::Simple => {
                // Borrow the pools up front so the eviction closure can use
                // them while `match_generator` is mutably borrowed
                // (disjoint field borrows).
                let vec_pool = &mut self.vec_pool;
                let mut evicted_bytes = 0usize;
                // Reuse a pooled suffix store only if it has enough slots
                // for this slice; otherwise allocate a fresh one.
                let suffixes = match self.suffix_pool.pop() {
                    Some(store) if store.slots.len() >= space.len() => store,
                    _ => SuffixStore::with_capacity(space.len()),
                };
                let suffix_pool = &mut self.suffix_pool;
                self.match_generator
                    .add_data(space, suffixes, |mut data, mut suffixes| {
                        evicted_bytes += data.len();
                        // Restore full length before pooling; note only the
                        // newly grown bytes are zeroed — reusers overwrite.
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                        // Clear all slots so the pooled store starts empty.
                        suffixes.slots.clear();
                        suffixes.slots.resize(suffixes.slots.capacity(), None);
                        suffix_pool.push(suffixes);
                    });
                self.retire_dictionary_budget(evicted_bytes);
                self.trim_after_budget_retire();
            }
            MatcherBackend::Dfast => {
                let vec_pool = &mut self.vec_pool;
                let mut evicted_bytes = 0usize;
                self.dfast_match_generator
                    .as_mut()
                    .expect("dfast backend must be initialized by reset() before use")
                    .add_data(space, |mut data| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                    });
                self.retire_dictionary_budget(evicted_bytes);
                self.trim_after_budget_retire();
            }
            MatcherBackend::Row => {
                let vec_pool = &mut self.vec_pool;
                let mut evicted_bytes = 0usize;
                self.row_match_generator
                    .as_mut()
                    .expect("row backend must be initialized by reset() before use")
                    .add_data(space, |mut data| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                    });
                self.retire_dictionary_budget(evicted_bytes);
                self.trim_after_budget_retire();
            }
            MatcherBackend::HashChain => {
                let vec_pool = &mut self.vec_pool;
                let mut evicted_bytes = 0usize;
                self.hc_match_generator
                    .as_mut()
                    .expect("hash chain backend must be initialized by reset() before use")
                    .add_data(space, |mut data| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                    });
                self.retire_dictionary_budget(evicted_bytes);
                self.trim_after_budget_retire();
            }
        }
    }
795
    /// Drive the active backend over the current slice, emitting every
    /// produced `Sequence` through `handle_sequence`.
    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
        match self.active_backend {
            MatcherBackend::Simple => {
                // `next_sequence` returns false once no bytes remain.
                while self.match_generator.next_sequence(&mut handle_sequence) {}
            }
            MatcherBackend::Dfast => self
                .dfast_matcher_mut()
                .start_matching(&mut handle_sequence),
            MatcherBackend::Row => self.row_matcher_mut().start_matching(&mut handle_sequence),
            MatcherBackend::HashChain => self.hc_matcher_mut().start_matching(&mut handle_sequence),
        }
    }
808
    /// Skip matching for the current slice with no prior knowledge about its
    /// compressibility (equivalent to `skip_matching_with_hint(None)`).
    fn skip_matching(&mut self) {
        self.skip_matching_with_hint(None);
    }
812
    /// Skip matching for the current slice, forwarding an optional hint that
    /// the data already looked incompressible so backends may index it more
    /// sparsely.
    fn skip_matching_with_hint(&mut self, incompressible_hint: Option<bool>) {
        match self.active_backend {
            MatcherBackend::Simple => self
                .match_generator
                .skip_matching_with_hint(incompressible_hint),
            MatcherBackend::Dfast => self.dfast_matcher_mut().skip_matching(incompressible_hint),
            MatcherBackend::Row => self
                .row_matcher_mut()
                .skip_matching_with_hint(incompressible_hint),
            MatcherBackend::HashChain => self.hc_matcher_mut().skip_matching(incompressible_hint),
        }
    }
825}
826
/// This stores the index of a suffix of a string by hashing the first few bytes of that suffix
/// This means that collisions just overwrite and that you need to check validity after a get
struct SuffixStore {
    // Option<NonZeroUsize> benefits from niche optimization, so the None
    // case costs no extra space. Indexes are stored shifted by +1 (insert)
    // and shifted back by -1 (get); this is sound because usize::MAX can
    // never be a valid offset.
    slots: Vec<Option<NonZeroUsize>>,
    len_log: u32,
}

impl SuffixStore {
    /// Create a store with `capacity` hash slots.
    ///
    /// `len_log` caches `floor(log2(capacity))`; `key` shifts hashes down by
    /// `64 - len_log` bits, which always yields an in-range slot index since
    /// `2^len_log <= capacity`. Panics if `capacity` is zero.
    fn with_capacity(capacity: usize) -> Self {
        let len_log = capacity.ilog2();
        Self {
            slots: alloc::vec![None; capacity],
            len_log,
        }
    }

    /// Record `idx` as the position of `suffix`, overwriting any previous
    /// occupant of the colliding slot.
    #[inline(always)]
    fn insert(&mut self, suffix: &[u8], idx: usize) {
        let slot = self.key(suffix);
        self.slots[slot] = Some(NonZeroUsize::new(idx + 1).unwrap());
    }

    /// Whether the slot for `suffix` is occupied (possibly by a colliding key).
    #[inline(always)]
    fn contains_key(&self, suffix: &[u8]) -> bool {
        self.slots[self.key(suffix)].is_some()
    }

    /// Retrieve the stored index for `suffix`, if any. Because collisions
    /// overwrite, callers must verify the bytes at the returned index.
    #[inline(always)]
    fn get(&self, suffix: &[u8]) -> Option<usize> {
        self.slots[self.key(suffix)].map(|stored| stored.get() - 1)
    }

    /// Hash the first five bytes of `suffix` into a slot index.
    #[inline(always)]
    fn key(&self, suffix: &[u8]) -> usize {
        // Capacity 1 gives len_log == 0; a shift by 64 would panic, so map
        // every key to the single slot instead.
        if self.len_log == 0 {
            return 0;
        }

        const POLY: u64 = 0xCF3BCCDCABu64;

        // Spread each byte into a distinct bit range, mix with the
        // multiplicative constant, and fold everything together with XOR.
        let hash = (u64::from(suffix[0]) << 24).wrapping_mul(POLY)
            ^ (u64::from(suffix[1]) << 32).wrapping_mul(POLY)
            ^ (u64::from(suffix[2]) << 40).wrapping_mul(POLY)
            ^ (u64::from(suffix[3]) << 48).wrapping_mul(POLY)
            ^ (u64::from(suffix[4]) << 56).wrapping_mul(POLY);

        // Keep the top len_log bits; the modulo is a belt-and-braces bound.
        let bucket = hash >> (64 - self.len_log);
        bucket as usize % self.slots.len()
    }
}
889
/// We keep a window of a few of these entries
/// All of these are valid targets for a match to be generated for
struct WindowEntry {
    /// Raw bytes of this window slice.
    data: Vec<u8>,
    /// Stores indexes into data
    suffixes: SuffixStore,
    /// Makes offset calculations efficient; maintained by
    /// `MatchGenerator::add_data` as entries are appended.
    base_offset: usize,
}
899
pub(crate) struct MatchGenerator {
    /// Upper bound (in bytes) for the sum of all window entry lengths.
    max_window_size: usize,
    /// Data window we are operating on to find matches
    /// The data we want to find matches for is in the last slice
    window: Vec<WindowEntry>,
    /// Current total byte count across all `window` entries.
    window_size: usize,
    /// Debug-only contiguous copy of the window, used to cross-check
    /// computed match offsets in `next_sequence`.
    #[cfg(debug_assertions)]
    concat_window: Vec<u8>,
    /// Index in the last slice that we already processed
    suffix_idx: usize,
    /// Gets updated when a new sequence is returned to point right behind that sequence
    last_idx_in_sequence: usize,
    /// Stride used when registering suffix hashes (1 = dense).
    hash_fill_step: usize,
    /// Recent offsets used to propose repcode match candidates.
    offset_hist: [u32; 3],
}
915
916impl MatchGenerator {
917    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
918    #[inline(always)]
919    const fn select_x86_prefix_kernel(has_avx2: bool, has_sse2: bool) -> PrefixKernel {
920        if has_avx2 {
921            return PrefixKernel::X86Avx2;
922        }
923        if has_sse2 {
924            return PrefixKernel::X86Sse2;
925        }
926        PrefixKernel::Scalar
927    }
928
929    #[cfg(feature = "std")]
930    #[inline(always)]
931    fn detect_prefix_kernel() -> PrefixKernel {
932        static KERNEL: OnceLock<PrefixKernel> = OnceLock::new();
933        *KERNEL.get_or_init(|| {
934            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
935            {
936                let kernel = Self::select_x86_prefix_kernel(
937                    is_x86_feature_detected!("avx2"),
938                    is_x86_feature_detected!("sse2"),
939                );
940                if kernel != PrefixKernel::Scalar {
941                    return kernel;
942                }
943            }
944            #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
945            {
946                if is_aarch64_feature_detected!("neon") {
947                    return PrefixKernel::Aarch64Neon;
948                }
949            }
950            PrefixKernel::Scalar
951        })
952    }
953
    /// Pick the prefix-comparison kernel without runtime detection.
    ///
    /// Without `std` there is no runtime CPU probing, so only features known
    /// at compile time (`cfg!(target_feature = ...)`) can be trusted.
    #[cfg(not(feature = "std"))]
    #[inline(always)]
    fn detect_prefix_kernel() -> PrefixKernel {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            let kernel = Self::select_x86_prefix_kernel(
                cfg!(target_feature = "avx2"),
                cfg!(target_feature = "sse2"),
            );
            if kernel != PrefixKernel::Scalar {
                return kernel;
            }
        }
        #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
        {
            if cfg!(target_feature = "neon") {
                return PrefixKernel::Aarch64Neon;
            }
        }
        PrefixKernel::Scalar
    }
975
976    #[inline(always)]
977    #[cfg(target_endian = "little")]
978    fn mismatch_byte_index(diff: usize) -> usize {
979        diff.trailing_zeros() as usize / 8
980    }
981
982    #[inline(always)]
983    #[cfg(target_endian = "big")]
984    fn mismatch_byte_index(diff: usize) -> usize {
985        diff.leading_zeros() as usize / 8
986    }
987
    /// max_size defines how many bytes will be used at most in the window used for matching
    fn new(max_size: usize) -> Self {
        Self {
            max_window_size: max_size,
            window: Vec::new(),
            window_size: 0,
            #[cfg(debug_assertions)]
            concat_window: Vec::new(),
            suffix_idx: 0,
            last_idx_in_sequence: 0,
            // Dense suffix hashing by default (insert at every position).
            hash_fill_step: 1,
            // Initial repeat offsets mandated by the Zstd format.
            offset_hist: [1, 4, 8],
        }
    }
1002
1003    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
1004        self.window_size = 0;
1005        #[cfg(debug_assertions)]
1006        self.concat_window.clear();
1007        self.suffix_idx = 0;
1008        self.last_idx_in_sequence = 0;
1009        self.offset_hist = [1, 4, 8];
1010        self.window.drain(..).for_each(|entry| {
1011            reuse_space(entry.data, entry.suffixes);
1012        });
1013    }
1014
    /// Processes bytes in the current window until either a match is found or no more matches can be found
    /// * If a match is found handle_sequence is called with the Triple variant
    /// * If no more matches can be found but there are bytes still left handle_sequence is called with the Literals variant
    /// * If no more matches can be found and no more bytes are left this returns false
    fn next_sequence(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) -> bool {
        loop {
            let last_entry = self.window.last().unwrap();
            let data_slice = &last_entry.data;

            // We already reached the end of the window, check if we need to return a Literals{}
            if self.suffix_idx >= data_slice.len() {
                if self.last_idx_in_sequence != self.suffix_idx {
                    let literals = &data_slice[self.last_idx_in_sequence..];
                    self.last_idx_in_sequence = self.suffix_idx;
                    handle_sequence(Sequence::Literals { literals });
                    return true;
                } else {
                    return false;
                }
            }

            // If the remaining data is smaller than the minimum match length we can stop and return a Literals{}
            let data_slice = &data_slice[self.suffix_idx..];
            if data_slice.len() < MIN_MATCH_LEN {
                let last_idx_in_sequence = self.last_idx_in_sequence;
                self.last_idx_in_sequence = last_entry.data.len();
                self.suffix_idx = last_entry.data.len();
                handle_sequence(Sequence::Literals {
                    literals: &last_entry.data[last_idx_in_sequence..],
                });
                return true;
            }

            // This is the key we are looking to find a match for
            let key = &data_slice[..MIN_MATCH_LEN];
            let literals_len = self.suffix_idx - self.last_idx_in_sequence;

            // Look in each window entry
            // (repcode candidates are seeded first; `repcode_candidate`
            // only proposes anything when no literals are pending)
            let mut candidate = self.repcode_candidate(data_slice, literals_len);
            for match_entry in self.window.iter() {
                if let Some(match_index) = match_entry.suffixes.get(key) {
                    let match_slice = &match_entry.data[match_index..];

                    // Check how long the common prefix actually is
                    let match_len = Self::common_prefix_len(match_slice, data_slice);

                    // Collisions in the suffix store might make this check fail
                    if match_len >= MIN_MATCH_LEN {
                        let offset = match_entry.base_offset + self.suffix_idx - match_index;

                        // If we are in debug/tests make sure the match we found is actually at the offset we calculated
                        #[cfg(debug_assertions)]
                        {
                            let unprocessed = last_entry.data.len() - self.suffix_idx;
                            let start = self.concat_window.len() - unprocessed - offset;
                            let end = start + match_len;
                            let check_slice = &self.concat_window[start..end];
                            debug_assert_eq!(check_slice, &match_slice[..match_len]);
                        }

                        // Prefer the longer match; break length ties with the
                        // smaller (cheaper to encode) offset.
                        if let Some((old_offset, old_match_len)) = candidate {
                            if match_len > old_match_len
                                || (match_len == old_match_len && offset < old_offset)
                            {
                                candidate = Some((offset, match_len));
                            }
                        } else {
                            candidate = Some((offset, match_len));
                        }
                    }
                }
            }

            if let Some((offset, match_len)) = candidate {
                // For each index in the match we found we do not need to look for another match
                // But we still want them registered in the suffix store
                self.add_suffixes_till(self.suffix_idx + match_len, self.hash_fill_step);

                // All literals that were not included between this match and the last are now included here
                let last_entry = self.window.last().unwrap();
                let literals = &last_entry.data[self.last_idx_in_sequence..self.suffix_idx];

                // Update the indexes, all indexes upto and including the current index have been included in a sequence now
                self.suffix_idx += match_len;
                self.last_idx_in_sequence = self.suffix_idx;
                // Keep the repcode history in sync with the emitted offset.
                let _ = encode_offset_with_history(
                    offset as u32,
                    literals.len() as u32,
                    &mut self.offset_hist,
                );
                handle_sequence(Sequence::Triple {
                    literals,
                    offset,
                    match_len,
                });

                return true;
            }

            // No match at this position: register the suffix (keeping any
            // earlier occupant of the slot) and advance one byte.
            let last_entry = self.window.last_mut().unwrap();
            let key = &last_entry.data[self.suffix_idx..self.suffix_idx + MIN_MATCH_LEN];
            if !last_entry.suffixes.contains_key(key) {
                last_entry.suffixes.insert(key, self.suffix_idx);
            }
            self.suffix_idx += 1;
        }
    }
1122
    /// Find the common prefix length between two byte slices
    ///
    /// Dispatches the bulk comparison to the best detected SIMD kernel and
    /// lets the scalar path finish whatever tail the kernel left over.
    #[inline(always)]
    fn common_prefix_len(a: &[u8], b: &[u8]) -> usize {
        let max = a.len().min(b.len());
        let mut off = 0usize;
        let lhs = a.as_ptr();
        let rhs = b.as_ptr();

        match Self::detect_prefix_kernel() {
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            PrefixKernel::X86Avx2 => {
                // SAFETY: kernel selection established AVX2 support, and both
                // pointers are readable for `max = min(a.len(), b.len())` bytes.
                off = unsafe { Self::prefix_len_simd_avx2(lhs, rhs, max) };
            }
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            PrefixKernel::X86Sse2 => {
                // SAFETY: as above, with SSE2 detected.
                off = unsafe { Self::prefix_len_simd_sse2(lhs, rhs, max) };
            }
            #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
            PrefixKernel::Aarch64Neon => {
                // SAFETY: as above, with NEON detected.
                off = unsafe { Self::prefix_len_simd_neon(lhs, rhs, max) };
            }
            PrefixKernel::Scalar => {}
        }

        Self::common_prefix_len_scalar(a, b, off, max)
    }
1149
    /// Word-at-a-time scalar common-prefix comparison, starting at `off`
    /// (the position where a SIMD kernel stopped, or 0) and bounded by `max`
    /// (the caller passes `min(a.len(), b.len())`).
    #[inline(always)]
    fn common_prefix_len_scalar(a: &[u8], b: &[u8], mut off: usize, max: usize) -> usize {
        let chunk = core::mem::size_of::<usize>();
        let lhs = a.as_ptr();
        let rhs = b.as_ptr();
        while off + chunk <= max {
            // SAFETY: `off + chunk <= max` and the caller passes
            // `max = min(a.len(), b.len())`, so both unaligned word reads
            // stay inside their slices.
            let lhs_word = unsafe { core::ptr::read_unaligned(lhs.add(off) as *const usize) };
            let rhs_word = unsafe { core::ptr::read_unaligned(rhs.add(off) as *const usize) };
            let diff = lhs_word ^ rhs_word;
            if diff != 0 {
                return off + Self::mismatch_byte_index(diff);
            }
            off += chunk;
        }
        // Byte-wise tail for the final sub-word remainder.
        off + core::iter::zip(&a[off..max], &b[off..max])
            .take_while(|(x, y)| x == y)
            .count()
    }
1168
    /// SSE2 prefix kernel: compares 16 bytes per iteration and returns either
    /// the offset of the first mismatching byte or the last fully-compared
    /// 16-byte boundary (callers finish the tail with the scalar path).
    ///
    /// # Safety
    /// `lhs` and `rhs` must each be readable for `max` bytes.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[target_feature(enable = "sse2")]
    unsafe fn prefix_len_simd_sse2(lhs: *const u8, rhs: *const u8, max: usize) -> usize {
        let mut off = 0usize;
        while off + 16 <= max {
            let a: __m128i = unsafe { _mm_loadu_si128(lhs.add(off).cast::<__m128i>()) };
            let b: __m128i = unsafe { _mm_loadu_si128(rhs.add(off).cast::<__m128i>()) };
            let eq = _mm_cmpeq_epi8(a, b);
            // One mask bit per byte: 1 where the bytes were equal.
            let mask = _mm_movemask_epi8(eq) as u32;
            if mask != 0xFFFF {
                // First zero bit marks the first differing byte.
                return off + (!mask).trailing_zeros() as usize;
            }
            off += 16;
        }
        off
    }
1185
    /// AVX2 prefix kernel: compares 32 bytes per iteration and returns either
    /// the offset of the first mismatching byte or the last fully-compared
    /// 32-byte boundary (callers finish the tail with the scalar path).
    ///
    /// # Safety
    /// `lhs` and `rhs` must each be readable for `max` bytes.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[target_feature(enable = "avx2")]
    unsafe fn prefix_len_simd_avx2(lhs: *const u8, rhs: *const u8, max: usize) -> usize {
        let mut off = 0usize;
        while off + 32 <= max {
            let a: __m256i = unsafe { _mm256_loadu_si256(lhs.add(off).cast::<__m256i>()) };
            let b: __m256i = unsafe { _mm256_loadu_si256(rhs.add(off).cast::<__m256i>()) };
            let eq = _mm256_cmpeq_epi8(a, b);
            // All 32 bytes equal yields an all-ones mask (u32::MAX).
            let mask = _mm256_movemask_epi8(eq) as u32;
            if mask != u32::MAX {
                return off + (!mask).trailing_zeros() as usize;
            }
            off += 32;
        }
        off
    }
1202
    /// NEON prefix kernel: compares 16 bytes per iteration, inspecting each
    /// 64-bit half of the comparison result for the first mismatching byte.
    ///
    /// # Safety
    /// `lhs` and `rhs` must each be readable for `max` bytes.
    #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
    #[target_feature(enable = "neon")]
    unsafe fn prefix_len_simd_neon(lhs: *const u8, rhs: *const u8, max: usize) -> usize {
        let mut off = 0usize;
        while off + 16 <= max {
            let a: uint8x16_t = unsafe { vld1q_u8(lhs.add(off)) };
            let b: uint8x16_t = unsafe { vld1q_u8(rhs.add(off)) };
            // Equal byte lanes compare to 0xFF; view them as two u64 halves.
            let eq = vceqq_u8(a, b);
            let lanes = vreinterpretq_u64_u8(eq);
            let low = vgetq_lane_u64(lanes, 0);
            if low != u64::MAX {
                let diff = low ^ u64::MAX;
                return off + Self::mismatch_byte_index(diff as usize);
            }
            let high = vgetq_lane_u64(lanes, 1);
            if high != u64::MAX {
                let diff = high ^ u64::MAX;
                return off + 8 + Self::mismatch_byte_index(diff as usize);
            }
            off += 16;
        }
        off
    }
1226
    /// Process bytes and add the suffixes to the suffix store up to a specific index
    ///
    /// Positions are inserted every `fill_step` bytes; only positions with at
    /// least `MIN_MATCH_LEN` bytes of lookahead can be hashed. The final
    /// hashable position before `idx` is always inserted so boundaries stay
    /// findable even with a sparse stride.
    #[inline(always)]
    fn add_suffixes_till(&mut self, idx: usize, fill_step: usize) {
        let start = self.suffix_idx;
        let last_entry = self.window.last_mut().unwrap();
        // Entries shorter than one key cannot be hashed at all.
        if last_entry.data.len() < MIN_MATCH_LEN {
            return;
        }
        let insert_limit = idx.saturating_sub(MIN_MATCH_LEN - 1);
        if insert_limit > start {
            let data = last_entry.data.as_slice();
            let suffixes = &mut last_entry.suffixes;
            if fill_step == FAST_HASH_FILL_STEP {
                // Unrolled path specialized for the common fast stride.
                Self::add_suffixes_interleaved_fast(data, suffixes, start, insert_limit);
            } else {
                let mut pos = start;
                while pos < insert_limit {
                    Self::insert_suffix_if_absent(data, suffixes, pos);
                    pos += fill_step;
                }
            }
        }

        if idx >= start + MIN_MATCH_LEN {
            // Always register the last full key ending exactly at `idx`.
            let tail_start = idx - MIN_MATCH_LEN;
            let tail_key = &last_entry.data[tail_start..tail_start + MIN_MATCH_LEN];
            if !last_entry.suffixes.contains_key(tail_key) {
                last_entry.suffixes.insert(tail_key, tail_start);
            }
        }
    }
1258
    /// Insert the suffix starting at `pos` into the store unless an earlier
    /// position already occupies the same hash slot (existing entries win).
    #[inline(always)]
    fn insert_suffix_if_absent(data: &[u8], suffixes: &mut SuffixStore, pos: usize) {
        debug_assert!(
            pos + MIN_MATCH_LEN <= data.len(),
            "insert_suffix_if_absent: pos {} + MIN_MATCH_LEN {} exceeds data.len() {}",
            pos,
            MIN_MATCH_LEN,
            data.len()
        );
        let key = &data[pos..pos + MIN_MATCH_LEN];
        if !suffixes.contains_key(key) {
            suffixes.insert(key, pos);
        }
    }
1273
1274    #[inline(always)]
1275    fn add_suffixes_interleaved_fast(
1276        data: &[u8],
1277        suffixes: &mut SuffixStore,
1278        start: usize,
1279        insert_limit: usize,
1280    ) {
1281        let lane = FAST_HASH_FILL_STEP;
1282        let mut pos = start;
1283
1284        // Pipeline-ish fill: compute and retire several hash positions per loop
1285        // so the fastest path keeps multiple independent hash lookups in flight.
1286        while pos + lane * 3 < insert_limit {
1287            let p0 = pos;
1288            let p1 = pos + lane;
1289            let p2 = pos + lane * 2;
1290            let p3 = pos + lane * 3;
1291
1292            Self::insert_suffix_if_absent(data, suffixes, p0);
1293            Self::insert_suffix_if_absent(data, suffixes, p1);
1294            Self::insert_suffix_if_absent(data, suffixes, p2);
1295            Self::insert_suffix_if_absent(data, suffixes, p3);
1296
1297            pos += lane * 4;
1298        }
1299
1300        while pos < insert_limit {
1301            Self::insert_suffix_if_absent(data, suffixes, pos);
1302            pos += lane;
1303        }
1304    }
1305
    /// Propose the best repeat-offset match at the current position, if any.
    ///
    /// NOTE(review): the candidate set (offset_hist[1], offset_hist[2] and
    /// offset_hist[0] - 1, only when no literals are pending) is assumed to
    /// mirror the repcode numbering in `encode_offset_with_history` — confirm
    /// against that helper.
    fn repcode_candidate(&self, data_slice: &[u8], literals_len: usize) -> Option<(usize, usize)> {
        // Repcodes only apply when the match starts immediately after the
        // previous sequence (no pending literals).
        if literals_len != 0 {
            return None;
        }

        let reps = [
            Some(self.offset_hist[1] as usize),
            Some(self.offset_hist[2] as usize),
            (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
        ];

        let mut best: Option<(usize, usize)> = None;
        // Tiny fixed-size dedupe so each distinct offset is probed once.
        let mut seen = [0usize; 3];
        let mut seen_len = 0usize;
        for offset in reps.into_iter().flatten() {
            if offset == 0 {
                continue;
            }
            if seen[..seen_len].contains(&offset) {
                continue;
            }
            seen[seen_len] = offset;
            seen_len += 1;

            let Some(match_len) = self.offset_match_len(offset, data_slice) else {
                continue;
            };
            if match_len < MIN_MATCH_LEN {
                continue;
            }

            // Longer match wins; ties go to the smaller offset.
            if best.is_none_or(|(old_offset, old_len)| {
                match_len > old_len || (match_len == old_len && offset < old_offset)
            }) {
                best = Some((offset, match_len));
            }
        }
        best
    }
1345
    /// Length of the match found by stepping back `offset` bytes from the
    /// current position, or `None` if the offset points outside the
    /// searchable history.
    fn offset_match_len(&self, offset: usize, data_slice: &[u8]) -> Option<usize> {
        if offset == 0 {
            return None;
        }

        let last_idx = self.window.len().checked_sub(1)?;
        let last_entry = &self.window[last_idx];
        // Bytes of history preceding the current position, summed across
        // every window entry.
        let searchable_prefix = self.window_size - (last_entry.data.len() - self.suffix_idx);
        if offset > searchable_prefix {
            return None;
        }

        // Translate the backwards offset into an (entry, index) pair by
        // walking older entries from newest to oldest.
        let mut remaining = offset;
        let (entry_idx, match_index) = if remaining <= self.suffix_idx {
            // Offset lands inside the current (last) entry.
            (last_idx, self.suffix_idx - remaining)
        } else {
            remaining -= self.suffix_idx;
            let mut found = None;
            for entry_idx in (0..last_idx).rev() {
                let len = self.window[entry_idx].data.len();
                if remaining <= len {
                    found = Some((entry_idx, len - remaining));
                    break;
                }
                remaining -= len;
            }
            found?
        };

        let match_entry = &self.window[entry_idx];
        let match_slice = &match_entry.data[match_index..];

        Some(Self::common_prefix_len(match_slice, data_slice))
    }
1380
    /// Skip matching for the whole current window entry.
    ///
    /// When callers already know the block is incompressible, index positions
    /// sparsely and keep a dense tail so the next block still gets boundary
    /// matches.
    fn skip_matching_with_hint(&mut self, incompressible_hint: Option<bool>) {
        let len = self.window.last().unwrap().data.len();
        if incompressible_hint == Some(true) {
            // Sparse fill over the bulk of the entry...
            let dense_tail = MIN_MATCH_LEN + INCOMPRESSIBLE_SKIP_STEP;
            let sparse_end = len.saturating_sub(dense_tail);
            self.add_suffixes_till(sparse_end, INCOMPRESSIBLE_SKIP_STEP);
            self.suffix_idx = sparse_end;
            // ...then a dense fill across the final boundary bytes.
            self.add_suffixes_till(len, 1);
        } else {
            self.add_suffixes_till(len, 1);
        }
        // Mark the entire entry as consumed so no sequences are emitted.
        self.suffix_idx = len;
        self.last_idx_in_sequence = len;
    }
1400
    /// Backward-compatible dense path used by tests.
    /// Equivalent to `skip_matching_with_hint(None)`.
    #[cfg(test)]
    fn skip_matching(&mut self) {
        self.skip_matching_with_hint(None);
    }
1406
    /// Add a new window entry. Will panic if the last window entry hasn't been processed properly.
    /// If any resources are released by pushing the new entry they are returned via the callback
    fn add_data(
        &mut self,
        data: Vec<u8>,
        suffixes: SuffixStore,
        reuse_space: impl FnMut(Vec<u8>, SuffixStore),
    ) {
        // The previous entry must be fully consumed before a new one starts.
        assert!(
            self.window.is_empty() || self.suffix_idx == self.window.last().unwrap().data.len()
        );
        self.reserve(data.len(), reuse_space);
        #[cfg(debug_assertions)]
        self.concat_window.extend_from_slice(&data);

        // Shift every existing entry's base_offset by the previous last
        // entry's length, keeping offsets relative to the newest entry.
        if let Some(last_len) = self.window.last().map(|last| last.data.len()) {
            for entry in self.window.iter_mut() {
                entry.base_offset += last_len;
            }
        }

        let len = data.len();
        self.window.push(WindowEntry {
            data,
            suffixes,
            base_offset: 0,
        });
        self.window_size += len;
        // Matching restarts at the beginning of the new entry.
        self.suffix_idx = 0;
        self.last_idx_in_sequence = 0;
    }
1438
1439    /// Reserve space for a new window entry
1440    /// If any resources are released by pushing the new entry they are returned via the callback
1441    fn reserve(&mut self, amount: usize, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
1442        assert!(self.max_window_size >= amount);
1443        while self.window_size + amount > self.max_window_size {
1444            let removed = self.window.remove(0);
1445            self.window_size -= removed.data.len();
1446            #[cfg(debug_assertions)]
1447            self.concat_window.drain(0..removed.data.len());
1448
1449            let WindowEntry {
1450                suffixes,
1451                data: leaked_vec,
1452                base_offset: _,
1453            } = removed;
1454            reuse_space(leaked_vec, suffixes);
1455        }
1456    }
1457}
1458
/// Double-fast matcher state: two hash tables (4-byte and 8-byte keys) over a
/// contiguous history buffer, plus a repcode offset history.
struct DfastMatchGenerator {
    /// Upper bound on total bytes retained across `window` entries.
    max_window_size: usize,
    /// Per-block buffers still inside the search window (oldest at the front).
    window: VecDeque<Vec<u8>>,
    /// Sum of the lengths of all buffers in `window`.
    window_size: usize,
    // We keep a contiguous searchable history to avoid rebuilding and reseeding
    // the matcher state from disjoint block buffers on every block.
    history: Vec<u8>,
    /// Index into `history` where the live bytes begin; the stale prefix is
    /// removed lazily by `compact_history`.
    history_start: usize,
    /// Absolute stream position of the first live history byte.
    history_abs_start: usize,
    /// Most-recent offsets used for repcode candidates.
    offset_hist: [u32; 3],
    /// 4-byte-hash buckets; newest candidate position is at index 0.
    short_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
    /// 8-byte-hash buckets; newest candidate position is at index 0.
    long_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
    /// log2 of the hash-table length; tables are reallocated when it changes.
    hash_bits: usize,
    /// Selects the ip0..ip3 fast loop in `start_matching`.
    use_fast_loop: bool,
    // Lazy match lookahead depth (internal tuning parameter).
    lazy_depth: u8,
}
1476
/// A potential match: `match_len` bytes at absolute position `start`, copied
/// from `offset` bytes further back in the history.
#[derive(Copy, Clone)]
struct MatchCandidate {
    // Absolute stream position where the match begins.
    start: usize,
    // Backwards distance to the source of the matched bytes.
    offset: usize,
    // Number of matching bytes.
    match_len: usize,
}
1483
1484fn best_len_offset_candidate(
1485    lhs: Option<MatchCandidate>,
1486    rhs: Option<MatchCandidate>,
1487) -> Option<MatchCandidate> {
1488    match (lhs, rhs) {
1489        (None, other) | (other, None) => other,
1490        (Some(lhs), Some(rhs)) => {
1491            if rhs.match_len > lhs.match_len
1492                || (rhs.match_len == lhs.match_len && rhs.offset < lhs.offset)
1493            {
1494                Some(rhs)
1495            } else {
1496                Some(lhs)
1497            }
1498        }
1499    }
1500}
1501
1502fn extend_backwards_shared(
1503    concat: &[u8],
1504    history_abs_start: usize,
1505    mut candidate_pos: usize,
1506    mut abs_pos: usize,
1507    mut match_len: usize,
1508    lit_len: usize,
1509) -> MatchCandidate {
1510    let min_abs_pos = abs_pos - lit_len;
1511    while abs_pos > min_abs_pos
1512        && candidate_pos > history_abs_start
1513        && concat[candidate_pos - history_abs_start - 1] == concat[abs_pos - history_abs_start - 1]
1514    {
1515        candidate_pos -= 1;
1516        abs_pos -= 1;
1517        match_len += 1;
1518    }
1519    MatchCandidate {
1520        start: abs_pos,
1521        offset: abs_pos - candidate_pos,
1522        match_len,
1523    }
1524}
1525
/// Find the best repeat-offset (repcode) match at `abs_pos`, if any.
///
/// The candidate offsets come from `offset_hist`. With a non-empty literal
/// run the three offsets are tried in order; with `lit_len == 0` the order
/// shifts to rep2, rep3, rep1-1 — this mirrors the zstd repeat-offset
/// convention (NOTE(review): verify against the sequence encoder's history
/// update rules).
fn repcode_candidate_shared(
    concat: &[u8],
    history_abs_start: usize,
    offset_hist: [u32; 3],
    abs_pos: usize,
    lit_len: usize,
    min_match_len: usize,
) -> Option<MatchCandidate> {
    let reps = if lit_len == 0 {
        [
            Some(offset_hist[1] as usize),
            Some(offset_hist[2] as usize),
            // rep1-1 is only valid when it stays positive.
            (offset_hist[0] > 1).then_some((offset_hist[0] - 1) as usize),
        ]
    } else {
        [
            Some(offset_hist[0] as usize),
            Some(offset_hist[1] as usize),
            Some(offset_hist[2] as usize),
        ]
    };

    // Not enough bytes left to form a minimum-length match.
    let current_idx = abs_pos - history_abs_start;
    if current_idx + min_match_len > concat.len() {
        return None;
    }

    let mut best = None;
    for rep in reps.into_iter().flatten() {
        // Reject offsets that reach before the stream start...
        if rep == 0 || rep > abs_pos {
            continue;
        }
        let candidate_pos = abs_pos - rep;
        // ...or before the retained history window.
        if candidate_pos < history_abs_start {
            continue;
        }
        let candidate_idx = candidate_pos - history_abs_start;
        let match_len =
            MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
        if match_len >= min_match_len {
            // Try to absorb preceding literals into the match before ranking.
            let candidate = extend_backwards_shared(
                concat,
                history_abs_start,
                candidate_pos,
                abs_pos,
                match_len,
                lit_len,
            );
            best = best_len_offset_candidate(best, Some(candidate));
        }
    }
    best
}
1579
/// Tuning knobs for `pick_lazy_match_shared`.
#[derive(Copy, Clone)]
struct LazyMatchConfig {
    // A match at least this long is accepted immediately (no lazy probe).
    target_len: usize,
    // Shortest usable match; bounds how close to the history end probing may run.
    min_match_len: usize,
    // How many positions ahead to probe (values beyond 1 enable the +2 probe).
    lazy_depth: u8,
    // Absolute position one past the last searchable history byte.
    history_abs_end: usize,
}
1587
1588fn pick_lazy_match_shared(
1589    abs_pos: usize,
1590    lit_len: usize,
1591    best: Option<MatchCandidate>,
1592    config: LazyMatchConfig,
1593    mut best_match_at: impl FnMut(usize, usize) -> Option<MatchCandidate>,
1594) -> Option<MatchCandidate> {
1595    let best = best?;
1596    if best.match_len >= config.target_len
1597        || abs_pos + 1 + config.min_match_len > config.history_abs_end
1598    {
1599        return Some(best);
1600    }
1601
1602    let next = best_match_at(abs_pos + 1, lit_len + 1);
1603    if let Some(next) = next
1604        && (next.match_len > best.match_len
1605            || (next.match_len == best.match_len && next.offset < best.offset))
1606    {
1607        return None;
1608    }
1609
1610    if config.lazy_depth >= 2 && abs_pos + 2 + config.min_match_len <= config.history_abs_end {
1611        let next2 = best_match_at(abs_pos + 2, lit_len + 2);
1612        if let Some(next2) = next2
1613            && next2.match_len > best.match_len + 1
1614        {
1615            return None;
1616        }
1617    }
1618
1619    Some(best)
1620}
1621
1622impl DfastMatchGenerator {
    // Keep a short dense tail at block boundaries for two related reasons:
    // 1) insert_position() needs short (4-byte) and long (8-byte) lookahead,
    //    so appending a new block can make starts from the previous block newly
    //    hashable and require backfill;
    // 2) we also need enough trailing bytes from the previous block to preserve
    //    cross-block matching for the minimum match length.
    const BOUNDARY_DENSE_TAIL_LEN: usize = DFAST_MIN_MATCH_LEN + 3;

    /// Create a dfast matcher bounded by `max_window_size` bytes of history.
    ///
    /// Hash tables are left empty and allocated lazily by
    /// `ensure_hash_tables`; `offset_hist` starts with the initial repcode
    /// offsets (1, 4, 8).
    fn new(max_window_size: usize) -> Self {
        Self {
            max_window_size,
            window: VecDeque::new(),
            window_size: 0,
            history: Vec::new(),
            history_start: 0,
            history_abs_start: 0,
            offset_hist: [1, 4, 8],
            short_hash: Vec::new(),
            long_hash: Vec::new(),
            hash_bits: DFAST_HASH_BITS,
            use_fast_loop: false,
            lazy_depth: 1,
        }
    }
1647
1648    fn set_hash_bits(&mut self, bits: usize) {
1649        let clamped = bits.clamp(MIN_WINDOW_LOG as usize, DFAST_HASH_BITS);
1650        if self.hash_bits != clamped {
1651            self.hash_bits = clamped;
1652            self.short_hash = Vec::new();
1653            self.long_hash = Vec::new();
1654        }
1655    }
1656
    /// Drop all matcher state and hand every block buffer back to the caller.
    ///
    /// Hash tables keep their allocation but have all slots cleared; buffers
    /// are resized back to their full capacity before `reuse_space` sees them.
    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
        self.window_size = 0;
        self.history.clear();
        self.history_start = 0;
        self.history_abs_start = 0;
        self.offset_hist = [1, 4, 8];
        // If one table was allocated both were; clear without freeing so a
        // reused matcher doesn't reallocate.
        if !self.short_hash.is_empty() {
            self.short_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
            self.long_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
        }
        for mut data in self.window.drain(..) {
            data.resize(data.capacity(), 0);
            reuse_space(data);
        }
    }
1672
    /// Borrow the most recently added block's bytes.
    ///
    /// Panics if no block has been added yet.
    fn get_last_space(&self) -> &[u8] {
        self.window.back().unwrap().as_slice()
    }
1676
    /// Append a block to the search window, evicting the oldest blocks first
    /// so the total stays within `max_window_size`.
    ///
    /// Eviction only advances `history_start`/`history_abs_start`; the stale
    /// prefix bytes are physically removed later by `compact_history`.
    /// Panics if `data` alone exceeds the window budget.
    fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
        assert!(data.len() <= self.max_window_size);
        while self.window_size + data.len() > self.max_window_size {
            let removed = self.window.pop_front().unwrap();
            self.window_size -= removed.len();
            self.history_start += removed.len();
            self.history_abs_start += removed.len();
            reuse_space(removed);
        }
        self.compact_history();
        self.history.extend_from_slice(&data);
        self.window_size += data.len();
        self.window.push_back(data);
    }
1691
1692    fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
1693        while self.window_size > self.max_window_size {
1694            let removed = self.window.pop_front().unwrap();
1695            self.window_size -= removed.len();
1696            self.history_start += removed.len();
1697            self.history_abs_start += removed.len();
1698            reuse_space(removed);
1699        }
1700    }
1701
    /// Index the current block without emitting any sequences.
    ///
    /// Incompressible blocks (by hint or heuristic) are indexed with a coarse
    /// stride plus a dense tail; otherwise every position is indexed. The
    /// previous block's tail is also backfilled, since appending this block
    /// made those starts newly hashable.
    fn skip_matching(&mut self, incompressible_hint: Option<bool>) {
        self.ensure_hash_tables();
        let current_len = self.window.back().unwrap().len();
        let current_abs_start = self.history_abs_start + self.window_size - current_len;
        let current_abs_end = current_abs_start + current_len;
        // Backfill the previous block's boundary tail (insert_positions clamps
        // to the live history, so reaching before it is harmless).
        let tail_start = current_abs_start.saturating_sub(Self::BOUNDARY_DENSE_TAIL_LEN);
        if tail_start < current_abs_start {
            self.insert_positions(tail_start, current_abs_start);
        }

        let used_sparse = incompressible_hint
            .unwrap_or_else(|| self.block_looks_incompressible(current_abs_start, current_abs_end));
        if used_sparse {
            self.insert_positions_with_step(
                current_abs_start,
                current_abs_end,
                DFAST_INCOMPRESSIBLE_SKIP_STEP,
            );
        } else {
            self.insert_positions(current_abs_start, current_abs_end);
        }

        // Seed the tail densely only after sparse insertion so the next block
        // can match across the boundary without rehashing the full block twice.
        if used_sparse {
            let tail_start = current_abs_end
                .saturating_sub(Self::BOUNDARY_DENSE_TAIL_LEN)
                .max(current_abs_start);
            if tail_start < current_abs_end {
                self.insert_positions(tail_start, current_abs_end);
            }
        }
    }
1735
    /// Densely index the current block without emitting any sequences.
    ///
    /// Also backfills the previous block's boundary tail, which became newly
    /// hashable when this block was appended.
    fn skip_matching_dense(&mut self) {
        self.ensure_hash_tables();
        let current_len = self.window.back().unwrap().len();
        let current_abs_start = self.history_abs_start + self.window_size - current_len;
        let current_abs_end = current_abs_start + current_len;
        // Never reach before the retained history.
        let backfill_start = current_abs_start
            .saturating_sub(Self::BOUNDARY_DENSE_TAIL_LEN)
            .max(self.history_abs_start);
        if backfill_start < current_abs_start {
            self.insert_positions(backfill_start, current_abs_start);
        }
        self.insert_positions(current_abs_start, current_abs_end);
    }
1749
1750    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
1751        self.ensure_hash_tables();
1752
1753        let current_len = self.window.back().unwrap().len();
1754        if current_len == 0 {
1755            return;
1756        }
1757
1758        let current_abs_start = self.history_abs_start + self.window_size - current_len;
1759        if self.use_fast_loop {
1760            self.start_matching_fast_loop(current_abs_start, current_len, &mut handle_sequence);
1761            return;
1762        }
1763        self.start_matching_general(current_abs_start, current_len, &mut handle_sequence);
1764    }
1765
    /// Greedy/lazy matching loop with adaptive skipping over hard-to-compress
    /// regions.
    fn start_matching_general(
        &mut self,
        current_abs_start: usize,
        current_len: usize,
        handle_sequence: &mut impl for<'a> FnMut(Sequence<'a>),
    ) {
        // Blocks that look incompressible get an unconditionally growing skip.
        let use_adaptive_skip =
            self.block_looks_incompressible(current_abs_start, current_abs_start + current_len);
        let mut pos = 0usize;
        // Start of the pending literal run, relative to the block.
        let mut literals_start = 0usize;
        let mut skip_step = 1usize;
        let mut next_skip_growth_pos = DFAST_SKIP_STEP_GROWTH_INTERVAL;
        let mut miss_run = 0usize;
        while pos + DFAST_MIN_MATCH_LEN <= current_len {
            let abs_pos = current_abs_start + pos;
            let lit_len = pos - literals_start;

            let best = self.best_match(abs_pos, lit_len);
            if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
                let start = self.emit_candidate(
                    current_abs_start,
                    &mut literals_start,
                    candidate,
                    handle_sequence,
                );
                // Resume after the match and reset the skip heuristics.
                pos = start + candidate.match_len;
                skip_step = 1;
                next_skip_growth_pos = pos.saturating_add(DFAST_SKIP_STEP_GROWTH_INTERVAL);
                miss_run = 0;
            } else {
                self.insert_position(abs_pos);
                miss_run = miss_run.saturating_add(1);
                // A long local run of misses enables a (tightly capped) skip
                // even in otherwise compressible blocks.
                let use_local_adaptive_skip = miss_run >= DFAST_LOCAL_SKIP_TRIGGER;
                if use_adaptive_skip || use_local_adaptive_skip {
                    let skip_cap = if use_adaptive_skip {
                        DFAST_MAX_SKIP_STEP
                    } else {
                        2
                    };
                    // Grow the stride only every DFAST_SKIP_STEP_GROWTH_INTERVAL bytes.
                    if pos >= next_skip_growth_pos {
                        skip_step = (skip_step + 1).min(skip_cap);
                        next_skip_growth_pos =
                            next_skip_growth_pos.saturating_add(DFAST_SKIP_STEP_GROWTH_INTERVAL);
                    }
                    pos = pos.saturating_add(skip_step);
                } else {
                    pos += 1;
                }
            }
        }

        // Index any remaining hashable starts, then flush trailing literals.
        self.seed_remaining_hashable_starts(current_abs_start, current_len, pos);
        self.emit_trailing_literals(literals_start, handle_sequence);
    }
1820
    /// Greedy fast loop: per round, try a repcode two bytes ahead (ip2) and a
    /// hash match at the current byte (ip0); on a miss, seed up to four
    /// consecutive starts before advancing.
    fn start_matching_fast_loop(
        &mut self,
        current_abs_start: usize,
        current_len: usize,
        handle_sequence: &mut impl for<'a> FnMut(Sequence<'a>),
    ) {
        // Strictly incompressible blocks may grow the skip step without
        // waiting for a local miss run.
        let block_is_strict_incompressible = self
            .block_looks_incompressible_strict(current_abs_start, current_abs_start + current_len);
        let mut pos = 0usize;
        // Start of the pending literal run, relative to the block.
        let mut literals_start = 0usize;
        let mut skip_step = 1usize;
        let mut next_skip_growth_pos = DFAST_SKIP_STEP_GROWTH_INTERVAL;
        let mut miss_run = 0usize;
        while pos + DFAST_MIN_MATCH_LEN <= current_len {
            let ip0 = pos;
            let ip1 = ip0.saturating_add(1);
            let ip2 = ip0.saturating_add(2);
            let ip3 = ip0.saturating_add(3);

            let abs_ip0 = current_abs_start + ip0;
            let lit_len_ip0 = ip0 - literals_start;

            // Repcode probe at ip2; accepted only if its backwards-extended
            // start stays inside the pending literal run of this block.
            if ip2 + DFAST_MIN_MATCH_LEN <= current_len {
                let abs_ip2 = current_abs_start + ip2;
                let lit_len_ip2 = ip2 - literals_start;
                if let Some(rep) = self.repcode_candidate(abs_ip2, lit_len_ip2)
                    && rep.start >= current_abs_start + literals_start
                    && rep.start <= abs_ip2
                {
                    let start = self.emit_candidate(
                        current_abs_start,
                        &mut literals_start,
                        rep,
                        handle_sequence,
                    );
                    pos = start + rep.match_len;
                    skip_step = 1;
                    next_skip_growth_pos = pos.saturating_add(DFAST_SKIP_STEP_GROWTH_INTERVAL);
                    miss_run = 0;
                    continue;
                }
            }

            // No lazy evaluation here: take the best hash/repcode match at ip0.
            let best = self.best_match(abs_ip0, lit_len_ip0);
            if let Some(candidate) = best {
                let start = self.emit_candidate(
                    current_abs_start,
                    &mut literals_start,
                    candidate,
                    handle_sequence,
                );
                pos = start + candidate.match_len;
                skip_step = 1;
                next_skip_growth_pos = pos.saturating_add(DFAST_SKIP_STEP_GROWTH_INTERVAL);
                miss_run = 0;
            } else {
                // Miss: seed ip0..=ip3 (where 4 lookahead bytes exist) so
                // future searches can still land on these starts.
                self.insert_position(abs_ip0);
                if ip1 + 4 <= current_len {
                    self.insert_position(current_abs_start + ip1);
                }
                if ip2 + 4 <= current_len {
                    self.insert_position(current_abs_start + ip2);
                }
                if ip3 + 4 <= current_len {
                    self.insert_position(current_abs_start + ip3);
                }
                miss_run = miss_run.saturating_add(1);
                if block_is_strict_incompressible || miss_run >= DFAST_LOCAL_SKIP_TRIGGER {
                    let skip_cap = DFAST_MAX_SKIP_STEP;
                    // Grow the stride only every DFAST_SKIP_STEP_GROWTH_INTERVAL bytes.
                    if pos >= next_skip_growth_pos {
                        skip_step = (skip_step + 1).min(skip_cap);
                        next_skip_growth_pos =
                            next_skip_growth_pos.saturating_add(DFAST_SKIP_STEP_GROWTH_INTERVAL);
                    }
                    pos = pos.saturating_add(skip_step);
                } else {
                    skip_step = 1;
                    next_skip_growth_pos = pos.saturating_add(DFAST_SKIP_STEP_GROWTH_INTERVAL);
                    pos += 1;
                }
            }
        }

        // Index any remaining hashable starts, then flush trailing literals.
        self.seed_remaining_hashable_starts(current_abs_start, current_len, pos);
        self.emit_trailing_literals(literals_start, handle_sequence);
    }
1907
1908    fn seed_remaining_hashable_starts(
1909        &mut self,
1910        current_abs_start: usize,
1911        current_len: usize,
1912        pos: usize,
1913    ) {
1914        let boundary_tail_start = current_len.saturating_sub(Self::BOUNDARY_DENSE_TAIL_LEN);
1915        let mut seed_pos = pos.min(current_len).min(boundary_tail_start);
1916        while seed_pos + DFAST_SHORT_HASH_LOOKAHEAD <= current_len {
1917            self.insert_position(current_abs_start + seed_pos);
1918            seed_pos += 1;
1919        }
1920    }
1921
    /// Emit `candidate` as a `Sequence::Triple` and keep the matcher state in
    /// sync.
    ///
    /// Indexes every position from the pending literal run through the end of
    /// the match, updates the repcode history the same way the block encoder
    /// will, and advances `literals_start` past the match. Returns the match
    /// start relative to the current block.
    fn emit_candidate(
        &mut self,
        current_abs_start: usize,
        literals_start: &mut usize,
        candidate: MatchCandidate,
        handle_sequence: &mut impl for<'a> FnMut(Sequence<'a>),
    ) -> usize {
        // Index the covered region so later searches can match into it.
        self.insert_positions(
            current_abs_start + *literals_start,
            candidate.start + candidate.match_len,
        );
        let current = self.window.back().unwrap().as_slice();
        let start = candidate.start - current_abs_start;
        let literals = &current[*literals_start..start];
        handle_sequence(Sequence::Triple {
            literals,
            offset: candidate.offset,
            match_len: candidate.match_len,
        });
        // Only the history side effect matters here; the encoded value is
        // recomputed when the sequence is serialized.
        let _ = encode_offset_with_history(
            candidate.offset as u32,
            literals.len() as u32,
            &mut self.offset_hist,
        );
        *literals_start = start + candidate.match_len;
        start
    }
1949
1950    fn emit_trailing_literals(
1951        &self,
1952        literals_start: usize,
1953        handle_sequence: &mut impl for<'a> FnMut(Sequence<'a>),
1954    ) {
1955        if literals_start < self.window.back().unwrap().len() {
1956            let current = self.window.back().unwrap().as_slice();
1957            handle_sequence(Sequence::Literals {
1958                literals: &current[literals_start..],
1959            });
1960        }
1961    }
1962
1963    fn ensure_hash_tables(&mut self) {
1964        let table_len = 1usize << self.hash_bits;
1965        if self.short_hash.len() != table_len {
1966            // This is intentionally lazy so Fastest/Uncompressed never pay the
1967            // ~dfast-level memory cost. The current size tracks the issue's
1968            // zstd level-3 style parameters rather than a generic low-memory preset.
1969            self.short_hash = alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; table_len];
1970            self.long_hash = alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; table_len];
1971        }
1972    }
1973
1974    fn compact_history(&mut self) {
1975        if self.history_start == 0 {
1976            return;
1977        }
1978        if self.history_start >= self.max_window_size
1979            || self.history_start * 2 >= self.history.len()
1980        {
1981            self.history.drain(..self.history_start);
1982            self.history_start = 0;
1983        }
1984    }
1985
    /// The still-searchable slice of the concatenated history.
    fn live_history(&self) -> &[u8] {
        &self.history[self.history_start..]
    }

    /// Absolute stream position one past the last live history byte.
    fn history_abs_end(&self) -> usize {
        self.history_abs_start + self.live_history().len()
    }
1993
1994    fn best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1995        let rep = self.repcode_candidate(abs_pos, lit_len);
1996        let hash = self.hash_candidate(abs_pos, lit_len);
1997        best_len_offset_candidate(rep, hash)
1998    }
1999
2000    fn pick_lazy_match(
2001        &self,
2002        abs_pos: usize,
2003        lit_len: usize,
2004        best: Option<MatchCandidate>,
2005    ) -> Option<MatchCandidate> {
2006        pick_lazy_match_shared(
2007            abs_pos,
2008            lit_len,
2009            best,
2010            LazyMatchConfig {
2011                target_len: DFAST_TARGET_LEN,
2012                min_match_len: DFAST_MIN_MATCH_LEN,
2013                lazy_depth: self.lazy_depth,
2014                history_abs_end: self.history_abs_end(),
2015            },
2016            |next_pos, next_lit_len| self.best_match(next_pos, next_lit_len),
2017        )
2018    }
2019
    /// Best repcode match at `abs_pos` using this matcher's offset history;
    /// see `repcode_candidate_shared`.
    fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        repcode_candidate_shared(
            self.live_history(),
            self.history_abs_start,
            self.offset_hist,
            abs_pos,
            lit_len,
            DFAST_MIN_MATCH_LEN,
        )
    }
2030
2031    fn hash_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
2032        let concat = self.live_history();
2033        let current_idx = abs_pos - self.history_abs_start;
2034        let mut best = None;
2035        for candidate_pos in self.long_candidates(abs_pos) {
2036            if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos {
2037                continue;
2038            }
2039            let candidate_idx = candidate_pos - self.history_abs_start;
2040            let match_len =
2041                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
2042            if match_len >= DFAST_MIN_MATCH_LEN {
2043                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
2044                best = best_len_offset_candidate(best, Some(candidate));
2045                if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) {
2046                    return best;
2047                }
2048            }
2049        }
2050
2051        for candidate_pos in self.short_candidates(abs_pos) {
2052            if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos {
2053                continue;
2054            }
2055            let candidate_idx = candidate_pos - self.history_abs_start;
2056            let match_len =
2057                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
2058            if match_len >= DFAST_MIN_MATCH_LEN {
2059                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
2060                best = best_len_offset_candidate(best, Some(candidate));
2061                if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) {
2062                    return best;
2063                }
2064            }
2065        }
2066        best
2067    }
2068
    /// Grow a match backwards over the pending literal run; see
    /// `extend_backwards_shared`.
    fn extend_backwards(
        &self,
        candidate_pos: usize,
        abs_pos: usize,
        match_len: usize,
        lit_len: usize,
    ) -> MatchCandidate {
        extend_backwards_shared(
            self.live_history(),
            self.history_abs_start,
            candidate_pos,
            abs_pos,
            match_len,
            lit_len,
        )
    }
2085
2086    fn insert_positions(&mut self, start: usize, end: usize) {
2087        let start = start.max(self.history_abs_start);
2088        let end = end.min(self.history_abs_end());
2089        for pos in start..end {
2090            self.insert_position(pos);
2091        }
2092    }
2093
2094    fn insert_positions_with_step(&mut self, start: usize, end: usize, step: usize) {
2095        let start = start.max(self.history_abs_start);
2096        let end = end.min(self.history_abs_end());
2097        if step <= 1 {
2098            self.insert_positions(start, end);
2099            return;
2100        }
2101        let mut pos = start;
2102        while pos < end {
2103            self.insert_position(pos);
2104            pos = pos.saturating_add(step);
2105        }
2106    }
2107
2108    fn insert_position(&mut self, pos: usize) {
2109        let idx = pos - self.history_abs_start;
2110        let short = {
2111            let concat = self.live_history();
2112            (idx + 4 <= concat.len()).then(|| self.hash4(&concat[idx..]))
2113        };
2114        if let Some(short) = short {
2115            let bucket = &mut self.short_hash[short];
2116            if bucket[0] != pos {
2117                bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
2118                bucket[0] = pos;
2119            }
2120        }
2121
2122        let long = {
2123            let concat = self.live_history();
2124            (idx + 8 <= concat.len()).then(|| self.hash8(&concat[idx..]))
2125        };
2126        if let Some(long) = long {
2127            let bucket = &mut self.long_hash[long];
2128            if bucket[0] != pos {
2129                bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
2130                bucket[0] = pos;
2131            }
2132        }
2133    }
2134
    /// Candidate positions from the 4-byte hash bucket for `pos`.
    ///
    /// Empty when fewer than 4 lookahead bytes remain; vacant
    /// (`DFAST_EMPTY_SLOT`) entries are filtered out.
    fn short_candidates(&self, pos: usize) -> impl Iterator<Item = usize> + '_ {
        let concat = self.live_history();
        let idx = pos - self.history_abs_start;
        (idx + 4 <= concat.len())
            .then(|| self.short_hash[self.hash4(&concat[idx..])])
            .into_iter()
            .flatten()
            .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
    }
2144
    /// Candidate positions from the 8-byte hash bucket for `pos`.
    ///
    /// Empty when fewer than 8 lookahead bytes remain; vacant
    /// (`DFAST_EMPTY_SLOT`) entries are filtered out.
    fn long_candidates(&self, pos: usize) -> impl Iterator<Item = usize> + '_ {
        let concat = self.live_history();
        let idx = pos - self.history_abs_start;
        (idx + 8 <= concat.len())
            .then(|| self.long_hash[self.hash8(&concat[idx..])])
            .into_iter()
            .flatten()
            .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
    }
2154
    /// Hash the next 4 bytes (read little-endian) into a table index.
    fn hash4(&self, data: &[u8]) -> usize {
        let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64;
        self.hash_index(value)
    }

    /// Hash the next 8 bytes (read little-endian) into a table index.
    fn hash8(&self, data: &[u8]) -> usize {
        let value = u64::from_le_bytes(data[..8].try_into().unwrap());
        self.hash_index(value)
    }
2164
2165    fn block_looks_incompressible(&self, start: usize, end: usize) -> bool {
2166        let live = self.live_history();
2167        if start >= end || start < self.history_abs_start {
2168            return false;
2169        }
2170        let start_idx = start - self.history_abs_start;
2171        let end_idx = end - self.history_abs_start;
2172        if end_idx > live.len() {
2173            return false;
2174        }
2175        let block = &live[start_idx..end_idx];
2176        block_looks_incompressible(block)
2177    }
2178
2179    fn block_looks_incompressible_strict(&self, start: usize, end: usize) -> bool {
2180        let live = self.live_history();
2181        if start >= end || start < self.history_abs_start {
2182            return false;
2183        }
2184        let start_idx = start - self.history_abs_start;
2185        let end_idx = end - self.history_abs_start;
2186        if end_idx > live.len() {
2187            return false;
2188        }
2189        let block = &live[start_idx..end_idx];
2190        block_looks_incompressible_strict(block)
2191    }
2192
    /// Multiplicative hash: keep the top `hash_bits` bits of `value * PRIME`.
    /// `hash_bits` is clamped to be nonzero elsewhere, so the shift is < 64.
    fn hash_index(&self, value: u64) -> usize {
        const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
        ((value.wrapping_mul(PRIME)) >> (64 - self.hash_bits)) as usize
    }
2197}
2198
/// Row-based matcher state over a contiguous history buffer.
struct RowMatchGenerator {
    /// Upper bound on total bytes retained across `window` entries.
    max_window_size: usize,
    /// Per-block buffers still inside the search window (oldest at the front).
    window: VecDeque<Vec<u8>>,
    /// Sum of the lengths of all buffers in `window`.
    window_size: usize,
    /// Contiguous concatenation of the window blocks.
    history: Vec<u8>,
    /// Index into `history` where the live bytes begin.
    history_start: usize,
    /// Absolute stream position of the first live history byte.
    history_abs_start: usize,
    /// Most-recent offsets used for repcode candidates.
    offset_hist: [u32; 3],
    /// log2 of the number of rows (total hash bits minus `row_log`).
    row_hash_log: usize,
    /// log2 of the row width; clamped to 4..=6 by `configure`.
    row_log: usize,
    /// Search-effort bound applied during matching.
    search_depth: usize,
    /// Match length at which searching is cut short.
    target_len: usize,
    // Lazy match lookahead depth (internal tuning parameter).
    lazy_depth: u8,
    // Per-row head bytes — NOTE(review): the table access patterns live
    // outside this chunk; presumably the rotating insertion cursor per row.
    row_heads: Vec<u8>,
    /// Candidate positions; `ROW_EMPTY_SLOT` marks a vacant slot.
    row_positions: Vec<usize>,
    // Per-slot tag bytes — presumably a cheap prefilter before comparing
    // positions; verify against the row search implementation.
    row_tags: Vec<u8>,
}
2216
2217impl RowMatchGenerator {
    /// Create a row matcher bounded by `max_window_size` bytes of history.
    ///
    /// Row tables start empty (sized lazily elsewhere); `offset_hist` starts
    /// with the initial repcode offsets (1, 4, 8), and the row geometry
    /// defaults to the `ROW_*` module constants.
    fn new(max_window_size: usize) -> Self {
        Self {
            max_window_size,
            window: VecDeque::new(),
            window_size: 0,
            history: Vec::new(),
            history_start: 0,
            history_abs_start: 0,
            offset_hist: [1, 4, 8],
            row_hash_log: ROW_HASH_BITS - ROW_LOG,
            row_log: ROW_LOG,
            search_depth: ROW_SEARCH_DEPTH,
            target_len: ROW_TARGET_LEN,
            lazy_depth: 1,
            row_heads: Vec::new(),
            row_positions: Vec::new(),
            row_tags: Vec::new(),
        }
    }
2237
2238    fn set_hash_bits(&mut self, bits: usize) {
2239        let clamped = bits.clamp(self.row_log + 1, ROW_HASH_BITS);
2240        let row_hash_log = clamped.saturating_sub(self.row_log);
2241        if self.row_hash_log != row_hash_log {
2242            self.row_hash_log = row_hash_log;
2243            self.row_heads.clear();
2244            self.row_positions.clear();
2245            self.row_tags.clear();
2246        }
2247    }
2248
2249    fn configure(&mut self, config: RowConfig) {
2250        self.row_log = config.row_log.clamp(4, 6);
2251        self.search_depth = config.search_depth;
2252        self.target_len = config.target_len;
2253        self.set_hash_bits(config.hash_bits.max(self.row_log + 1));
2254    }
2255
2256    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
2257        self.window_size = 0;
2258        self.history.clear();
2259        self.history_start = 0;
2260        self.history_abs_start = 0;
2261        self.offset_hist = [1, 4, 8];
2262        self.row_heads.fill(0);
2263        self.row_positions.fill(ROW_EMPTY_SLOT);
2264        self.row_tags.fill(0);
2265        for mut data in self.window.drain(..) {
2266            data.resize(data.capacity(), 0);
2267            reuse_space(data);
2268        }
2269    }
2270
2271    fn get_last_space(&self) -> &[u8] {
2272        self.window.back().unwrap().as_slice()
2273    }
2274
2275    fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
2276        assert!(data.len() <= self.max_window_size);
2277        while self.window_size + data.len() > self.max_window_size {
2278            let removed = self.window.pop_front().unwrap();
2279            self.window_size -= removed.len();
2280            self.history_start += removed.len();
2281            self.history_abs_start += removed.len();
2282            reuse_space(removed);
2283        }
2284        self.compact_history();
2285        self.history.extend_from_slice(&data);
2286        self.window_size += data.len();
2287        self.window.push_back(data);
2288    }
2289
2290    fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
2291        while self.window_size > self.max_window_size {
2292            let removed = self.window.pop_front().unwrap();
2293            self.window_size -= removed.len();
2294            self.history_start += removed.len();
2295            self.history_abs_start += removed.len();
2296            reuse_space(removed);
2297        }
2298    }
2299
2300    fn skip_matching_with_hint(&mut self, incompressible_hint: Option<bool>) {
2301        self.ensure_tables();
2302        let current_len = self.window.back().unwrap().len();
2303        let current_abs_start = self.history_abs_start + self.window_size - current_len;
2304        let current_abs_end = current_abs_start + current_len;
2305        let backfill_start = self.backfill_start(current_abs_start);
2306        if backfill_start < current_abs_start {
2307            self.insert_positions(backfill_start, current_abs_start);
2308        }
2309        if incompressible_hint == Some(true) {
2310            self.insert_positions_with_step(
2311                current_abs_start,
2312                current_abs_end,
2313                INCOMPRESSIBLE_SKIP_STEP,
2314            );
2315            let dense_tail = ROW_MIN_MATCH_LEN + INCOMPRESSIBLE_SKIP_STEP;
2316            let tail_start = current_abs_end
2317                .saturating_sub(dense_tail)
2318                .max(current_abs_start);
2319            for pos in tail_start..current_abs_end {
2320                if !(pos - current_abs_start).is_multiple_of(INCOMPRESSIBLE_SKIP_STEP) {
2321                    self.insert_position(pos);
2322                }
2323            }
2324        } else {
2325            self.insert_positions(current_abs_start, current_abs_end);
2326        }
2327    }
2328
2329    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
2330        self.ensure_tables();
2331
2332        let current_len = self.window.back().unwrap().len();
2333        if current_len == 0 {
2334            return;
2335        }
2336        let current_abs_start = self.history_abs_start + self.window_size - current_len;
2337        let backfill_start = self.backfill_start(current_abs_start);
2338        if backfill_start < current_abs_start {
2339            self.insert_positions(backfill_start, current_abs_start);
2340        }
2341
2342        let mut pos = 0usize;
2343        let mut literals_start = 0usize;
2344        while pos + ROW_MIN_MATCH_LEN <= current_len {
2345            let abs_pos = current_abs_start + pos;
2346            let lit_len = pos - literals_start;
2347
2348            let best = self.best_match(abs_pos, lit_len);
2349            if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
2350                self.insert_positions(abs_pos, candidate.start + candidate.match_len);
2351                let current = self.window.back().unwrap().as_slice();
2352                let start = candidate.start - current_abs_start;
2353                let literals = &current[literals_start..start];
2354                handle_sequence(Sequence::Triple {
2355                    literals,
2356                    offset: candidate.offset,
2357                    match_len: candidate.match_len,
2358                });
2359                let _ = encode_offset_with_history(
2360                    candidate.offset as u32,
2361                    literals.len() as u32,
2362                    &mut self.offset_hist,
2363                );
2364                pos = start + candidate.match_len;
2365                literals_start = pos;
2366            } else {
2367                self.insert_position(abs_pos);
2368                pos += 1;
2369            }
2370        }
2371
2372        while pos + ROW_HASH_KEY_LEN <= current_len {
2373            self.insert_position(current_abs_start + pos);
2374            pos += 1;
2375        }
2376
2377        if literals_start < current_len {
2378            let current = self.window.back().unwrap().as_slice();
2379            handle_sequence(Sequence::Literals {
2380                literals: &current[literals_start..],
2381            });
2382        }
2383    }
2384
2385    fn ensure_tables(&mut self) {
2386        let row_count = 1usize << self.row_hash_log;
2387        let row_entries = 1usize << self.row_log;
2388        let total = row_count * row_entries;
2389        if self.row_positions.len() != total {
2390            self.row_heads = alloc::vec![0; row_count];
2391            self.row_positions = alloc::vec![ROW_EMPTY_SLOT; total];
2392            self.row_tags = alloc::vec![0; total];
2393        }
2394    }
2395
2396    fn compact_history(&mut self) {
2397        if self.history_start == 0 {
2398            return;
2399        }
2400        if self.history_start >= self.max_window_size
2401            || self.history_start * 2 >= self.history.len()
2402        {
2403            self.history.drain(..self.history_start);
2404            self.history_start = 0;
2405        }
2406    }
2407
2408    fn live_history(&self) -> &[u8] {
2409        &self.history[self.history_start..]
2410    }
2411
2412    fn history_abs_end(&self) -> usize {
2413        self.history_abs_start + self.live_history().len()
2414    }
2415
2416    fn hash_and_row(&self, abs_pos: usize) -> Option<(usize, u8)> {
2417        let idx = abs_pos - self.history_abs_start;
2418        let concat = self.live_history();
2419        if idx + ROW_HASH_KEY_LEN > concat.len() {
2420            return None;
2421        }
2422        let value =
2423            u32::from_le_bytes(concat[idx..idx + ROW_HASH_KEY_LEN].try_into().unwrap()) as u64;
2424        const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
2425        let hash = value.wrapping_mul(PRIME);
2426        let total_bits = self.row_hash_log + ROW_TAG_BITS;
2427        let combined = hash >> (u64::BITS as usize - total_bits);
2428        let row_mask = (1usize << self.row_hash_log) - 1;
2429        let row = ((combined >> ROW_TAG_BITS) as usize) & row_mask;
2430        let tag = combined as u8;
2431        Some((row, tag))
2432    }
2433
2434    fn backfill_start(&self, current_abs_start: usize) -> usize {
2435        current_abs_start
2436            .saturating_sub(ROW_HASH_KEY_LEN - 1)
2437            .max(self.history_abs_start)
2438    }
2439
2440    fn best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
2441        let rep = self.repcode_candidate(abs_pos, lit_len);
2442        let row = self.row_candidate(abs_pos, lit_len);
2443        best_len_offset_candidate(rep, row)
2444    }
2445
2446    fn pick_lazy_match(
2447        &self,
2448        abs_pos: usize,
2449        lit_len: usize,
2450        best: Option<MatchCandidate>,
2451    ) -> Option<MatchCandidate> {
2452        pick_lazy_match_shared(
2453            abs_pos,
2454            lit_len,
2455            best,
2456            LazyMatchConfig {
2457                target_len: self.target_len,
2458                min_match_len: ROW_MIN_MATCH_LEN,
2459                lazy_depth: self.lazy_depth,
2460                history_abs_end: self.history_abs_end(),
2461            },
2462            |next_pos, next_lit_len| self.best_match(next_pos, next_lit_len),
2463        )
2464    }
2465
2466    fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
2467        repcode_candidate_shared(
2468            self.live_history(),
2469            self.history_abs_start,
2470            self.offset_hist,
2471            abs_pos,
2472            lit_len,
2473            ROW_MIN_MATCH_LEN,
2474        )
2475    }
2476
2477    fn row_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
2478        let concat = self.live_history();
2479        let current_idx = abs_pos - self.history_abs_start;
2480        if current_idx + ROW_MIN_MATCH_LEN > concat.len() {
2481            return None;
2482        }
2483
2484        let (row, tag) = self.hash_and_row(abs_pos)?;
2485        let row_entries = 1usize << self.row_log;
2486        let row_mask = row_entries - 1;
2487        let row_base = row << self.row_log;
2488        let head = self.row_heads[row] as usize;
2489        let max_walk = self.search_depth.min(row_entries);
2490
2491        let mut best = None;
2492        for i in 0..max_walk {
2493            let slot = (head + i) & row_mask;
2494            let idx = row_base + slot;
2495            if self.row_tags[idx] != tag {
2496                continue;
2497            }
2498            let candidate_pos = self.row_positions[idx];
2499            if candidate_pos == ROW_EMPTY_SLOT
2500                || candidate_pos < self.history_abs_start
2501                || candidate_pos >= abs_pos
2502            {
2503                continue;
2504            }
2505            let candidate_idx = candidate_pos - self.history_abs_start;
2506            let match_len =
2507                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
2508            if match_len >= ROW_MIN_MATCH_LEN {
2509                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
2510                best = best_len_offset_candidate(best, Some(candidate));
2511                if best.is_some_and(|best| best.match_len >= self.target_len) {
2512                    return best;
2513                }
2514            }
2515        }
2516        best
2517    }
2518
2519    fn extend_backwards(
2520        &self,
2521        candidate_pos: usize,
2522        abs_pos: usize,
2523        match_len: usize,
2524        lit_len: usize,
2525    ) -> MatchCandidate {
2526        extend_backwards_shared(
2527            self.live_history(),
2528            self.history_abs_start,
2529            candidate_pos,
2530            abs_pos,
2531            match_len,
2532            lit_len,
2533        )
2534    }
2535
2536    fn insert_positions(&mut self, start: usize, end: usize) {
2537        for pos in start..end {
2538            self.insert_position(pos);
2539        }
2540    }
2541
2542    fn insert_positions_with_step(&mut self, start: usize, end: usize, step: usize) {
2543        if step <= 1 {
2544            self.insert_positions(start, end);
2545            return;
2546        }
2547        let mut pos = start;
2548        while pos < end {
2549            self.insert_position(pos);
2550            let next = pos.saturating_add(step);
2551            if next <= pos {
2552                break;
2553            }
2554            pos = next;
2555        }
2556    }
2557
2558    fn insert_position(&mut self, abs_pos: usize) {
2559        let Some((row, tag)) = self.hash_and_row(abs_pos) else {
2560            return;
2561        };
2562        let row_entries = 1usize << self.row_log;
2563        let row_mask = row_entries - 1;
2564        let row_base = row << self.row_log;
2565        let head = self.row_heads[row] as usize;
2566        let next = head.wrapping_sub(1) & row_mask;
2567        self.row_heads[row] = next as u8;
2568        self.row_tags[row_base + next] = tag;
2569        self.row_positions[row_base + next] = abs_pos;
2570    }
2571}
2572
// Greedy/lazy matcher backed by a classic hash chain: a head table maps a
// 4-byte hash to the newest position, and a chain table links each position
// to the previous one with the same hash.
struct HcMatchGenerator {
    /// Upper bound on the total number of bytes retained in `window`.
    max_window_size: usize,
    /// Input chunks still inside the match window, oldest first.
    window: VecDeque<Vec<u8>>,
    /// Sum of the lengths of all chunks in `window`.
    window_size: usize,
    /// Contiguous copy of the window bytes used for match comparisons.
    history: Vec<u8>,
    /// Index into `history` where live (non-evicted) bytes begin.
    history_start: usize,
    /// Absolute stream position of `history[history_start]`.
    history_abs_start: usize,
    /// Base subtracted from absolute positions before storing them as u32.
    position_base: usize,
    /// Recent repeat offsets (zstd repcodes), most recent first.
    offset_hist: [u32; 3],
    /// hash -> stored position (relative + 1; HC_EMPTY means no entry).
    hash_table: Vec<u32>,
    /// chain slot -> previous stored position with the same hash.
    chain_table: Vec<u32>,
    /// How many lookahead positions the lazy heuristic may evaluate.
    lazy_depth: u8,
    /// log2 of `hash_table` size.
    hash_log: usize,
    /// log2 of `chain_table` size.
    chain_log: usize,
    /// Maximum number of valid chain candidates examined per lookup.
    search_depth: usize,
    /// Match length considered "good enough" to stop searching early.
    target_len: usize,
}
2590
2591impl HcMatchGenerator {
    /// Create an empty matcher bounded by `max_window_size` bytes.
    ///
    /// The hash/chain tables are allocated lazily by `ensure_tables`, so an
    /// unused matcher costs almost nothing.
    fn new(max_window_size: usize) -> Self {
        Self {
            max_window_size,
            window: VecDeque::new(),
            window_size: 0,
            history: Vec::new(),
            history_start: 0,
            history_abs_start: 0,
            position_base: 0,
            // Initial repeat offsets prescribed by the zstd format.
            offset_hist: [1, 4, 8],
            // Allocated on first use by ensure_tables().
            hash_table: Vec::new(),
            chain_table: Vec::new(),
            lazy_depth: 2,
            hash_log: HC_HASH_LOG,
            chain_log: HC_CHAIN_LOG,
            search_depth: HC_SEARCH_DEPTH,
            target_len: HC_TARGET_LEN,
        }
    }
2611
2612    fn configure(&mut self, config: HcConfig) {
2613        let resize = self.hash_log != config.hash_log || self.chain_log != config.chain_log;
2614        self.hash_log = config.hash_log;
2615        self.chain_log = config.chain_log;
2616        self.search_depth = config.search_depth.min(MAX_HC_SEARCH_DEPTH);
2617        self.target_len = config.target_len;
2618        if resize && !self.hash_table.is_empty() {
2619            // Force reallocation on next ensure_tables() call.
2620            self.hash_table.clear();
2621            self.chain_table.clear();
2622        }
2623    }
2624
2625    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
2626        self.window_size = 0;
2627        self.history.clear();
2628        self.history_start = 0;
2629        self.history_abs_start = 0;
2630        self.position_base = 0;
2631        self.offset_hist = [1, 4, 8];
2632        if !self.hash_table.is_empty() {
2633            self.hash_table.fill(HC_EMPTY);
2634            self.chain_table.fill(HC_EMPTY);
2635        }
2636        for mut data in self.window.drain(..) {
2637            data.resize(data.capacity(), 0);
2638            reuse_space(data);
2639        }
2640    }
2641
2642    fn get_last_space(&self) -> &[u8] {
2643        self.window.back().unwrap().as_slice()
2644    }
2645
2646    // History duplicates window data for O(1) contiguous access during match
2647    // finding (common_prefix_len, extend_backwards). Same pattern as
2648    // DfastMatchGenerator. Peak: ~2x window size for data buffers + 6 MB tables.
2649    fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
2650        assert!(data.len() <= self.max_window_size);
2651        while self.window_size + data.len() > self.max_window_size {
2652            let removed = self.window.pop_front().unwrap();
2653            self.window_size -= removed.len();
2654            self.history_start += removed.len();
2655            self.history_abs_start += removed.len();
2656            reuse_space(removed);
2657        }
2658        self.compact_history();
2659        self.history.extend_from_slice(&data);
2660        self.window_size += data.len();
2661        self.window.push_back(data);
2662    }
2663
2664    fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
2665        while self.window_size > self.max_window_size {
2666            let removed = self.window.pop_front().unwrap();
2667            self.window_size -= removed.len();
2668            self.history_start += removed.len();
2669            self.history_abs_start += removed.len();
2670            reuse_space(removed);
2671        }
2672    }
2673
2674    /// Backfill positions from the tail of the previous slice that couldn't be
2675    /// hashed at the time (insert_position needs 4 bytes of lookahead).
2676    fn backfill_boundary_positions(&mut self, current_abs_start: usize) {
2677        let backfill_start = current_abs_start
2678            .saturating_sub(3)
2679            .max(self.history_abs_start);
2680        if backfill_start < current_abs_start {
2681            self.insert_positions(backfill_start, current_abs_start);
2682        }
2683    }
2684
    /// Index the newly added slice without emitting sequences (the caller
    /// already decided not to compress this block).
    ///
    /// With `incompressible_hint == Some(true)` only every
    /// INCOMPRESSIBLE_SKIP_STEP-th position is inserted to save time,
    /// except for a densely indexed tail so matches spanning into the next
    /// slice can still be found.
    fn skip_matching(&mut self, incompressible_hint: Option<bool>) {
        self.ensure_tables();
        let current_len = self.window.back().unwrap().len();
        let current_abs_start = self.history_abs_start + self.window_size - current_len;
        let current_abs_end = current_abs_start + current_len;
        self.backfill_boundary_positions(current_abs_start);
        if incompressible_hint == Some(true) {
            self.insert_positions_with_step(
                current_abs_start,
                current_abs_end,
                INCOMPRESSIBLE_SKIP_STEP,
            );
            let dense_tail = HC_MIN_MATCH_LEN + INCOMPRESSIBLE_SKIP_STEP;
            let tail_start = current_abs_end
                .saturating_sub(dense_tail)
                .max(self.history_abs_start);
            let tail_start = tail_start.max(current_abs_start);
            for pos in tail_start..current_abs_end {
                // Skip positions the strided pass above already inserted.
                if !(pos - current_abs_start).is_multiple_of(INCOMPRESSIBLE_SKIP_STEP) {
                    self.insert_position(pos);
                }
            }
        } else {
            self.insert_positions(current_abs_start, current_abs_end);
        }
    }
2711
    /// Compress the most recently added slice: greedy scan with a lazy
    /// lookahead, emitting `Sequence::Triple` for each accepted match and a
    /// trailing `Sequence::Literals` for leftover bytes.
    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
        self.ensure_tables();

        let current_len = self.window.back().unwrap().len();
        if current_len == 0 {
            return;
        }

        let current_abs_start = self.history_abs_start + self.window_size - current_len;
        self.backfill_boundary_positions(current_abs_start);

        let mut pos = 0usize;
        let mut literals_start = 0usize;
        while pos + HC_MIN_MATCH_LEN <= current_len {
            let abs_pos = current_abs_start + pos;
            let lit_len = pos - literals_start;

            let best = self.find_best_match(abs_pos, lit_len);
            if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
                // Index every position covered by the match before jumping
                // over it, so later searches can still find them.
                self.insert_positions(abs_pos, candidate.start + candidate.match_len);
                let current = self.window.back().unwrap().as_slice();
                let start = candidate.start - current_abs_start;
                let literals = &current[literals_start..start];
                handle_sequence(Sequence::Triple {
                    literals,
                    offset: candidate.offset,
                    match_len: candidate.match_len,
                });
                // Keep the repcode history in sync with the emitted sequence.
                let _ = encode_offset_with_history(
                    candidate.offset as u32,
                    literals.len() as u32,
                    &mut self.offset_hist,
                );
                pos = start + candidate.match_len;
                literals_start = pos;
            } else {
                self.insert_position(abs_pos);
                pos += 1;
            }
        }

        // Insert remaining hashable positions in the tail (the matching loop
        // stops at HC_MIN_MATCH_LEN but insert_position only needs 4 bytes).
        while pos + 4 <= current_len {
            self.insert_position(current_abs_start + pos);
            pos += 1;
        }

        if literals_start < current_len {
            let current = self.window.back().unwrap().as_slice();
            handle_sequence(Sequence::Literals {
                literals: &current[literals_start..],
            });
        }
    }
2767
2768    fn ensure_tables(&mut self) {
2769        if self.hash_table.is_empty() {
2770            self.hash_table = alloc::vec![HC_EMPTY; 1 << self.hash_log];
2771            self.chain_table = alloc::vec![HC_EMPTY; 1 << self.chain_log];
2772        }
2773    }
2774
2775    fn compact_history(&mut self) {
2776        if self.history_start == 0 {
2777            return;
2778        }
2779        if self.history_start >= self.max_window_size
2780            || self.history_start * 2 >= self.history.len()
2781        {
2782            self.history.drain(..self.history_start);
2783            self.history_start = 0;
2784        }
2785    }
2786
2787    fn live_history(&self) -> &[u8] {
2788        &self.history[self.history_start..]
2789    }
2790
2791    fn history_abs_end(&self) -> usize {
2792        self.history_abs_start + self.live_history().len()
2793    }
2794
2795    fn hash_position(&self, data: &[u8]) -> usize {
2796        let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64;
2797        const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
2798        ((value.wrapping_mul(PRIME)) >> (64 - self.hash_log)) as usize
2799    }
2800
2801    fn relative_position(&self, abs_pos: usize) -> Option<u32> {
2802        let rel = abs_pos.checked_sub(self.position_base)?;
2803        let rel_u32 = u32::try_from(rel).ok()?;
2804        // Positions are stored as (relative_pos + 1), with 0 reserved as the
2805        // empty sentinel. So the raw relative position itself must stay
2806        // strictly below u32::MAX.
2807        (rel_u32 < u32::MAX).then_some(rel_u32)
2808    }
2809
    /// Reset `position_base` and rebuild the tables when `abs_pos` no
    /// longer fits the u32-relative coordinate space. Rare: triggered only
    /// after ~4 GiB of positions beyond the current base.
    fn maybe_rebase_positions(&mut self, abs_pos: usize) {
        let needs_rebase = self
            .relative_position(abs_pos)
            .is_none_or(|relative| relative >= u32::MAX - 1);
        if !needs_rebase {
            return;
        }

        // Keep all live history addressable after rebase.
        self.position_base = self.history_abs_start;
        self.hash_table.fill(HC_EMPTY);
        self.chain_table.fill(HC_EMPTY);

        let history_start = self.history_abs_start;
        // Rebuild only the already-inserted prefix. The caller inserts abs_pos
        // immediately after this, and later positions are added in-order.
        for pos in history_start..abs_pos {
            self.insert_position_no_rebase(pos);
        }
    }
2830
    /// Insert `abs_pos` into the hash chain, first rebasing the table
    /// coordinate system if the relative position would overflow u32.
    fn insert_position(&mut self, abs_pos: usize) {
        self.maybe_rebase_positions(abs_pos);
        self.insert_position_no_rebase(abs_pos);
    }
2835
    /// Link `abs_pos` into its hash chain. Positions with fewer than 4
    /// bytes of lookahead (or outside the u32 coordinate range) are
    /// silently skipped.
    fn insert_position_no_rebase(&mut self, abs_pos: usize) {
        let idx = abs_pos - self.history_abs_start;
        let concat = self.live_history();
        if idx + 4 > concat.len() {
            return;
        }
        let hash = self.hash_position(&concat[idx..]);
        let Some(relative_pos) = self.relative_position(abs_pos) else {
            return;
        };
        // Stored as relative + 1 so 0 can serve as the HC_EMPTY sentinel.
        let stored = relative_pos + 1;
        let chain_idx = relative_pos as usize & ((1 << self.chain_log) - 1);
        // New entry becomes the chain head; the old head hangs off its slot.
        let prev = self.hash_table[hash];
        self.chain_table[chain_idx] = prev;
        self.hash_table[hash] = stored;
    }
2852
2853    fn insert_positions(&mut self, start: usize, end: usize) {
2854        for pos in start..end {
2855            self.insert_position(pos);
2856        }
2857    }
2858
2859    fn insert_positions_with_step(&mut self, start: usize, end: usize, step: usize) {
2860        if step == 0 {
2861            return;
2862        }
2863        let mut pos = start;
2864        while pos < end {
2865            self.insert_position(pos);
2866            let next = pos.saturating_add(step);
2867            if next <= pos {
2868                break;
2869            }
2870            pos = next;
2871        }
2872    }
2873
    // Fixed-size stack array is intentional: it avoids heap allocation on
    // the hot path and the sentinel loop exits at self.search_depth.
    /// Collect up to `search_depth` earlier positions sharing `abs_pos`'s
    /// 4-byte hash, newest first. Unused slots stay `usize::MAX`, which the
    /// caller treats as an end-of-list sentinel.
    fn chain_candidates(&self, abs_pos: usize) -> [usize; MAX_HC_SEARCH_DEPTH] {
        let mut buf = [usize::MAX; MAX_HC_SEARCH_DEPTH];
        let idx = abs_pos - self.history_abs_start;
        let concat = self.live_history();
        if idx + 4 > concat.len() {
            return buf;
        }
        let hash = self.hash_position(&concat[idx..]);
        let chain_mask = (1 << self.chain_log) - 1;

        let mut cur = self.hash_table[hash];
        let mut filled = 0;
        // Follow chain up to search_depth valid candidates, skipping stale
        // entries (evicted from window) instead of stopping at them.
        // Stored values are (relative_pos + 1); decode with wrapping_sub(1)
        // and recover absolute position via position_base + relative.
        // Break on self-loops (masked chain_idx collision at periodicity).
        // Cap total steps at 4x search depth to bound time spent skipping
        // stale entries while still finding valid candidates deeper in chain.
        let mut steps = 0;
        let max_chain_steps = self.search_depth * 4;
        while filled < self.search_depth && steps < max_chain_steps {
            if cur == HC_EMPTY {
                break;
            }
            let candidate_rel = cur.wrapping_sub(1) as usize;
            let candidate_abs = self.position_base + candidate_rel;
            let next = self.chain_table[candidate_rel & chain_mask];
            steps += 1;
            if next == cur {
                // Self-loop: two positions share chain_idx, stop to avoid
                // spinning on the same candidate forever.
                if candidate_abs >= self.history_abs_start && candidate_abs < abs_pos {
                    buf[filled] = candidate_abs;
                }
                break;
            }
            cur = next;
            // Discard stale (evicted) or not-strictly-earlier positions.
            if candidate_abs < self.history_abs_start || candidate_abs >= abs_pos {
                continue;
            }
            buf[filled] = candidate_abs;
            filled += 1;
        }
        buf
    }
2922
2923    fn find_best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
2924        let rep = self.repcode_candidate(abs_pos, lit_len);
2925        let hash = self.hash_chain_candidate(abs_pos, lit_len);
2926        Self::better_candidate(rep, hash)
2927    }
2928
    /// Best match among the hash-chain candidates at `abs_pos`, or None.
    /// Early-exits as soon as a candidate reaches `target_len`.
    fn hash_chain_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        let concat = self.live_history();
        let current_idx = abs_pos - self.history_abs_start;
        if current_idx + HC_MIN_MATCH_LEN > concat.len() {
            return None;
        }

        let mut best: Option<MatchCandidate> = None;
        for candidate_abs in self.chain_candidates(abs_pos) {
            // usize::MAX is the end-of-list sentinel (see chain_candidates).
            if candidate_abs == usize::MAX {
                break;
            }
            let candidate_idx = candidate_abs - self.history_abs_start;
            let match_len =
                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
            if match_len >= HC_MIN_MATCH_LEN {
                let candidate = self.extend_backwards(candidate_abs, abs_pos, match_len, lit_len);
                best = Self::better_candidate(best, Some(candidate));
                if best.is_some_and(|b| b.match_len >= self.target_len) {
                    return best;
                }
            }
        }
        best
    }
2954
    /// Try the recent repeat offsets at `abs_pos` and return the best
    /// resulting candidate, or None.
    ///
    /// When no literals precede the sequence (`lit_len == 0`), rep0 is
    /// skipped and rep1, rep2, rep0-1 are tried instead — mirroring the
    /// zstd format's repcode remapping for zero-literal sequences.
    fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        let reps = if lit_len == 0 {
            [
                Some(self.offset_hist[1] as usize),
                Some(self.offset_hist[2] as usize),
                (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
            ]
        } else {
            [
                Some(self.offset_hist[0] as usize),
                Some(self.offset_hist[1] as usize),
                Some(self.offset_hist[2] as usize),
            ]
        };

        let concat = self.live_history();
        let current_idx = abs_pos - self.history_abs_start;
        if current_idx + HC_MIN_MATCH_LEN > concat.len() {
            return None;
        }

        let mut best = None;
        for rep in reps.into_iter().flatten() {
            // Offsets are 1-indexed; reject 0 and anything reaching before
            // the start of the stream.
            if rep == 0 || rep > abs_pos {
                continue;
            }
            let candidate_pos = abs_pos - rep;
            // The target must still be inside live (non-evicted) history.
            if candidate_pos < self.history_abs_start {
                continue;
            }
            let candidate_idx = candidate_pos - self.history_abs_start;
            let match_len =
                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
            if match_len >= HC_MIN_MATCH_LEN {
                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
                best = Self::better_candidate(best, Some(candidate));
            }
        }
        best
    }
2995
    /// Grow a match backwards over the pending literals while the preceding
    /// bytes keep matching, trading literal bytes for a longer match.
    ///
    /// Never consumes bytes before `abs_pos - lit_len` (already emitted) or
    /// before the start of live history.
    fn extend_backwards(
        &self,
        mut candidate_pos: usize,
        mut abs_pos: usize,
        mut match_len: usize,
        lit_len: usize,
    ) -> MatchCandidate {
        let concat = self.live_history();
        let min_abs_pos = abs_pos - lit_len;
        while abs_pos > min_abs_pos
            && candidate_pos > self.history_abs_start
            && concat[candidate_pos - self.history_abs_start - 1]
                == concat[abs_pos - self.history_abs_start - 1]
        {
            candidate_pos -= 1;
            abs_pos -= 1;
            match_len += 1;
        }
        MatchCandidate {
            start: abs_pos,
            offset: abs_pos - candidate_pos,
            match_len,
        }
    }
3020
3021    fn better_candidate(
3022        lhs: Option<MatchCandidate>,
3023        rhs: Option<MatchCandidate>,
3024    ) -> Option<MatchCandidate> {
3025        match (lhs, rhs) {
3026            (None, other) | (other, None) => other,
3027            (Some(lhs), Some(rhs)) => {
3028                let lhs_gain = Self::match_gain(lhs.match_len, lhs.offset);
3029                let rhs_gain = Self::match_gain(rhs.match_len, rhs.offset);
3030                if rhs_gain > lhs_gain {
3031                    Some(rhs)
3032                } else {
3033                    Some(lhs)
3034                }
3035            }
3036        }
3037    }
3038
3039    fn match_gain(match_len: usize, offset: usize) -> i32 {
3040        debug_assert!(
3041            offset > 0,
3042            "zstd offsets are 1-indexed, offset=0 is invalid"
3043        );
3044        let offset_bits = 32 - (offset as u32).leading_zeros() as i32;
3045        (match_len as i32) * 4 - offset_bits
3046    }
3047
    // Lazy lookahead queries pos+1/pos+2 before they are inserted into hash
    // tables — matching C zstd behavior. Seeding before comparing would let a
    // position match against itself, changing semantics.
    //
    // Returns `Some(best)` to accept the match found at `abs_pos`, or `None`
    // to reject it entirely — the lookahead found something strictly more
    // profitable, so the caller should advance by one literal and retry.
    fn pick_lazy_match(
        &self,
        abs_pos: usize,
        lit_len: usize,
        best: Option<MatchCandidate>,
    ) -> Option<MatchCandidate> {
        // Nothing to refine when there is no candidate at all.
        let best = best?;
        // Accept immediately when the match is already long enough, or when
        // pos+1 leaves no room for a minimal match inside the history.
        if best.match_len >= self.target_len
            || abs_pos + 1 + HC_MIN_MATCH_LEN > self.history_abs_end()
        {
            return Some(best);
        }

        // +4 handicap: deferring costs one extra literal (one byte = 4 gain
        // units in match_gain's scale), so a lookahead match must beat the
        // current one by at least that margin.
        let current_gain = Self::match_gain(best.match_len, best.offset) + 4;

        // Lazy check: evaluate pos+1
        let next = self.find_best_match(abs_pos + 1, lit_len + 1);
        if let Some(next) = next {
            let next_gain = Self::match_gain(next.match_len, next.offset);
            if next_gain > current_gain {
                return None;
            }
        }

        // Lazy2 check: also evaluate pos+2
        if self.lazy_depth >= 2 && abs_pos + 2 + HC_MIN_MATCH_LEN <= self.history_abs_end() {
            let next2 = self.find_best_match(abs_pos + 2, lit_len + 2);
            if let Some(next2) = next2 {
                let next2_gain = Self::match_gain(next2.match_len, next2.offset);
                // Must beat current gain + extra literal cost
                if next2_gain > current_gain + 4 {
                    return None;
                }
            }
        }

        Some(best)
    }
3089}
3090
#[test]
fn matches() {
    // End-to-end exercise of the simple backend: feed blocks, replay every
    // emitted sequence the way a decoder would, and finally check that the
    // replayed stream equals the concatenated original input.
    let mut matcher = MatchGenerator::new(1000);
    let mut original_data = Vec::new();
    let mut reconstructed = Vec::new();

    // Decoder-side replay: literals are appended verbatim; a triple appends
    // its literals, then copies `match_len` bytes starting `offset` bytes
    // back — byte by byte, so overlapping copies extend the run as the zstd
    // sequence semantics require.
    let replay_sequence = |seq: Sequence<'_>, reconstructed: &mut Vec<u8>| match seq {
        Sequence::Literals { literals } => {
            assert!(!literals.is_empty());
            reconstructed.extend_from_slice(literals);
        }
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            assert!(offset > 0);
            assert!(match_len >= MIN_MATCH_LEN);
            reconstructed.extend_from_slice(literals);
            assert!(offset <= reconstructed.len());
            let start = reconstructed.len() - offset;
            for i in 0..match_len {
                let byte = reconstructed[start + i];
                reconstructed.push(byte);
            }
        }
    };

    // Block 1: a run of zeroes, consumed in a single sequence.
    matcher.add_data(
        alloc::vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));

    // Once the block is exhausted, next_sequence must report false.
    assert!(!matcher.next_sequence(|_| {}));

    // Block 2: repeating 6-byte pattern plus a zero tail, three sequences.
    matcher.add_data(
        alloc::vec![
            1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
        ],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[
        1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
    ]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Block 3: fresh literals followed by a zero tail, two sequences.
    matcher.add_data(
        alloc::vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Block 4: zeroes again — consumed in one sequence.
    matcher.add_data(
        alloc::vec![0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Block 5: a suffix of block 3 — consumed in one sequence.
    matcher.add_data(
        alloc::vec![7, 8, 9, 10, 11],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[7, 8, 9, 10, 11]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Block 6: skip_matching publishes the block without emitting sequences,
    // so the replay buffer must be advanced manually here.
    matcher.add_data(
        alloc::vec![1, 3, 5, 7, 9],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    matcher.skip_matching();
    original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
    reconstructed.extend_from_slice(&[1, 3, 5, 7, 9]);
    assert!(!matcher.next_sequence(|_| {}));

    // Block 7: same bytes as the skipped block — it must still be replayable
    // even though block 6 produced no sequences of its own.
    matcher.add_data(
        alloc::vec![1, 3, 5, 7, 9],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[1, 3, 5, 7, 9]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Block 8: a repeated run inside the block, consumed in two sequences.
    matcher.add_data(
        alloc::vec![0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // The replayed stream must be byte-identical to everything fed in.
    assert_eq!(reconstructed, original_data);
}
3210
#[test]
fn dfast_matches_roundtrip_multi_block_pattern() {
    // Two identical 128 KiB blocks built from a short repeating pattern; the
    // dfast backend must emit sequences that replay to the exact input, even
    // when the second block matches into the first.
    let pattern = [9, 21, 44, 184, 19, 96, 171, 109, 141, 251];
    let block_a: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
    let block_b = block_a.clone();

    let mut matcher = DfastMatchGenerator::new(1 << 22);
    // Decoder-style replay of a single sequence into `out`, copying matches
    // byte by byte so overlapping offsets behave per the zstd spec.
    let apply = |out: &mut Vec<u8>, seq: Sequence<'_>| match seq {
        Sequence::Literals { literals } => out.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            out.extend_from_slice(literals);
            let from = out.len() - offset;
            for idx in from..from + match_len {
                let byte = out[idx];
                out.push(byte);
            }
        }
    };

    let mut decoded = Vec::new();
    matcher.add_data(block_a.clone(), |_| {});
    matcher.start_matching(|seq| apply(&mut decoded, seq));
    assert_eq!(decoded, block_a);

    let boundary = decoded.len();
    matcher.add_data(block_b.clone(), |_| {});
    matcher.start_matching(|seq| apply(&mut decoded, seq));

    assert_eq!(&decoded[boundary..], block_b.as_slice());
}
3245
#[test]
fn driver_switches_backends_and_initializes_dfast_via_reset() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Default level must route through the dfast backend with a 4 MiB window.
    driver.reset(CompressionLevel::Default);
    assert_eq!(driver.active_backend, MatcherBackend::Dfast);
    assert_eq!(driver.window_size(), (1u64 << 22));

    // First block is only recorded (skipped); the second identical block
    // must then be fully reconstructible against it.
    let mut block = driver.get_next_space();
    block[..12].copy_from_slice(b"abcabcabcabc");
    block.truncate(12);
    driver.commit_space(block);
    assert_eq!(driver.get_last_space(), b"abcabcabcabc");
    driver.skip_matching_with_hint(None);

    let mut block = driver.get_next_space();
    block[..12].copy_from_slice(b"abcabcabcabc");
    block.truncate(12);
    driver.commit_space(block);

    // Seed the replay buffer with the skipped first block, then replay.
    let mut replayed = b"abcabcabcabc".to_vec();
    driver.start_matching(|seq| match seq {
        Sequence::Literals { literals } => replayed.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            replayed.extend_from_slice(literals);
            let from = replayed.len() - offset;
            for idx in from..from + match_len {
                let byte = replayed[idx];
                replayed.push(byte);
            }
        }
    });
    assert_eq!(replayed, b"abcabcabcabcabcabcabcabc");

    // Dropping to Fastest switches backend and shrinks the window.
    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.window_size(), (1u64 << 17));
}
3287
#[test]
fn driver_level4_selects_row_backend() {
    // Compression level 4 must be served by the row-hash backend.
    let mut driver = MatchGeneratorDriver::new(32, 2);
    driver.reset(CompressionLevel::Level(4));
    assert_eq!(driver.active_backend, MatcherBackend::Row);
}
3294
#[test]
fn driver_small_source_hint_shrinks_dfast_hash_tables() {
    // Without a source-size hint the dfast tables come up at default size;
    // a tiny hint must shrink both the window and the hash tables.
    let mut driver = MatchGeneratorDriver::new(32, 2);

    driver.reset(CompressionLevel::Level(2));
    let mut block = driver.get_next_space();
    block[..12].copy_from_slice(b"abcabcabcabc");
    block.truncate(12);
    driver.commit_space(block);
    driver.skip_matching_with_hint(None);
    let default_tables = driver.dfast_matcher().short_hash.len();
    assert_eq!(default_tables, 1 << DFAST_HASH_BITS);

    // Re-run with a 1 KiB hint in place.
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Level(2));
    let mut block = driver.get_next_space();
    block[..12].copy_from_slice(b"xyzxyzxyzxyz");
    block.truncate(12);
    driver.commit_space(block);
    driver.skip_matching_with_hint(None);
    let shrunk_tables = driver.dfast_matcher().short_hash.len();

    assert_eq!(driver.window_size(), 1 << MIN_HINTED_WINDOW_LOG);
    assert_eq!(shrunk_tables, 1 << MIN_HINTED_WINDOW_LOG);
    assert!(
        shrunk_tables < default_tables,
        "tiny source hint should reduce dfast table footprint"
    );
}
3324
#[test]
fn driver_small_source_hint_shrinks_row_hash_tables() {
    // Row-backend analogue of the dfast shrink test: a small source hint
    // must reduce the number of allocated row-table rows.
    let mut driver = MatchGeneratorDriver::new(32, 2);

    driver.reset(CompressionLevel::Level(4));
    let mut block = driver.get_next_space();
    block[..12].copy_from_slice(b"abcabcabcabc");
    block.truncate(12);
    driver.commit_space(block);
    driver.skip_matching_with_hint(None);
    let default_rows = driver.row_matcher().row_heads.len();
    assert_eq!(default_rows, 1 << (ROW_HASH_BITS - ROW_LOG));

    // Re-run with a 1 KiB hint in place.
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Level(4));
    let mut block = driver.get_next_space();
    block[..12].copy_from_slice(b"xyzxyzxyzxyz");
    block.truncate(12);
    driver.commit_space(block);
    driver.skip_matching_with_hint(None);
    let shrunk_rows = driver.row_matcher().row_heads.len();

    assert_eq!(driver.window_size(), 1 << MIN_HINTED_WINDOW_LOG);
    assert_eq!(
        shrunk_rows,
        1 << ((MIN_HINTED_WINDOW_LOG as usize) - ROW_LOG)
    );
    assert!(
        shrunk_rows < default_rows,
        "tiny source hint should reduce row hash table footprint"
    );
}
3357
#[test]
fn row_matches_roundtrip_multi_block_pattern() {
    // Round-trip the row backend across two identical 128 KiB blocks built
    // from a 10-byte repeating pattern, followed by a literals-only block.
    let pattern = [7, 13, 44, 184, 19, 96, 171, 109, 141, 251];
    let first_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
    let second_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();

    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.ensure_tables();
    // Decoder-style replay: append literals, then copy `match_len` bytes
    // from `offset` back, byte by byte so overlapping matches work.
    let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            decoded.extend_from_slice(literals);
            let start = decoded.len() - offset;
            for i in 0..match_len {
                let byte = decoded[start + i];
                decoded.push(byte);
            }
        }
    };

    matcher.add_data(first_block.clone(), |_| {});
    let mut history = Vec::new();
    matcher.start_matching(|seq| replay_sequence(&mut history, seq));
    assert_eq!(history, first_block);

    // The second block may match across the block boundary into the first.
    matcher.add_data(second_block.clone(), |_| {});
    let prefix_len = history.len();
    matcher.start_matching(|seq| replay_sequence(&mut history, seq));

    assert_eq!(&history[prefix_len..], second_block.as_slice());

    // Force a literals-only pass so the Sequence::Literals arm is exercised.
    let third_block: Vec<u8> = (0u8..=255).collect();
    matcher.add_data(third_block.clone(), |_| {});
    let third_prefix = history.len();
    matcher.start_matching(|seq| replay_sequence(&mut history, seq));
    assert_eq!(&history[third_prefix..], third_block.as_slice());
}
3401
#[test]
fn row_short_block_emits_literals_only() {
    // A 5-byte block is below the row minimum match length, so the backend
    // must emit it purely as literals; a longer repeated block must then
    // demonstrate that triples are still reachable.
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);

    matcher.add_data(b"abcde".to_vec(), |_| {});

    let mut saw_triple = false;
    let mut reconstructed = Vec::new();
    matcher.start_matching(|seq| match seq {
        Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
        Sequence::Triple { .. } => saw_triple = true,
    });

    assert!(
        !saw_triple,
        "row backend must not emit triples for short blocks"
    );
    assert_eq!(reconstructed, b"abcde");

    // Then feed a clearly matchable block and ensure the Triple arm is reachable.
    saw_triple = false;
    matcher.add_data(b"abcdeabcde".to_vec(), |_| {});
    matcher.start_matching(|seq| {
        saw_triple |= matches!(seq, Sequence::Triple { .. });
    });
    assert!(
        saw_triple,
        "row backend should emit triples on repeated data"
    );
}
3435
#[test]
fn row_pick_lazy_returns_best_when_lookahead_is_out_of_bounds() {
    // With only six bytes of history there is no room for a pos+1 lookahead
    // probe, so the lazy heuristic must return the candidate unchanged.
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.add_data(b"abcabc".to_vec(), |_| {});

    let candidate = MatchCandidate {
        start: 0,
        offset: 1,
        match_len: ROW_MIN_MATCH_LEN,
    };
    let survivor = matcher
        .pick_lazy_match(0, 0, Some(candidate))
        .expect("best candidate must survive");

    assert_eq!(survivor.start, candidate.start);
    assert_eq!(survivor.offset, candidate.offset);
    assert_eq!(survivor.match_len, candidate.match_len);
}
3455
#[test]
fn row_backfills_previous_block_tail_for_cross_boundary_match() {
    // The first block ends in a 3-byte "XYZ" tail; the second block starts
    // with the same bytes. Per the test's expectation the row backend should
    // backfill that tail so the second block can match it at offset 3.
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);

    let mut first_block = alloc::vec![0xA5; 64];
    first_block.extend_from_slice(b"XYZ");
    let second_block = b"XYZXYZtail".to_vec();

    // Decoder-style replay used to verify the emitted sequences round-trip.
    let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            decoded.extend_from_slice(literals);
            let start = decoded.len() - offset;
            for i in 0..match_len {
                let byte = decoded[start + i];
                decoded.push(byte);
            }
        }
    };

    matcher.add_data(first_block.clone(), |_| {});
    let mut reconstructed = Vec::new();
    matcher.start_matching(|seq| replay_sequence(&mut reconstructed, seq));
    assert_eq!(reconstructed, first_block);

    matcher.add_data(second_block.clone(), |_| {});
    let mut saw_cross_boundary = false;
    let prefix_len = reconstructed.len();
    matcher.start_matching(|seq| {
        // Offset 3 with no leading literals means the match begins exactly
        // at the previous block's "XYZ" tail.
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 3
            && match_len >= ROW_MIN_MATCH_LEN
        {
            saw_cross_boundary = true;
        }
        replay_sequence(&mut reconstructed, seq);
    });

    assert!(
        saw_cross_boundary,
        "row matcher should reuse the 3-byte previous-block tail"
    );
    assert_eq!(&reconstructed[prefix_len..], second_block.as_slice());
}
3510
#[test]
fn row_skip_matching_with_incompressible_hint_uses_sparse_prefix() {
    // Seed identical data with and without the "looks incompressible" hint:
    // the hinted pass must populate noticeably fewer row slots.
    let data = deterministic_high_entropy_bytes(0xA713_9C5D_44E2_10B1, 4096);
    let filled_slots = |m: &RowMatchGenerator| {
        m.row_positions
            .iter()
            .filter(|&&pos| pos != ROW_EMPTY_SLOT)
            .count()
    };

    let mut dense = RowMatchGenerator::new(1 << 22);
    dense.configure(ROW_CONFIG);
    dense.add_data(data.clone(), |_| {});
    dense.skip_matching_with_hint(Some(false));
    let dense_slots = filled_slots(&dense);

    let mut sparse = RowMatchGenerator::new(1 << 22);
    sparse.configure(ROW_CONFIG);
    sparse.add_data(data, |_| {});
    sparse.skip_matching_with_hint(Some(true));
    let sparse_slots = filled_slots(&sparse);

    assert!(
        sparse_slots < dense_slots,
        "incompressible hint should seed fewer row slots (sparse={sparse_slots}, dense={dense_slots})"
    );
}
3540
#[test]
fn driver_unhinted_level2_keeps_default_dfast_hash_table_size() {
    // No source-size hint is set, so Level(2) must allocate the default
    // dfast hash-table size rather than a shrunken one.
    let mut driver = MatchGeneratorDriver::new(32, 2);

    driver.reset(CompressionLevel::Level(2));
    let mut block = driver.get_next_space();
    block[..12].copy_from_slice(b"abcabcabcabc");
    block.truncate(12);
    driver.commit_space(block);
    driver.skip_matching_with_hint(None);

    assert_eq!(
        driver.dfast_matcher().short_hash.len(),
        1 << DFAST_HASH_BITS,
        "unhinted Level(2) should keep default dfast table size"
    );
}
3559
#[test]
fn simple_backend_rejects_undersized_pooled_suffix_store() {
    // A pooled SuffixStore smaller than the committed block must not be
    // reused for that block: the committed window entry should carry a
    // suffix table at least as large as the data it indexes.
    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
    driver.reset(CompressionLevel::Fastest);

    // Inject a deliberately tiny store into the reuse pool.
    driver.suffix_pool.push(SuffixStore::with_capacity(1024));

    let mut space = driver.get_next_space();
    space.clear();
    space.resize(4096, 0xAB);
    driver.commit_space(space);

    let last_suffix_slots = driver
        .match_generator
        .window
        .last()
        .expect("window entry must exist after commit")
        .suffixes
        .slots
        .len();
    assert!(
        last_suffix_slots >= 4096,
        "undersized pooled suffix store must not be reused for larger blocks"
    );
}
3585
#[test]
fn source_hint_clamps_driver_slice_size_to_window() {
    // A 1 KiB source hint clamps the window, and the per-block slice size
    // must be clamped along with it.
    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Default);

    let expected = 1usize << MIN_HINTED_WINDOW_LOG;
    assert_eq!(driver.window_size() as usize, expected);
    assert_eq!(driver.slice_size, expected);

    let block = driver.get_next_space();
    assert_eq!(block.len(), expected);
    driver.commit_space(block);
}
3600
#[test]
fn pooled_space_keeps_capacity_when_slice_size_shrinks() {
    // Commit a full-size buffer, then shrink the slice size via a hint: the
    // recycled buffer must keep its old, larger capacity.
    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
    driver.reset(CompressionLevel::Default);

    let big = driver.get_next_space();
    let big_capacity = big.capacity();
    assert!(big_capacity >= 128 * 1024);
    driver.commit_space(big);

    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Default);

    let shrunk = driver.get_next_space();
    assert_eq!(shrunk.len(), 1 << MIN_HINTED_WINDOW_LOG);
    assert!(
        shrunk.capacity() >= big_capacity,
        "pooled buffer capacity should be preserved to avoid shrink/grow churn"
    );
}
3621
#[test]
fn driver_best_to_fastest_releases_oversized_hc_tables() {
    // Downgrading from Best to Fastest must free the hash-chain matcher's
    // large tables rather than keeping them allocated but unused.
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Initialize at Best — allocates large HC tables (2M hash, 1M chain).
    driver.reset(CompressionLevel::Best);
    assert_eq!(driver.window_size(), (1u64 << 24));

    // Feed data so tables are actually allocated via ensure_tables().
    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"abcabcabcabc");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching_with_hint(None);

    // Switch to Fastest — must release HC tables.
    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.window_size(), (1u64 << 17));

    // HC matcher should have empty tables after backend switch.
    let hc = driver.hc_match_generator.as_ref().unwrap();
    assert!(
        hc.hash_table.is_empty(),
        "HC hash_table should be released after switching away from Best"
    );
    assert!(
        hc.chain_table.is_empty(),
        "HC chain_table should be released after switching away from Best"
    );
}
3652
#[test]
fn driver_better_to_best_resizes_hc_tables() {
    // Upgrading from Better to Best must grow the HC tables, not silently
    // keep the smaller Better-sized allocations.
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Initialize at Better — allocates small HC tables (1M hash, 512K chain).
    driver.reset(CompressionLevel::Better);
    assert_eq!(driver.window_size(), (1u64 << 23));

    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"abcabcabcabc");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching_with_hint(None);

    // Snapshot the Better-level table sizes for comparison.
    let hc = driver.hc_match_generator.as_ref().unwrap();
    let better_hash_len = hc.hash_table.len();
    let better_chain_len = hc.chain_table.len();

    // Switch to Best — must resize to larger tables.
    driver.reset(CompressionLevel::Best);
    assert_eq!(driver.window_size(), (1u64 << 24));

    // Feed data to trigger ensure_tables with new sizes.
    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"xyzxyzxyzxyz");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching_with_hint(None);

    let hc = driver.hc_match_generator.as_ref().unwrap();
    assert!(
        hc.hash_table.len() > better_hash_len,
        "Best hash_table ({}) should be larger than Better ({})",
        hc.hash_table.len(),
        better_hash_len
    );
    assert!(
        hc.chain_table.len() > better_chain_len,
        "Best chain_table ({}) should be larger than Better ({})",
        hc.chain_table.len(),
        better_chain_len
    );
}
3696
#[test]
fn prime_with_dictionary_preserves_history_for_first_full_block() {
    // After priming with an 8-byte dictionary, a first committed block with
    // identical bytes must match it at offset 8 with no leading literals.
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    let mut block = driver.get_next_space();
    block.clear();
    block.extend_from_slice(b"abcdefgh");
    driver.commit_space(block);

    let mut saw_match = false;
    driver.start_matching(|seq| {
        saw_match |= matches!(
            seq,
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } if literals.is_empty() && offset == 8 && match_len >= MIN_MATCH_LEN
        );
    });

    assert!(
        saw_match,
        "first full block should still match dictionary-primed history"
    );
}
3729
#[test]
fn prime_with_large_dictionary_preserves_early_history_until_first_block() {
    // The dictionary is 24 bytes (three 8-byte slices); the first committed
    // block repeats its first 8 bytes, so a match at offset 24 proves the
    // oldest dictionary slice is still addressable after priming.
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);

    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    let mut saw_match = false;
    driver.start_matching(|seq| {
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 24
            && match_len >= MIN_MATCH_LEN
        {
            saw_match = true;
        }
    });

    assert!(
        saw_match,
        "dictionary bytes should remain addressable until frame output exceeds the live window"
    );
}
3762
#[test]
fn prime_with_dictionary_applies_offset_history_even_when_content_is_empty() {
    // An empty dictionary still carries repeat-offset history, which must be
    // installed on the matcher verbatim.
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);
    driver.prime_with_dictionary(&[], [11, 7, 3]);
    assert_eq!(driver.match_generator.offset_hist, [11, 7, 3]);
}
3772
#[test]
fn dfast_prime_with_dictionary_preserves_history_for_first_full_block() {
    // Same scenario as the simple-backend priming test, but routed through
    // the dfast backend (Level 2): the first committed block must match the
    // 8-byte dictionary at offset 8 with no leading literals.
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(2));

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    let mut saw_match = false;
    driver.start_matching(|seq| {
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 8
            && match_len >= DFAST_MIN_MATCH_LEN
        {
            saw_match = true;
        }
    });

    assert!(
        saw_match,
        "dfast backend should match dictionary-primed history in first full block"
    );
}
3805
#[test]
fn prime_with_dictionary_does_not_inflate_reported_window_size() {
    // Dictionary retention is an internal budget; the window size reported
    // to callers must be identical before and after priming.
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    let window_before = driver.window_size();
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);

    assert_eq!(
        driver.window_size(),
        window_before,
        "dictionary retention budget must not change reported frame window size"
    );
}
3820
#[test]
fn prime_with_dictionary_does_not_reuse_tiny_suffix_store() {
    // Guard against committing degenerate dictionary tail chunks to the
    // matcher window.
    let mut driver = MatchGeneratorDriver::new(8, 2);
    driver.reset(CompressionLevel::Fastest);

    // This dictionary leaves a 1-byte tail chunk (capacity=1 suffix table),
    // which should never be committed to the matcher window.
    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);

    assert!(
        driver
            .match_generator
            .window
            .iter()
            .all(|entry| entry.data.len() >= MIN_MATCH_LEN),
        "dictionary priming must not commit tails shorter than MIN_MATCH_LEN"
    );
}
3839
#[test]
fn prime_with_dictionary_counts_only_committed_tail_budget() {
    // Nine dictionary bytes = one full 8-byte slice plus a 1-byte tail that
    // cannot be committed; only the committed 8 bytes may grow the budget.
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    let budget_before = driver.match_generator.max_window_size;
    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);

    assert_eq!(
        driver.match_generator.max_window_size,
        budget_before + 8,
        "retention budget must account only for dictionary bytes actually committed to history"
    );
}
3855
#[test]
fn dfast_prime_with_dictionary_counts_four_byte_tail_budget() {
    // Unlike the simple backend's 1-byte tail case, a 4-byte tail remains
    // useful to dfast and must stay inside the retention budget.
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(2));

    let before = driver.dfast_matcher().max_window_size;
    // One full slice plus a 4-byte tail. Dfast can still use this tail through
    // short-hash overlap into the next block, so it should stay retained.
    driver.prime_with_dictionary(b"abcdefghijkl", [1, 4, 8]);

    assert_eq!(
        driver.dfast_matcher().max_window_size,
        before + 12,
        "dfast retention budget should include 4-byte dictionary tails"
    );
}
3872
#[test]
fn row_prime_with_dictionary_preserves_history_for_first_full_block() {
    // Row-backend variant of the priming test (Level 4): the first committed
    // block must match the 8-byte dictionary at offset 8 with no literals.
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    let mut saw_match = false;
    driver.start_matching(|seq| {
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 8
            && match_len >= ROW_MIN_MATCH_LEN
        {
            saw_match = true;
        }
    });

    assert!(
        saw_match,
        "row backend should match dictionary-primed history in first full block"
    );
}
3905
#[test]
fn row_prime_with_dictionary_subtracts_uncommitted_tail_budget() {
    // Slice size is 8, so "abcdefghi" leaves a 1-byte tail (shorter than 4
    // bytes) that the row backend cannot commit; it must not count toward
    // the retained budget.
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));

    let budget_before = driver.row_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);

    assert_eq!(
        driver.row_matcher().max_window_size,
        budget_before + 8,
        "row retained window must exclude uncommitted 1-byte tail"
    );
}
3922
/// Once live blocks push dictionary-primed slices out of a deliberately tiny
/// Row window, the extra dictionary budget must be fully retired — the window
/// must shrink back to its base size instead of being reused for live history.
#[test]
fn prime_with_dictionary_budget_shrinks_after_row_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));
    // Keep live window tiny so dictionary-primed slices are evicted quickly.
    driver.row_matcher_mut().max_window_size = 8;
    driver.reported_window_size = 8;

    let base_window = driver.row_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    // 24 dictionary bytes committed as three 8-byte slices.
    assert_eq!(driver.row_matcher().max_window_size, base_window + 24);

    // Two full live blocks (16 bytes) against an 8-byte window evict all
    // three primed dictionary slices.
    for block in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut space = driver.get_next_space();
        space.clear();
        space.extend_from_slice(block);
        driver.commit_space(space);
        driver.skip_matching_with_hint(None);
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.row_matcher().max_window_size,
        base_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
3953
/// `get_last_space` must expose the most recently committed block under the
/// Row backend, and resetting down to Fastest must both switch back to the
/// Simple backend and clear the row matcher's history window.
#[test]
fn row_get_last_space_and_reset_to_fastest_clears_window() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));

    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"row-data");
    driver.commit_space(space);

    assert_eq!(driver.get_last_space(), b"row-data");

    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.active_backend, MatcherBackend::Simple);
    assert!(driver.row_matcher().window.is_empty());
}
3970
/// Ensures switching from Row to Simple returns pooled buffers and row tables.
/// After the switch the row matcher stays allocated (for cheap re-activation)
/// but its tables must be emptied and its history buffers recycled into
/// `vec_pool`.
#[test]
fn driver_reset_from_row_backend_reclaims_row_buffer_pool() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));
    assert_eq!(driver.active_backend, MatcherBackend::Row);

    // Ensure the row matcher option is initialized so reset() executes
    // the Row backend retirement path.
    let _ = driver.row_matcher();
    let mut space = driver.get_next_space();
    space.extend_from_slice(b"row-data-to-recycle");
    driver.commit_space(space);

    // Snapshot pool size: reset may only add recycled buffers, never lose any.
    let before_pool = driver.vec_pool.len();
    driver.reset(CompressionLevel::Fastest);

    assert_eq!(driver.active_backend, MatcherBackend::Simple);
    let row = driver
        .row_match_generator
        .as_ref()
        .expect("row matcher should remain allocated after switch");
    assert!(row.row_heads.is_empty());
    assert!(row.row_positions.is_empty());
    assert!(row.row_tags.is_empty());
    assert!(
        driver.vec_pool.len() >= before_pool,
        "row reset should recycle row history buffers"
    );
}
4001
/// Guards the optional row backend retirement path when no row matcher was allocated.
#[test]
fn driver_reset_from_row_backend_tolerates_missing_row_matcher() {
    let mut drv = MatchGeneratorDriver::new(8, 1);
    // Force the Row backend without ever allocating its matcher so reset()
    // must take the "matcher absent" branch instead of retiring row tables.
    drv.active_backend = MatcherBackend::Row;
    drv.row_match_generator = None;

    drv.reset(CompressionLevel::Fastest);

    assert_eq!(drv.active_backend, MatcherBackend::Simple);
}
4013
/// A zero-byte source must clamp any larger hinted window log down to the
/// minimum hinted floor.
#[test]
fn adjust_params_for_zero_source_size_uses_min_hinted_window_floor() {
    let mut hinted = resolve_level_params(CompressionLevel::Level(4), None);
    hinted.window_log = 22;
    assert_eq!(
        adjust_params_for_source_size(hinted, 0).window_log,
        MIN_HINTED_WINDOW_LOG
    );
}
4021
/// Cross-checks `common_prefix_len` (which may take SIMD paths on x86/aarch64)
/// against a byte-by-byte scalar reference over many total lengths, slice start
/// offsets (to vary alignment), and mismatch positions — covering the 16/32-byte
/// vector-width boundaries and the scalar tail path, plus unequal-length inputs.
#[test]
fn common_prefix_len_matches_scalar_reference_across_offsets() {
    // Ground truth: plain element-wise comparison.
    fn scalar_reference(a: &[u8], b: &[u8]) -> usize {
        a.iter()
            .zip(b.iter())
            .take_while(|(lhs, rhs)| lhs == rhs)
            .count()
    }

    // Lengths straddle 16- and 32-byte SIMD block boundaries on both sides.
    for total_len in [
        0usize, 1, 5, 15, 16, 17, 31, 32, 33, 64, 65, 127, 191, 257, 320,
    ] {
        let base: Vec<u8> = (0..total_len)
            .map(|i| ((i * 13 + 7) & 0xFF) as u8)
            .collect();

        // Vary the slice start to exercise unaligned loads.
        for start in [0usize, 1, 3] {
            if start > total_len {
                continue;
            }
            let a = &base[start..];
            let b = a.to_vec();
            assert_eq!(
                MatchGenerator::common_prefix_len(a, &b),
                scalar_reference(a, &b),
                "equal slices total_len={total_len} start={start}"
            );

            let len = a.len();
            // Flip one byte at positions around every vector-width boundary.
            for mismatch in [0usize, 1, 7, 15, 16, 31, 32, 47, 63, 95, 127, 128, 129, 191] {
                if mismatch >= len {
                    continue;
                }
                let mut altered = b.clone();
                altered[mismatch] ^= 0x5A;
                assert_eq!(
                    MatchGenerator::common_prefix_len(a, &altered),
                    scalar_reference(a, &altered),
                    "total_len={total_len} start={start} mismatch={mismatch}"
                );
            }

            // Always probe a mismatch in the very last byte (tail path).
            if len > 0 {
                let mismatch = len - 1;
                let mut altered = b.clone();
                altered[mismatch] ^= 0xA5;
                assert_eq!(
                    MatchGenerator::common_prefix_len(a, &altered),
                    scalar_reference(a, &altered),
                    "tail mismatch total_len={total_len} start={start} mismatch={mismatch}"
                );
            }
        }
    }

    // Unequal lengths: result must be bounded by the shorter slice.
    let long = alloc::vec![0xAB; 320];
    let shorter = alloc::vec![0xAB; 137];
    assert_eq!(
        MatchGenerator::common_prefix_len(&long, &shorter),
        scalar_reference(&long, &shorter)
    );
}
4084
/// In a uniform all-'a' run, positions after `abs_pos` can match at least as
/// long as a minimum-length candidate at `abs_pos`, so the lazy picker must
/// defer (return None) rather than commit the weak candidate.
#[test]
fn row_pick_lazy_returns_none_when_next_is_better() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.add_data(alloc::vec![b'a'; 64], |_| {});
    matcher.ensure_tables();

    let abs_pos = matcher.history_abs_start + 16;
    // Hand-built minimum-length candidate; the real search at +1 will beat it.
    let best = MatchCandidate {
        start: abs_pos,
        offset: 8,
        match_len: ROW_MIN_MATCH_LEN,
    };
    assert!(
        matcher.pick_lazy_match(abs_pos, 0, Some(best)).is_none(),
        "lazy picker should defer when next position is clearly better"
    );
}
4103
/// Depth-2 lazy matching: when the candidate two positions ahead is more than
/// one byte longer than the current best, the picker must defer. The byte
/// fixture is tuned so +0 yields a minimum-length repcode match, +1 is no
/// better, and +2 is significantly longer — each precondition is asserted
/// before the final deferral check.
#[test]
fn row_pick_lazy_depth2_returns_none_when_next2_significantly_better() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.lazy_depth = 2;
    // search_depth = 0 keeps the table search out of the way so only the
    // repcode candidates (seeded via offset_hist) drive the comparison.
    matcher.search_depth = 0;
    matcher.offset_hist = [6, 9, 1];

    let mut data = alloc::vec![b'x'; 40];
    data[11..30].copy_from_slice(b"EFABCABCAEFABCAEFAB");
    matcher.add_data(data, |_| {});
    matcher.ensure_tables();

    let abs_pos = matcher.history_abs_start + 20;
    // Precondition: the +0 baseline is a minimum-length offset-9 repcode match.
    let best = matcher
        .best_match(abs_pos, 0)
        .expect("expected baseline repcode match");
    assert_eq!(best.offset, 9);
    assert_eq!(best.match_len, ROW_MIN_MATCH_LEN);

    // Precondition: +1 must not already beat the baseline.
    if let Some(next) = matcher.best_match(abs_pos + 1, 1) {
        assert!(next.match_len <= best.match_len);
    }

    // Precondition: +2 is more than one byte better — the depth-2 trigger.
    let next2 = matcher
        .best_match(abs_pos + 2, 2)
        .expect("expected +2 candidate");
    assert!(
        next2.match_len > best.match_len + 1,
        "+2 candidate must be significantly better for depth-2 lazy skip"
    );
    assert!(
        matcher.pick_lazy_match(abs_pos, 0, Some(best)).is_none(),
        "lazy picker should defer when +2 candidate is significantly better"
    );
}
4140
/// Counterpart to the depth-2 deferral test: when the +2 candidate is only a
/// single byte longer than the current best, the picker must keep the current
/// best instead of deferring. Fixture differs from the deferral test only in
/// the last byte ('Z' instead of 'B'), capping the +2 match one byte shorter.
#[test]
fn row_pick_lazy_depth2_keeps_best_when_next2_is_only_one_byte_better() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.lazy_depth = 2;
    matcher.search_depth = 0;
    matcher.offset_hist = [6, 9, 1];

    let mut data = alloc::vec![b'x'; 40];
    data[11..30].copy_from_slice(b"EFABCABCAEFABCAEFAZ");
    matcher.add_data(data, |_| {});
    matcher.ensure_tables();

    let abs_pos = matcher.history_abs_start + 20;
    // Precondition: minimum-length offset-9 repcode baseline at +0.
    let best = matcher
        .best_match(abs_pos, 0)
        .expect("expected baseline repcode match");
    assert_eq!(best.offset, 9);
    assert_eq!(best.match_len, ROW_MIN_MATCH_LEN);

    // Precondition: +2 is exactly one byte better — below the deferral bar.
    let next2 = matcher
        .best_match(abs_pos + 2, 2)
        .expect("expected +2 candidate");
    assert_eq!(next2.match_len, best.match_len + 1);
    let chosen = matcher
        .pick_lazy_match(abs_pos, 0, Some(best))
        .expect("lazy picker should keep current best");
    assert_eq!(chosen.start, best.start);
    assert_eq!(chosen.offset, best.offset);
    assert_eq!(chosen.match_len, best.match_len);
}
4172
/// Verifies row/tag extraction uses the high bits of the multiplicative hash.
/// Recomputes the expected hash independently: read ROW_HASH_KEY_LEN bytes as
/// little-endian, multiply by the 64-bit prime, take the top
/// `row_hash_log + ROW_TAG_BITS` bits, then split them into row and tag.
#[test]
fn row_hash_and_row_extracts_high_bits() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.add_data(
        alloc::vec![
            0xAA, 0xBB, 0xCC, 0x11, 0x10, 0x20, 0x30, 0x40, 0xAA, 0xBB, 0xCC, 0x22, 0x50, 0x60,
            0x70, 0x80,
        ],
        |_| {},
    );
    matcher.ensure_tables();

    let pos = matcher.history_abs_start + 8;
    let (row, tag) = matcher
        .hash_and_row(pos)
        .expect("row hash should be available");

    // Independent re-derivation of the expected (row, tag) pair.
    let idx = pos - matcher.history_abs_start;
    let concat = matcher.live_history();
    let value = u32::from_le_bytes(concat[idx..idx + ROW_HASH_KEY_LEN].try_into().unwrap()) as u64;
    const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
    let hash = value.wrapping_mul(PRIME);
    let total_bits = matcher.row_hash_log + ROW_TAG_BITS;
    // High bits of the product carry the mixing; low bits are discarded.
    let combined = hash >> (u64::BITS as usize - total_bits);
    let expected_row =
        ((combined >> ROW_TAG_BITS) as usize) & ((1usize << matcher.row_hash_log) - 1);
    let expected_tag = combined as u8;

    assert_eq!(row, expected_row);
    assert_eq!(tag, expected_tag);
}
4206
/// Repcode offset 3 from abs position 12 points at abs position 9, which lies
/// before `history_abs_start` (10) — i.e. into evicted data — so the repcode
/// probe must not produce a candidate.
#[test]
fn row_repcode_skips_candidate_before_history_start() {
    let mut gen = RowMatchGenerator::new(1 << 22);
    gen.configure(ROW_CONFIG);
    gen.history = alloc::vec![b'a'; 20];
    gen.history_start = 0;
    gen.history_abs_start = 10;
    gen.offset_hist = [3, 0, 0];

    assert!(gen.repcode_candidate(12, 1).is_none());
}
4218
/// With only 5 bytes of history, position 4 leaves a single trailing byte —
/// too little for a minimum-length row match — so the repcode probe must bail.
#[test]
fn row_repcode_returns_none_when_position_too_close_to_history_end() {
    let mut gen = RowMatchGenerator::new(1 << 22);
    gen.configure(ROW_CONFIG);
    gen.history = b"abcde".to_vec();
    gen.history_start = 0;
    gen.history_abs_start = 0;
    gen.offset_hist = [1, 0, 0];

    assert!(gen.repcode_candidate(4, 1).is_none());
}
4230
/// A 5-byte history leaves too little data after position 0 for the row
/// backend's minimum match, so the table lookup must return None rather than
/// read past the end of the buffer.
#[test]
fn row_candidate_returns_none_when_abs_pos_near_end_of_history() {
    let mut gen = RowMatchGenerator::new(1 << 22);
    gen.configure(ROW_CONFIG);
    gen.history = b"abcde".to_vec();
    gen.history_start = 0;
    gen.history_abs_start = 0;

    assert!(gen.row_candidate(0, 0).is_none());
}
4241
/// A 3-byte history is shorter than the hash-chain minimum hashable suffix, so
/// every candidate slot returned must hold the `usize::MAX` sentinel.
#[test]
fn hc_chain_candidates_returns_sentinels_for_short_suffix() {
    let mut hc = HcMatchGenerator::new(32);
    hc.history = b"abc".to_vec();
    hc.history_start = 0;
    hc.history_abs_start = 0;
    hc.ensure_tables();

    assert!(
        hc.chain_candidates(0)
            .iter()
            .all(|&slot| slot == usize::MAX)
    );
}
4253
/// `reset` must reuse already-allocated HC tables (no reallocation) but scrub
/// every stale entry back to the `HC_EMPTY` sentinel. The tables are poisoned
/// with arbitrary values first so the refill is actually observable.
#[test]
fn hc_reset_refills_existing_tables_with_empty_sentinel() {
    let mut hc = HcMatchGenerator::new(32);
    hc.add_data(b"abcdeabcde".to_vec(), |_| {});
    hc.ensure_tables();
    assert!(!hc.hash_table.is_empty());
    assert!(!hc.chain_table.is_empty());
    // Poison both tables so a no-op reset would be detected below.
    hc.hash_table.fill(123);
    hc.chain_table.fill(456);

    hc.reset(|_| {});

    assert!(hc.hash_table.iter().all(|&v| v == HC_EMPTY));
    assert!(hc.chain_table.iter().all(|&v| v == HC_EMPTY));
}
4269
/// Matching over an empty committed block must return without ever invoking
/// the sequence sink.
#[test]
fn hc_start_matching_returns_early_for_empty_current_block() {
    let mut hc = HcMatchGenerator::new(32);
    hc.add_data(Vec::new(), |_| {});
    let mut emitted = false;
    hc.start_matching(|_| emitted = true);
    assert!(!emitted, "empty current block should not emit sequences");
}
4278
/// Produces `len` pseudo-random bytes from a xorshift64 generator seeded with
/// `seed`. Fully deterministic for a given seed, so test fixtures are
/// reproducible while still looking like high-entropy (incompressible) data.
/// NOTE: callers must pass a non-zero seed — xorshift64 maps 0 to 0 forever.
#[cfg(test)]
fn deterministic_high_entropy_bytes(seed: u64, len: usize) -> Vec<u8> {
    let mut state = seed;
    (0..len)
        .map(|_| {
            // One xorshift64 step per output byte; emit bits 40..48.
            state ^= state << 13;
            state ^= state >> 7;
            state ^= state << 17;
            (state >> 40) as u8
        })
        .collect()
}
4291
/// After a sparse (incompressible-hinted) skip of a high-entropy block, the
/// dense tail of that block must remain indexed so the next block can match
/// across the boundary: the very first sequence of the second block should be
/// a zero-literal match referencing the previous block's 9-byte tail.
#[test]
fn hc_sparse_skip_matching_preserves_tail_cross_block_match() {
    let mut matcher = HcMatchGenerator::new(1 << 22);
    let tail = b"Qz9kLm2Rp";
    // High-entropy filler triggers the sparse-insert path; the known tail is
    // spliced over the last bytes so the cross-block match target is fixed.
    let mut first = deterministic_high_entropy_bytes(0xD1B5_4A32_9C77_0E19, 4096);
    let tail_start = first.len() - tail.len();
    first[tail_start..].copy_from_slice(tail);
    matcher.add_data(first.clone(), |_| {});
    matcher.skip_matching(Some(true));

    let mut second = tail.to_vec();
    second.extend_from_slice(b"after-tail-literals");
    matcher.add_data(second, |_| {});

    // Capture only the first emitted sequence, normalized to a triple.
    let mut first_sequence = None;
    matcher.start_matching(|seq| {
        if first_sequence.is_some() {
            return;
        }
        first_sequence = Some(match seq {
            Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => (literals.len(), offset, match_len),
        });
    });

    let (literals_len, offset, match_len) =
        first_sequence.expect("expected at least one sequence after sparse skip");
    assert_eq!(
        literals_len, 0,
        "first sequence should start at block boundary"
    );
    assert_eq!(
        offset,
        tail.len(),
        "first match should reference previous tail"
    );
    assert!(
        match_len >= tail.len(),
        "tail-aligned cross-block match must be preserved"
    );
}
4337
/// The dense-tail pass of a sparse skip must not re-insert positions that the
/// sparse grid already inserted: a double insert would make a chain entry
/// point at itself (`chain_table[idx] == rel + 1`), corrupting the chain.
/// Finds a position in the dense tail that also lies on the sparse grid and
/// checks its chain entry is not a self-loop.
#[test]
fn hc_sparse_skip_matching_does_not_reinsert_sparse_tail_positions() {
    let mut matcher = HcMatchGenerator::new(1 << 22);
    let first = deterministic_high_entropy_bytes(0xC2B2_AE3D_27D4_EB4F, 4096);
    matcher.add_data(first.clone(), |_| {});
    matcher.skip_matching(Some(true));

    // Reconstruct the absolute span of the just-skipped block and its
    // densely-inserted tail region.
    let current_len = first.len();
    let current_abs_start = matcher.history_abs_start + matcher.window_size - current_len;
    let current_abs_end = current_abs_start + current_len;
    let dense_tail = HC_MIN_MATCH_LEN + INCOMPRESSIBLE_SKIP_STEP;
    let tail_start = current_abs_end
        .saturating_sub(dense_tail)
        .max(matcher.history_abs_start)
        .max(current_abs_start);

    // A tail position that is also on the sparse INCOMPRESSIBLE_SKIP_STEP grid
    // is exactly where a buggy double-insert would occur.
    let overlap_pos = (tail_start..current_abs_end)
        .find(|&pos| (pos - current_abs_start).is_multiple_of(INCOMPRESSIBLE_SKIP_STEP))
        .expect("fixture should contain at least one sparse-grid overlap in dense tail");

    let rel = matcher
        .relative_position(overlap_pos)
        .expect("overlap position should be representable as relative position");
    let chain_idx = rel as usize & ((1 << matcher.chain_log) - 1);
    assert_ne!(
        matcher.chain_table[chain_idx],
        rel + 1,
        "sparse-grid tail positions must not be reinserted (self-loop chain entry)"
    );
}
4368
/// With `history_start` at 16 against an 8-byte window budget, compaction must
/// drain the dead 16-byte prefix in place and rebase `history_start` to zero,
/// leaving only the live suffix.
#[test]
fn hc_compact_history_drains_when_threshold_crossed() {
    let mut gen = HcMatchGenerator::new(8);
    gen.history = b"abcdefghijklmnopqrstuvwxyz".to_vec();
    gen.history_start = 16;

    gen.compact_history();

    assert_eq!(gen.history_start, 0);
    assert_eq!(gen.history, b"qrstuvwxyz");
}
4378
/// `position_base` (1) is ahead of absolute position 0, so no relative position
/// can be computed for the insert; the method must return early and leave both
/// HC tables byte-for-byte unchanged.
#[test]
fn hc_insert_position_no_rebase_returns_when_relative_pos_unavailable() {
    let mut hc = HcMatchGenerator::new(32);
    hc.history = b"abcdefghijklmnop".to_vec();
    hc.history_abs_start = 0;
    hc.position_base = 1;
    hc.ensure_tables();
    // Snapshot both tables so any mutation is detected exactly.
    let before_hash = hc.hash_table.clone();
    let before_chain = hc.chain_table.clone();

    hc.insert_position_no_rebase(0);

    assert_eq!(hc.hash_table, before_hash);
    assert_eq!(hc.chain_table, before_chain);
}
4394
/// Simple-backend variant of the budget-retirement regression: once live
/// blocks evict the dictionary-primed slices from a tiny window, the retained
/// dictionary budget must drop to zero and the window must shrink back to its
/// base size.
#[test]
fn prime_with_dictionary_budget_shrinks_after_simple_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);
    // Use a small live window so dictionary-primed slices are evicted
    // quickly and budget retirement can be asserted deterministically.
    driver.match_generator.max_window_size = 8;
    driver.reported_window_size = 8;

    let base_window = driver.match_generator.max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    // 24 dictionary bytes were committed, extending the window accordingly.
    assert_eq!(driver.match_generator.max_window_size, base_window + 24);

    // 16 bytes of live data against an 8-byte window evict all primed slices.
    for block in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut space = driver.get_next_space();
        space.clear();
        space.extend_from_slice(block);
        driver.commit_space(space);
        driver.skip_matching_with_hint(None);
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.match_generator.max_window_size, base_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
4425
/// Dfast-backend variant of the budget-retirement regression: evicting the
/// dictionary-primed slices must retire the full 24-byte budget and restore
/// the base window size.
#[test]
fn prime_with_dictionary_budget_shrinks_after_dfast_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    // Level(2) selects the Dfast backend for this driver configuration.
    driver.reset(CompressionLevel::Level(2));
    // Use a small live window in this regression so dictionary-primed slices are
    // evicted quickly and budget retirement can be asserted deterministically.
    driver.dfast_matcher_mut().max_window_size = 8;
    driver.reported_window_size = 8;

    let base_window = driver.dfast_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(driver.dfast_matcher().max_window_size, base_window + 24);

    // 16 bytes of live data against an 8-byte window evict all primed slices.
    for block in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut space = driver.get_next_space();
        space.clear();
        space.extend_from_slice(block);
        driver.commit_space(space);
        driver.skip_matching_with_hint(None);
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.dfast_matcher().max_window_size,
        base_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
4457
/// Dictionary-primed history must be searchable by the hash-chain backend in
/// the very first committed block: repeating the 8-byte dictionary must yield
/// a zero-literal, offset-8 match into primed history.
#[test]
fn hc_prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    // Better selects the HashChain backend.
    driver.reset(CompressionLevel::Better);

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    let mut space = driver.get_next_space();
    space.clear();
    // Repeat the dictionary content so the HC matcher can find it.
    // HC_MIN_MATCH_LEN is 5, so an 8-byte match is well above threshold.
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    let mut saw_match = false;
    driver.start_matching(|seq| {
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 8
            && match_len >= HC_MIN_MATCH_LEN
        {
            saw_match = true;
        }
    });

    assert!(
        saw_match,
        "hash-chain backend should match dictionary-primed history in first full block"
    );
}
4492
/// Hash-chain-backend variant of the budget-retirement regression: evicting
/// the dictionary-primed slices from a tiny window must retire the 24-byte
/// budget and restore the base window size.
#[test]
fn prime_with_dictionary_budget_shrinks_after_hc_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Better);
    // Use a small live window so dictionary-primed slices are evicted quickly.
    driver.hc_matcher_mut().max_window_size = 8;
    driver.reported_window_size = 8;

    let base_window = driver.hc_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(driver.hc_matcher().max_window_size, base_window + 24);

    // 16 bytes of live data against an 8-byte window evict all primed slices.
    for block in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut space = driver.get_next_space();
        space.clear();
        space.extend_from_slice(block);
        driver.commit_space(space);
        driver.skip_matching_with_hint(None);
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.hc_matcher().max_window_size,
        base_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
4523
/// Regression for long-running streams whose absolute positions exceed u32:
/// the HC matcher must rebase `position_base` to the oldest live position and
/// keep both its table population and its candidate lookup functional.
/// On targets where the simulated position does not fit in usize the test is
/// skipped via the early `return`.
#[test]
fn hc_rebases_positions_after_u32_boundary() {
    let mut matcher = HcMatchGenerator::new(64);
    matcher.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
    matcher.ensure_tables();
    matcher.position_base = 0;
    let history_abs_start: usize = match (u64::from(u32::MAX) + 64).try_into() {
        Ok(value) => value,
        Err(_) => return,
    };
    // Simulate a long-running stream where absolute history positions crossed
    // the u32 range. Before #51 this disabled HC inserts entirely.
    matcher.history_abs_start = history_abs_start;
    matcher.skip_matching(None);
    assert_eq!(
        matcher.position_base, matcher.history_abs_start,
        "rebase should anchor to the oldest live absolute position"
    );

    assert!(
        matcher.hash_table.iter().any(|entry| *entry != HC_EMPTY),
        "HC hash table should still be populated after crossing u32 boundary"
    );

    // Verify rebasing preserves candidate lookup, not just table population.
    let abs_pos = matcher.history_abs_start + 10;
    let candidates = matcher.chain_candidates(abs_pos);
    assert!(
        candidates.iter().any(|candidate| *candidate != usize::MAX),
        "chain_candidates should return valid matches after rebase"
    );
}
4556
/// A rebase triggered at `abs_pos` must rebuild the HC tables only from
/// positions that were already inserted (those before `abs_pos`), never
/// pre-seeding future positions. An `expected` twin matcher is built by
/// inserting exactly that prefix by hand and its tables must match the
/// rebased matcher's tables exactly.
#[test]
fn hc_rebase_rebuilds_only_inserted_prefix() {
    let mut matcher = HcMatchGenerator::new(64);
    matcher.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
    matcher.ensure_tables();
    matcher.position_base = 0;
    // Same u32-overflow simulation as the boundary test; skip if usize is
    // too narrow to represent the position.
    let history_abs_start: usize = match (u64::from(u32::MAX) + 64).try_into() {
        Ok(value) => value,
        Err(_) => return,
    };
    matcher.history_abs_start = history_abs_start;
    let abs_pos = matcher.history_abs_start + 6;

    // Build the ground-truth twin: fresh tables with only [start, abs_pos)
    // inserted, using the already-rebased position base.
    let mut expected = HcMatchGenerator::new(64);
    expected.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
    expected.ensure_tables();
    expected.history_abs_start = history_abs_start;
    expected.position_base = expected.history_abs_start;
    expected.hash_table.fill(HC_EMPTY);
    expected.chain_table.fill(HC_EMPTY);
    for pos in expected.history_abs_start..abs_pos {
        expected.insert_position_no_rebase(pos);
    }

    matcher.maybe_rebase_positions(abs_pos);

    assert_eq!(
        matcher.position_base, matcher.history_abs_start,
        "rebase should still anchor to the oldest live absolute position"
    );
    assert_eq!(
        matcher.hash_table, expected.hash_table,
        "rebase must rebuild only positions already inserted before abs_pos"
    );
    assert_eq!(
        matcher.chain_table, expected.chain_table,
        "future positions must not be pre-seeded into HC chains during rebase"
    );
}
4596
/// A capacity of one forces every key into the same slot; insert and lookup
/// must still work instead of panicking on the degenerate table size.
#[test]
fn suffix_store_with_single_slot_does_not_panic_on_keying() {
    let key = b"abcde";
    let mut store = SuffixStore::with_capacity(1);
    store.insert(key, 0);
    assert!(store.contains_key(key));
    assert_eq!(store.get(key), Some(0));
}
4604
/// Each compression level must configure its matcher appropriately on reset:
/// Uncompressed seeds every position (step 1), Fastest uses the sparse
/// FAST_HASH_FILL_STEP, and Better switches to the HashChain backend with a
/// BETTER_WINDOW_LOG-sized window and depth-2 lazy matching.
#[test]
fn fastest_reset_uses_interleaved_hash_fill_step() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    driver.reset(CompressionLevel::Uncompressed);
    assert_eq!(driver.match_generator.hash_fill_step, 1);

    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.match_generator.hash_fill_step, FAST_HASH_FILL_STEP);

    // Better uses the HashChain backend with lazy2; verify that the backend switch
    // happened and the lazy_depth is configured correctly.
    driver.reset(CompressionLevel::Better);
    assert_eq!(driver.active_backend, MatcherBackend::HashChain);
    assert_eq!(driver.window_size(), (1u64 << 23));
    assert_eq!(driver.hc_matcher().lazy_depth, 2);
}
4622
/// Emitting a match must rotate its offset into the front of the repcode
/// history: after the offset-5 match, offset_hist reads [5, 1, 4].
#[test]
fn simple_matcher_updates_offset_history_after_emitting_match() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdeabcdeabcde".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    // "abcde" repeated three times: 5 literal bytes, then a 10-byte match at
    // offset 5 (overlapping copy).
    assert!(matcher.next_sequence(|seq| {
        assert_eq!(
            seq,
            Sequence::Triple {
                literals: b"abcde",
                offset: 5,
                match_len: 10,
            }
        );
    }));
    assert_eq!(matcher.offset_hist, [5, 1, 4]);
}
4644
/// At a zero-literal position (suffix_idx == last_idx_in_sequence), the rep1
/// slot (offset_hist[1] = 10) matches the repeated half of the data and must
/// be found by the repcode probe, yielding offset 10 / length 10.
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep1_before_hash_lookup() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghijabcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    // Position the matcher exactly at the start of the second copy with no
    // pending literals, which enables the zero-literal repcode checks.
    matcher.suffix_idx = 10;
    matcher.last_idx_in_sequence = 10;
    matcher.offset_hist = [99, 10, 4];

    let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
    assert_eq!(candidate, Some((10, 10)));
}
4661
/// A repcode offset may reach past the current block into an earlier window
/// entry: with rep1 = 10 and identical 10-byte blocks, the probe from the
/// start of the second block must find the first block (offset 10, length 10).
#[test]
fn simple_matcher_repcode_can_target_previous_window_entry() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    // skip_matching finalizes the first block so the next add_data starts a
    // new window entry.
    matcher.skip_matching();
    matcher.add_data(
        b"abcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    matcher.offset_hist = [99, 10, 4];

    let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data, 0);
    assert_eq!(candidate, Some((10, 10)));
}
4682
/// Zero-literal repcode probing must fall through to the rep2 slot when rep1
/// does not match at the current position.
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep2() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghijabcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    // Zero-literal position at the start of the second copy.
    matcher.suffix_idx = 10;
    matcher.last_idx_in_sequence = 10;
    // rep1=4 does not match at idx 10, rep2=10 does.
    matcher.offset_hist = [99, 4, 10];

    let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
    assert_eq!(candidate, Some((10, 10)));
}
4699
/// Zero-literal repcode probing must also try the special rep0-1 candidate
/// (Zstd's zero-literal repeat-offset convention) when rep1 and rep2 miss.
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep0_minus1() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghijabcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    // Zero-literal position at the start of the second copy.
    matcher.suffix_idx = 10;
    matcher.last_idx_in_sequence = 10;
    // rep1=4 and rep2=99 do not match; rep0-1 == 10 does.
    matcher.offset_hist = [11, 4, 99];

    let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
    assert_eq!(candidate, Some((10, 10)));
}
4716
/// An offset that reaches further back than the searchable history prefix
/// (offset 14 from suffix_idx 3 with only 13 searchable bytes behind it)
/// must be rejected rather than dereference out-of-window data.
#[test]
fn simple_matcher_repcode_rejects_offsets_beyond_searchable_prefix() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.skip_matching();
    matcher.add_data(
        b"klmnopqrst".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.suffix_idx = 3;

    let candidate = matcher.offset_match_len(14, &matcher.window.last().unwrap().data[3..]);
    assert_eq!(candidate, None);
}
4736
/// `skip_matching` must index every position of the skipped block even when
/// the matcher normally fills hashes sparsely (FAST_HASH_FILL_STEP): the probe
/// "bcdef" starts at index 1, which a step-3 fill starting at 0 would miss,
/// yet it must still match at offset 15.
#[test]
fn simple_matcher_skip_matching_seeds_every_position_even_with_fast_step() {
    let mut matcher = MatchGenerator::new(64);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;
    matcher.add_data(
        b"abcdefghijklmnop".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.skip_matching();
    matcher.add_data(b"bcdef".to_vec(), SuffixStore::with_capacity(64), |_, _| {});

    assert!(matcher.next_sequence(|seq| {
        assert_eq!(
            seq,
            Sequence::Triple {
                literals: b"",
                offset: 15,
                match_len: 5,
            }
        );
    }));
    // The probe is fully consumed by the single match.
    assert!(!matcher.next_sequence(|_| {}));
}
4761
/// With an incompressible hint, `skip_matching_with_hint` indexes the skipped
/// block's prefix only sparsely but its tail densely. Verified through
/// observable behavior: a probe taken from a sparse-prefix position must come
/// back as literals first, while a probe taken from the dense tail must match
/// immediately at block start.
#[test]
fn simple_matcher_skip_matching_with_incompressible_hint_uses_sparse_prefix() {
    let mut matcher = MatchGenerator::new(128);
    let first = b"abcdefghijklmnopqrstuvwxyz012345".to_vec();
    // Probe from offset 3 (off the sparse grid) and from the dense tail.
    let sparse_probe = first[3..3 + MIN_MATCH_LEN].to_vec();
    let tail_start = first.len() - MIN_MATCH_LEN;
    let tail_probe = first[tail_start..tail_start + MIN_MATCH_LEN].to_vec();
    matcher.add_data(first, SuffixStore::with_capacity(256), |_, _| {});

    matcher.skip_matching_with_hint(Some(true));

    // Observable behavior check: sparse-prefix probe should not immediately match.
    matcher.add_data(sparse_probe, SuffixStore::with_capacity(256), |_, _| {});
    let mut sparse_first_is_literals = None;
    assert!(matcher.next_sequence(|seq| {
        if sparse_first_is_literals.is_none() {
            sparse_first_is_literals = Some(matches!(seq, Sequence::Literals { .. }));
        }
    }));
    assert!(
        sparse_first_is_literals.unwrap_or(false),
        "sparse-start probe should not produce an immediate match"
    );

    // Dense tail remains indexed for cross-block boundary matching.
    // Fresh matcher so the sparse-probe bytes above cannot interfere.
    let mut matcher = MatchGenerator::new(128);
    matcher.add_data(
        b"abcdefghijklmnopqrstuvwxyz012345".to_vec(),
        SuffixStore::with_capacity(256),
        |_, _| {},
    );
    matcher.skip_matching_with_hint(Some(true));
    matcher.add_data(tail_probe, SuffixStore::with_capacity(256), |_, _| {});
    let mut tail_first_is_immediate_match = None;
    assert!(matcher.next_sequence(|seq| {
        if tail_first_is_immediate_match.is_none() {
            tail_first_is_immediate_match =
                Some(matches!(seq, Sequence::Triple { literals, .. } if literals.is_empty()));
        }
    }));
    assert!(
        tail_first_is_immediate_match.unwrap_or(false),
        "dense tail probe should match immediately at block start"
    );
}
4807
#[test]
fn simple_matcher_add_suffixes_till_backfills_last_searchable_anchor() {
    // Backfilling up to index 10 with a fast fill step must still register
    // the final searchable anchor (position 5, the last full-length suffix).
    let mut gen = MatchGenerator::new(64);
    gen.hash_fill_step = FAST_HASH_FILL_STEP;
    gen.add_data(
        b"01234abcde".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    gen.add_suffixes_till(10, FAST_HASH_FILL_STEP);

    let block = gen.window.last().unwrap();
    assert_eq!(block.suffixes.get(&block.data[5..10]), Some(5));
}
4823
#[test]
fn simple_matcher_add_suffixes_till_skips_when_idx_below_min_match_len() {
    // An end index below MIN_MATCH_LEN cannot form even one full suffix key,
    // so backfilling must register nothing — not even the first position.
    let mut gen = MatchGenerator::new(128);
    gen.hash_fill_step = FAST_HASH_FILL_STEP;
    gen.add_data(
        b"abcdefghijklmnopqrstuvwxyz".to_vec(),
        SuffixStore::with_capacity(1 << 16),
        |_, _| {},
    );

    gen.add_suffixes_till(MIN_MATCH_LEN - 1, FAST_HASH_FILL_STEP);

    let block = gen.window.last().unwrap();
    assert_eq!(block.suffixes.get(&block.data[..MIN_MATCH_LEN]), None);
}
4840
#[test]
fn simple_matcher_add_suffixes_till_fast_step_registers_interleaved_positions() {
    // With a fill step of 3, backfilling to index 17 must have registered
    // every third start position: 0, 3, 6, 9, 12.
    let mut gen = MatchGenerator::new(128);
    gen.hash_fill_step = FAST_HASH_FILL_STEP;
    gen.add_data(
        b"abcdefghijklmnopqrstuvwxyz".to_vec(),
        SuffixStore::with_capacity(1 << 16),
        |_, _| {},
    );

    gen.add_suffixes_till(17, FAST_HASH_FILL_STEP);

    let block = gen.window.last().unwrap();
    for pos in (0usize..=12).step_by(FAST_HASH_FILL_STEP) {
        let key = &block.data[pos..pos + MIN_MATCH_LEN];
        assert_eq!(
            block.suffixes.get(key),
            Some(pos),
            "expected interleaved suffix registration at pos {pos}"
        );
    }
}
4863
#[test]
fn dfast_skip_matching_handles_window_eviction() {
    // Three 6-byte blocks overflow the 16-byte window; decoding the third
    // block's sequences must still work against whatever history survived.
    let mut gen = DfastMatchGenerator::new(16);

    gen.add_data(alloc::vec![1, 2, 3, 4, 5, 6], |_| {});
    gen.skip_matching(None);
    gen.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});
    gen.skip_matching(None);
    gen.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});

    // Seed the decode buffer with the second (skipped) block so that
    // history-relative offsets resolve during reconstruction.
    let mut decoded = alloc::vec![7, 8, 9, 10, 11, 12];
    gen.start_matching(|seq| match seq {
        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            decoded.extend_from_slice(literals);
            let from = decoded.len() - offset;
            // Byte-at-a-time copy: source and destination ranges may overlap.
            for i in 0..match_len {
                let b = decoded[from + i];
                decoded.push(b);
            }
        }
    });

    assert_eq!(decoded, [7, 8, 9, 10, 11, 12, 7, 8, 9, 10, 11, 12]);
}
4893
#[test]
fn dfast_add_data_callback_reports_evicted_len_not_capacity() {
    // Over-allocate both blocks (capacity 64, length 8) so any confusion of
    // Vec capacity with byte length is observable in the eviction callback.
    let mut gen = DfastMatchGenerator::new(8);

    let mut block_a = Vec::with_capacity(64);
    block_a.extend_from_slice(b"abcdefgh");
    gen.add_data(block_a, |_| {});

    let mut block_b = Vec::with_capacity(64);
    block_b.extend_from_slice(b"ijklmnop");

    let mut evicted_len = None;
    gen.add_data(block_b, |data| evicted_len = Some(data.len()));

    assert_eq!(
        evicted_len,
        Some(8),
        "eviction callback must report evicted byte length, not backing capacity"
    );
}
4916
#[test]
fn dfast_trim_to_window_callback_reports_evicted_len_not_capacity() {
    // Over-allocated blocks (capacity 64, length 8) make a capacity-vs-length
    // mix-up in the trim callback observable.
    let mut gen = DfastMatchGenerator::new(16);

    let mut block_a = Vec::with_capacity(64);
    block_a.extend_from_slice(b"abcdefgh");
    gen.add_data(block_a, |_| {});

    let mut block_b = Vec::with_capacity(64);
    block_b.extend_from_slice(b"ijklmnop");
    gen.add_data(block_b, |_| {});

    // Shrink the window so trimming is forced to evict the oldest block.
    gen.max_window_size = 8;

    let mut evicted_len = None;
    gen.trim_to_window(|data| evicted_len = Some(data.len()));

    assert_eq!(
        evicted_len,
        Some(8),
        "trim callback must report evicted byte length, not backing capacity"
    );
}
4942
#[test]
fn dfast_inserts_tail_positions_for_next_block_matching() {
    let mut gen = DfastMatchGenerator::new(1 << 22);

    // The first block has no repeats long enough to match, so it must be
    // emitted purely as literals.
    gen.add_data(b"012345bcdea".to_vec(), |_| {});
    let mut decoded = Vec::new();
    gen.start_matching(|seq| match seq {
        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
        Sequence::Triple { .. } => unreachable!("first block should not match history"),
    });
    assert_eq!(decoded, b"012345bcdea");

    // The second block opens with "bcdea", which is only findable if the
    // tail positions of the first block were inserted into the hash tables.
    gen.add_data(b"bcdeabcdeab".to_vec(), |_| {});
    let mut sequences_seen = 0usize;
    gen.start_matching(|seq| {
        assert!(sequences_seen == 0, "expected a single cross-block match");
        sequences_seen += 1;
        match seq {
            Sequence::Literals { .. } => {
                panic!("expected tail-anchored cross-block match before any literals")
            }
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => {
                // One overlapping match covers the whole 11-byte block.
                assert_eq!(literals, b"");
                assert_eq!(offset, 5);
                assert_eq!(match_len, 11);
                let from = decoded.len() - offset;
                for i in 0..match_len {
                    let b = decoded[from + i];
                    decoded.push(b);
                }
            }
        }
    });

    assert!(
        sequences_seen == 1,
        "expected tail-anchored cross-block match"
    );
    assert_eq!(decoded, b"012345bcdeabcdeabcdeab");
}
4987
#[test]
fn dfast_dense_skip_matching_backfills_previous_tail_for_next_block() {
    // A densely skipped block's tail must remain matchable from the start of
    // the next block: the second block begins with the first block's 9-byte
    // tail, so the first emitted sequence should be an immediate match at
    // offset == tail.len().
    let mut gen = DfastMatchGenerator::new(1 << 22);
    let tail = b"Qz9kLm2Rp";
    let mut first = b"0123456789abcdef".to_vec();
    first.extend_from_slice(tail);
    // `first` is not read afterwards, so hand it over without the redundant
    // clone the test previously made.
    gen.add_data(first, |_| {});
    gen.skip_matching(Some(false));

    let mut second = tail.to_vec();
    second.extend_from_slice(b"after-tail-literals");
    gen.add_data(second, |_| {});

    // Capture only the first sequence as (literal_len, offset, match_len);
    // Literals map to zeroed offset/match_len.
    let mut first_sequence = None;
    gen.start_matching(|seq| {
        if first_sequence.is_none() {
            first_sequence = Some(match seq {
                Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
                Sequence::Triple {
                    literals,
                    offset,
                    match_len,
                } => (literals.len(), offset, match_len),
            });
        }
    });

    let (lit_len, offset, match_len) = first_sequence.expect("expected at least one sequence");
    assert_eq!(
        lit_len, 0,
        "expected immediate cross-block match at block start"
    );
    assert_eq!(
        offset,
        tail.len(),
        "expected dense skip to preserve cross-boundary tail match"
    );
    assert!(
        match_len >= DFAST_MIN_MATCH_LEN,
        "match length should satisfy dfast minimum match length"
    );
}
5031
#[test]
fn dfast_sparse_skip_matching_preserves_tail_cross_block_match() {
    // Even a sparsely skipped block must keep its tail densely seeded: the
    // second block starts with the first block's planted 9-byte tail and
    // should match it immediately at offset == tail.len().
    let mut gen = DfastMatchGenerator::new(1 << 22);
    let tail = b"Qz9kLm2Rp";
    let mut first = deterministic_high_entropy_bytes(0x9E37_79B9_7F4A_7C15, 4096);
    let tail_start = first.len() - tail.len();
    first[tail_start..].copy_from_slice(tail);
    // `first` is not read afterwards, so hand it over without the redundant
    // clone the test previously made (4 KiB copied and dropped unused).
    gen.add_data(first, |_| {});

    gen.skip_matching(Some(true));

    let mut second = tail.to_vec();
    second.extend_from_slice(b"after-tail-literals");
    gen.add_data(second, |_| {});

    // Capture only the first sequence as (literal_len, offset, match_len);
    // Literals map to zeroed offset/match_len.
    let mut first_sequence = None;
    gen.start_matching(|seq| {
        if first_sequence.is_none() {
            first_sequence = Some(match seq {
                Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
                Sequence::Triple {
                    literals,
                    offset,
                    match_len,
                } => (literals.len(), offset, match_len),
            });
        }
    });

    let (lit_len, offset, match_len) = first_sequence.expect("expected at least one sequence");
    assert_eq!(
        lit_len, 0,
        "expected immediate cross-block match at block start"
    );
    assert_eq!(
        offset,
        tail.len(),
        "expected match against densely seeded tail"
    );
    assert!(
        match_len >= DFAST_MIN_MATCH_LEN,
        "match length should satisfy dfast minimum match length"
    );
}
5077
#[test]
fn dfast_skip_matching_dense_backfills_newly_hashable_long_tail_positions() {
    let mut gen = DfastMatchGenerator::new(1 << 22);
    let first = deterministic_high_entropy_bytes(0x7A64_0315_D4E1_91C3, 4096);
    let first_len = first.len();
    gen.add_data(first, |_| {});
    gen.skip_matching_dense();

    // Appending one byte makes exactly the previous block's last 7 starts
    // newly eligible for 8-byte long-hash insertion.
    gen.add_data(alloc::vec![0xAB], |_| {});
    gen.skip_matching_dense();

    let boundary_abs = first_len - 7;
    let boundary_rel = boundary_abs - gen.history_abs_start;
    let history = gen.live_history();
    assert!(
        boundary_rel + 8 <= history.len(),
        "fixture must make the boundary start long-hashable"
    );
    let bucket = gen.hash8(&history[boundary_rel..]);
    assert!(
        gen.long_hash[bucket].contains(&boundary_abs),
        "dense skip must seed long-hash entry for newly hashable boundary start"
    );
}
5104
#[test]
fn dfast_seed_remaining_hashable_starts_seeds_last_short_hash_positions() {
    let mut gen = DfastMatchGenerator::new(1 << 20);
    let block = deterministic_high_entropy_bytes(0x13F0_9A6D_55CE_7B21, 64);
    gen.add_data(block, |_| {});
    gen.ensure_hash_tables();

    let block_len = gen.window.back().unwrap().len();
    let block_abs_start = gen.history_abs_start + gen.window_size - block_len;
    // Leave exactly the final DFAST_MIN_MATCH_LEN starts for tail seeding.
    gen.seed_remaining_hashable_starts(
        block_abs_start,
        block_len,
        block_len - DFAST_MIN_MATCH_LEN,
    );

    // The last position with 4 bytes remaining must have a short-hash entry.
    let last_abs = block_abs_start + block_len - 4;
    let last_rel = last_abs - gen.history_abs_start;
    let history = gen.live_history();
    assert!(
        last_rel + 4 <= history.len(),
        "fixture must leave the last short-hash start valid"
    );
    let bucket = gen.hash4(&history[last_rel..]);
    assert!(
        gen.short_hash[bucket].contains(&last_abs),
        "tail seeding must include the last 4-byte-hashable start"
    );
}
5130
#[test]
fn dfast_seed_remaining_hashable_starts_handles_pos_at_block_end() {
    let mut gen = DfastMatchGenerator::new(1 << 20);
    let block = deterministic_high_entropy_bytes(0x7BB2_DA91_441E_C0EF, 64);
    gen.add_data(block, |_| {});
    gen.ensure_hash_tables();

    let block_len = gen.window.back().unwrap().len();
    let block_abs_start = gen.history_abs_start + gen.window_size - block_len;
    // Degenerate case: pos equals the block end, so the whole seedable tail
    // lies behind pos — seeding must still cover it.
    gen.seed_remaining_hashable_starts(block_abs_start, block_len, block_len);

    let last_abs = block_abs_start + block_len - 4;
    let last_rel = last_abs - gen.history_abs_start;
    let history = gen.live_history();
    assert!(
        last_rel + 4 <= history.len(),
        "fixture must leave the last short-hash start valid"
    );
    let bucket = gen.hash4(&history[last_rel..]);
    assert!(
        gen.short_hash[bucket].contains(&last_abs),
        "tail seeding must still include the last 4-byte-hashable start when pos is at block end"
    );
}
5155
#[test]
fn dfast_sparse_skip_matching_backfills_previous_tail_for_consecutive_sparse_blocks() {
    let mut gen = DfastMatchGenerator::new(1 << 22);
    let boundary_prefix = [0xFA, 0xFB, 0xFC];
    let boundary_suffix = [0xFD, 0xEE, 0xAD, 0xBE, 0xEF, 0x11, 0x22, 0x33];

    // First sparse block ends with the 3-byte boundary prefix.
    let mut first = deterministic_high_entropy_bytes(0xA5A5_5A5A_C3C3_3C3C, 4096);
    let prefix_at = first.len() - boundary_prefix.len();
    first[prefix_at..].copy_from_slice(&boundary_prefix);
    gen.add_data(first, |_| {});
    gen.skip_matching(Some(true));

    // Second sparse block begins with the 8-byte boundary suffix, so the
    // prefix+suffix pattern straddles the first→second block boundary.
    let mut second = deterministic_high_entropy_bytes(0xA5A5_5A5A_C3C3_3C3C, 4096);
    second[..boundary_suffix.len()].copy_from_slice(&boundary_suffix);
    gen.add_data(second.clone(), |_| {});
    gen.skip_matching(Some(true));

    // Third block replays the straddling pattern and should match it whole.
    let mut third = boundary_prefix.to_vec();
    third.extend_from_slice(&boundary_suffix);
    third.extend_from_slice(b"-trailing-literals");
    gen.add_data(third, |_| {});

    // Capture only the first sequence as (literal_len, offset, match_len).
    let mut first_sequence = None;
    gen.start_matching(|seq| {
        if first_sequence.is_none() {
            first_sequence = Some(match seq {
                Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
                Sequence::Triple {
                    literals,
                    offset,
                    match_len,
                } => (literals.len(), offset, match_len),
            });
        }
    });

    let (lit_len, offset, match_len) = first_sequence.expect("expected at least one sequence");
    assert_eq!(
        lit_len, 0,
        "expected immediate match from the prior sparse-skip boundary"
    );
    assert_eq!(
        offset,
        second.len() + boundary_prefix.len(),
        "expected match against backfilled first→second boundary start"
    );
    assert!(
        match_len >= DFAST_MIN_MATCH_LEN,
        "match length should satisfy dfast minimum match length"
    );
}
5208
#[test]
fn fastest_hint_iteration_23_sequences_reconstruct_source() {
    // Deterministic LCG (same multiplier/increment as the original fixture)
    // so the generated bytes are reproducible across runs.
    fn generate_data(seed: u64, len: usize) -> Vec<u8> {
        let mut state = seed;
        (0..len)
            .map(|_| {
                state = state
                    .wrapping_mul(6364136223846793005)
                    .wrapping_add(1442695040888963407);
                (state >> 33) as u8
            })
            .collect()
    }

    let i = 23u64;
    let len = (i * 89 % 16384) as usize;
    let mut data = generate_data(i, len);
    // Append a repeated slice so the fixture deterministically exercises
    // the match path (Sequence::Triple) instead of only literals.
    let repeat = data[128..256].to_vec();
    for _ in 0..2 {
        data.extend_from_slice(&repeat);
    }

    let mut driver = MatchGeneratorDriver::new(1024 * 128, 1);
    driver.set_source_size_hint(data.len() as u64);
    driver.reset(CompressionLevel::Fastest);
    let mut space = driver.get_next_space();
    space[..data.len()].copy_from_slice(&data);
    space.truncate(data.len());
    driver.commit_space(space);

    // Decode every sequence back into `rebuilt`; at the end it must equal
    // the original input byte for byte.
    let mut rebuilt = Vec::with_capacity(data.len());
    let mut saw_triple = false;
    driver.start_matching(|seq| match seq {
        Sequence::Literals { literals } => rebuilt.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            saw_triple = true;
            rebuilt.extend_from_slice(literals);
            assert!(offset > 0, "offset must be non-zero");
            assert!(
                offset <= rebuilt.len(),
                "offset must reference already-produced bytes: offset={} produced={}",
                offset,
                rebuilt.len()
            );
            let from = rebuilt.len() - offset;
            // Byte-at-a-time copy: overlapping matches are legal in zstd.
            for idx in 0..match_len {
                let b = rebuilt[from + idx];
                rebuilt.push(b);
            }
        }
    });

    assert!(saw_triple, "fixture must emit at least one match");
    assert_eq!(rebuilt, data);
}