Skip to main content

structured_zstd/encoding/
match_generator.rs

//! Matching algorithm used to find repeated parts in the original data
2//!
//! The Zstd format relies on finding repeated sequences of data and compressing these sequences as instructions to the decoder.
4//! A sequence basically tells the decoder "Go back X bytes and copy Y bytes to the end of your decode buffer".
5//!
6//! The task here is to efficiently find matches in the already encoded data for the current suffix of the not yet encoded data.
7
8use alloc::collections::VecDeque;
9use alloc::vec::Vec;
10// SIMD/CRC intrinsics now live in `crate::encoding::fastpath::*` where they
11// sit under per-CPU `#[target_feature]` umbrellas; no architecture-specific
12// intrinsic imports remain in this file.
13use core::convert::TryInto;
14use core::num::NonZeroUsize;
15
16use super::BETTER_WINDOW_LOG;
17use super::CompressionLevel;
18use super::Matcher;
19use super::Sequence;
20use super::blocks::encode_offset_with_history;
21use super::incompressible::{block_looks_incompressible, block_looks_incompressible_strict};
22#[cfg(all(
23    test,
24    feature = "std",
25    target_arch = "aarch64",
26    target_endian = "little"
27))]
28use std::arch::is_aarch64_feature_detected;
29#[cfg(all(
30    test,
31    feature = "std",
32    any(target_arch = "x86", target_arch = "x86_64")
33))]
34use std::arch::is_x86_feature_detected;
35
// ---- Simple / shared matcher tuning ----
// NOTE(review): the consumers of most of these constants are outside this
// part of the file; the grouping below follows the name prefixes only.
const MIN_MATCH_LEN: usize = 5;
const FAST_HASH_FILL_STEP: usize = 3;
const INCOMPRESSIBLE_SKIP_STEP: usize = 8;
const DFAST_MIN_MATCH_LEN: usize = 6;
const DFAST_SHORT_HASH_LOOKAHEAD: usize = 4;
const ROW_MIN_MATCH_LEN: usize = 6;
const DFAST_TARGET_LEN: usize = 48;
// Keep these aligned with the issue's zstd level-3/dfast target unless ratio
// measurements show we can shrink them without regressing acceptance tests.
// `DFAST_HASH_BITS` is also the upper clamp in `dfast_hash_bits_for_window`
// and the value `reset()` applies when no source-size hint is present.
const DFAST_HASH_BITS: usize = 20;
const DFAST_SEARCH_DEPTH: usize = 4;
const DFAST_EMPTY_SLOT: usize = usize::MAX;
const DFAST_SKIP_SEARCH_STRENGTH: usize = 6;
// Evaluates to 64 (1 << 6).
const DFAST_SKIP_STEP_GROWTH_INTERVAL: usize = 1 << DFAST_SKIP_SEARCH_STRENGTH;
const DFAST_LOCAL_SKIP_TRIGGER: usize = 256;
const DFAST_MAX_SKIP_STEP: usize = 8;
const DFAST_INCOMPRESSIBLE_SKIP_STEP: usize = 16;
// ---- Row matcher defaults (bundled into `ROW_CONFIG`) ----
// `ROW_HASH_BITS` is also the upper clamp in `row_hash_bits_for_window`.
const ROW_HASH_BITS: usize = 20;
const ROW_LOG: usize = 5;
const ROW_SEARCH_DEPTH: usize = 16;
const ROW_TARGET_LEN: usize = 48;
const ROW_TAG_BITS: usize = 8;
const ROW_EMPTY_SLOT: usize = usize::MAX;
const ROW_HASH_KEY_LEN: usize = 4;
// HASH_MIX_PRIME now lives in `crate::encoding::fastpath::scalar`; the four
// per-CPU `hash_mix_u64` variants share it via that module.
const HC_PRIME3BYTES: u32 = 506_832_829;
const HC_PRIME4BYTES: u32 = 2_654_435_761;

// ---- Hash-chain matcher defaults (bundled into `HC_CONFIG`) ----
const HC_HASH_LOG: usize = 20;
const HC_CHAIN_LOG: usize = 19;
const HC3_HASH_LOG: usize = 17;
const HC3_MAX_OFFSET: usize = 1 << 18;
const HC_SEARCH_DEPTH: usize = 16;
const HC_MIN_MATCH_LEN: usize = 4;
const HC_OPT_MIN_MATCH_LEN: usize = HC_FORMAT_MINMATCH;
const HC_TARGET_LEN: usize = 48;
// Positions are stored as (relative_pos + 1) so that 0 is a safe empty
// sentinel that can never collide with any valid position.
const HC_EMPTY: u32 = 0;

// Maximum search depth across all HC-based levels. Used to size the
// fixed-length candidate array returned by chain_candidates().
// NOTE(review): the small-source level-22 configs
// (`BTULTRA2_HC_CONFIG_L22_256K` / `_128K` / `_16K`) declare search depths of
// `1 << 13`, `1 << 11` and `1 << 10`, all above this bound. Confirm that the
// consumer clamps to this value or does not use the fixed-length array for
// those configs.
const MAX_HC_SEARCH_DEPTH: usize = 512;
80
/// Parse strategy carried in [`HcConfig::parse_mode`] for the hash-chain
/// backend.
///
/// In `LEVEL_TABLE`, levels 5–15 use `Lazy2`, 16–17 use `BtOpt`, 18–19 use
/// `BtUltra`, and 20–22 use `BtUltra2`.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
enum HcParseMode {
    Lazy2,
    BtOpt,
    BtUltra,
    BtUltra2,
}
88
/// Bundled tuning knobs for the hash-chain matcher. Using a typed config
/// instead of positional `usize` args eliminates parameter-order hazards.
#[derive(Copy, Clone)]
struct HcConfig {
    // Size exponent of the hash table (presumably log2 of its entry count —
    // confirm in `HcMatchGenerator::configure`). Capped for hinted sources.
    hash_log: usize,
    // Size exponent of the chain table; capped alongside `hash_log`.
    chain_log: usize,
    // Search depth for this level; see the note on `MAX_HC_SEARCH_DEPTH`.
    search_depth: usize,
    // NOTE(review): presumably the match length considered "good enough" to
    // stop searching — confirm against the matcher implementation.
    target_len: usize,
    // Parse strategy applied on top of the hash-chain candidates.
    parse_mode: HcParseMode,
}
99
/// Bundled tuning knobs for the row matcher, handed to
/// `RowMatchGenerator::configure` by `reset()`.
#[derive(Copy, Clone)]
struct RowConfig {
    // Hash width in bits; recomputed by `row_hash_bits_for_window` when a
    // source-size hint is present.
    hash_bits: usize,
    // NOTE(review): presumably log2 of the entries per row — confirm.
    row_log: usize,
    search_depth: usize,
    target_len: usize,
}
107
/// Default hash-chain configuration built from the `HC_*` constants.
///
/// Also used to fill the `hc` field of levels that run on another backend
/// (levels 1–4, `Uncompressed`, and negative levels).
const HC_CONFIG: HcConfig = HcConfig {
    hash_log: HC_HASH_LOG,
    chain_log: HC_CHAIN_LOG,
    search_depth: HC_SEARCH_DEPTH,
    target_len: HC_TARGET_LEN,
    parse_mode: HcParseMode::Lazy2,
};

/// Best-level: deeper search, larger tables, higher target length.
/// Used by level 11 in `LEVEL_TABLE`.
const BEST_HC_CONFIG: HcConfig = HcConfig {
    hash_log: 21,
    chain_log: 20,
    search_depth: 32,
    target_len: 128,
    parse_mode: HcParseMode::Lazy2,
};

/// Hash-chain configuration for the btopt levels (16–17).
const BTOPT_HC_CONFIG: HcConfig = HcConfig {
    hash_log: 23,
    chain_log: 22,
    search_depth: 32,
    target_len: 256,
    parse_mode: HcParseMode::BtOpt,
};

/// Hash-chain configuration for the btultra levels (18–19).
const BTULTRA_HC_CONFIG: HcConfig = HcConfig {
    hash_log: 23,
    chain_log: 23,
    search_depth: 32,
    target_len: 256,
    parse_mode: HcParseMode::BtUltra,
};

/// Hash-chain configuration for the btultra2 levels 20–21.
const BTULTRA2_HC_CONFIG: HcConfig = HcConfig {
    hash_log: 24,
    chain_log: 24,
    search_depth: 512,
    target_len: 256,
    parse_mode: HcParseMode::BtUltra2,
};

/// Level-22 configuration for unhinted sources and sources above 256 KiB.
const BTULTRA2_HC_CONFIG_L22: HcConfig = HcConfig {
    hash_log: 25,
    chain_log: 27,
    search_depth: 512,
    target_len: 999,
    parse_mode: HcParseMode::BtUltra2,
};

/// Level-22 configuration for hinted sources of at most 256 KiB
/// (and more than 128 KiB).
const BTULTRA2_HC_CONFIG_L22_256K: HcConfig = HcConfig {
    hash_log: 19,
    chain_log: 19,
    search_depth: 1 << 13,
    target_len: 999,
    parse_mode: HcParseMode::BtUltra2,
};

/// Level-22 configuration for hinted sources of at most 128 KiB
/// (and more than 16 KiB).
const BTULTRA2_HC_CONFIG_L22_128K: HcConfig = HcConfig {
    hash_log: 17,
    chain_log: 18,
    search_depth: 1 << 11,
    target_len: 999,
    parse_mode: HcParseMode::BtUltra2,
};

/// Level-22 configuration for hinted sources of at most 16 KiB.
const BTULTRA2_HC_CONFIG_L22_16K: HcConfig = HcConfig {
    hash_log: 15,
    chain_log: 15,
    search_depth: 1 << 10,
    target_len: 999,
    parse_mode: HcParseMode::BtUltra2,
};

/// Default row-matcher configuration built from the `ROW_*` constants;
/// every `LEVEL_TABLE` entry carries it.
const ROW_CONFIG: RowConfig = RowConfig {
    hash_bits: ROW_HASH_BITS,
    row_log: ROW_LOG,
    search_depth: ROW_SEARCH_DEPTH,
    target_len: ROW_TARGET_LEN,
};
187
/// Resolved tuning parameters for a compression level.
#[derive(Copy, Clone)]
struct LevelParams {
    // Which matcher implementation `reset()` activates.
    backend: MatcherBackend,
    // Window size exponent: `reset()` uses `1 << window_log` bytes.
    window_log: u8,
    // Copied into the Simple matcher's `hash_fill_step` by `reset()`.
    hash_fill_step: usize,
    // Copied into the Dfast/Row/HashChain matcher's `lazy_depth` by `reset()`.
    lazy_depth: u8,
    // Hash-chain knobs; only read when `backend` is `HashChain`.
    hc: HcConfig,
    // Row knobs; only read when `backend` is `Row`.
    row: RowConfig,
}
198
199fn dfast_hash_bits_for_window(max_window_size: usize) -> usize {
200    let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
201    window_log.clamp(MIN_WINDOW_LOG as usize, DFAST_HASH_BITS)
202}
203
204fn row_hash_bits_for_window(max_window_size: usize) -> usize {
205    let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
206    window_log.clamp(MIN_WINDOW_LOG as usize, ROW_HASH_BITS)
207}
208
/// Parameter table for numeric compression levels 1–22.
///
/// Each entry maps a zstd compression level to the best-available matcher
/// backend and tuning knobs. High levels map to dedicated parse modes:
/// btopt (16-17), btultra (18-19), btultra2 (20-22).
///
/// Index 0 = level 1, index 21 = level 22.
///
/// `resolve_level_params` also indexes this table for the named presets:
/// `Fastest` → level 1, `Default` → level 3, `Better` → level 7 and
/// `Best` → level 11. `Level(22)` is resolved separately through
/// `level22_btultra2_params_for_source_size`.
///
/// The `hc` field is only read by the `HashChain` arm of `reset()`, so the
/// `HC_CONFIG` on levels 1–4 is a placeholder.
#[rustfmt::skip]
const LEVEL_TABLE: [LevelParams; 22] = [
    // Lvl  Strategy       wlog  step  lazy  HC config                                   row config
    // ---  -------------- ----  ----  ----  ------------------------------------------  ----------
    /* 1 */ LevelParams { backend: MatcherBackend::Simple,    window_log: 17, hash_fill_step: 3, lazy_depth: 0, hc: HC_CONFIG, row: ROW_CONFIG },
    /* 2 */ LevelParams { backend: MatcherBackend::Dfast,     window_log: 19, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
    /* 3 */ LevelParams { backend: MatcherBackend::Dfast,     window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
    /* 4 */ LevelParams { backend: MatcherBackend::Row,       window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
    /* 5 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HcConfig { hash_log: 18, chain_log: 17, search_depth: 4,  target_len: 32,  parse_mode: HcParseMode::Lazy2 }, row: ROW_CONFIG },
    /* 6 */ LevelParams { backend: MatcherBackend::HashChain, window_log: BETTER_WINDOW_LOG, hash_fill_step: 1, lazy_depth: 1, hc: HcConfig { hash_log: 19, chain_log: 18, search_depth: 8,  target_len: 48,  parse_mode: HcParseMode::Lazy2 }, row: ROW_CONFIG },
    /* 7 */ LevelParams { backend: MatcherBackend::HashChain, window_log: BETTER_WINDOW_LOG, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 16, target_len: 48,  parse_mode: HcParseMode::Lazy2 }, row: ROW_CONFIG },
    /* 8 */ LevelParams { backend: MatcherBackend::HashChain, window_log: BETTER_WINDOW_LOG, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 24, target_len: 64,  parse_mode: HcParseMode::Lazy2 }, row: ROW_CONFIG },
    /* 9 */ LevelParams { backend: MatcherBackend::HashChain, window_log: BETTER_WINDOW_LOG, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 24, target_len: 64,  parse_mode: HcParseMode::Lazy2 }, row: ROW_CONFIG },
    /*10 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 24, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 28, target_len: 96,  parse_mode: HcParseMode::Lazy2 }, row: ROW_CONFIG },
    /*11 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 24, hash_fill_step: 1, lazy_depth: 2, hc: BEST_HC_CONFIG, row: ROW_CONFIG },
    /*12 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 128, parse_mode: HcParseMode::Lazy2 }, row: ROW_CONFIG },
    /*13 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 160, parse_mode: HcParseMode::Lazy2 }, row: ROW_CONFIG },
    /*14 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 22, search_depth: 32, target_len: 192, parse_mode: HcParseMode::Lazy2 }, row: ROW_CONFIG },
    /*15 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 22, search_depth: 32, target_len: 192, parse_mode: HcParseMode::Lazy2 }, row: ROW_CONFIG },
    /*16 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: BTOPT_HC_CONFIG, row: ROW_CONFIG },
    /*17 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: BTOPT_HC_CONFIG, row: ROW_CONFIG },
    /*18 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: BTULTRA_HC_CONFIG, row: ROW_CONFIG },
    /*19 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: BTULTRA_HC_CONFIG, row: ROW_CONFIG },
    /*20 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: BTULTRA2_HC_CONFIG, row: ROW_CONFIG },
    /*21 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: BTULTRA2_HC_CONFIG, row: ROW_CONFIG },
    /*22 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 27, hash_fill_step: 1, lazy_depth: 2, hc: BTULTRA2_HC_CONFIG_L22, row: ROW_CONFIG },
];
243
/// Smallest window_log the encoder will use regardless of source size.
///
/// With windows sized as `1 << window_log`, this corresponds to 1 KiB. It is
/// also the lower clamp for the dfast and row hash-bit helpers.
const MIN_WINDOW_LOG: u8 = 10;
/// Conservative floor for source-size-hinted window tuning.
///
/// Hinted windows below 16 KiB (`window_log < 14`) currently regress C-FFI
/// interoperability on certain compressed-block patterns. Keep hinted
/// windows at 16 KiB or larger until that compatibility gap is closed.
const MIN_HINTED_WINDOW_LOG: u8 = 14;
252
253/// Adjust level parameters for a known source size.
254///
255/// This derives a cap from `ceil(log2(src_size))`, then clamps it to
256/// [`MIN_HINTED_WINDOW_LOG`] (16 KiB). A zero-byte size hint is treated as
257/// [`MIN_WINDOW_LOG`] for the raw ceil-log step and then promoted to the hinted
258/// floor. This keeps tables bounded for small inputs while preserving the
259/// encoder's baseline minimum supported window.
260/// For the HC backend, `hash_log` and `chain_log` are reduced
261/// proportionally.
262fn adjust_params_for_source_size(mut params: LevelParams, src_size: u64) -> LevelParams {
263    // Derive a source-size-based cap from ceil(log2(src_size)), then
264    // clamp first to MIN_WINDOW_LOG (baseline encoder minimum) and then to
265    // MIN_HINTED_WINDOW_LOG (16 KiB hinted floor). For tiny or zero hints we
266    // therefore keep a 16 KiB effective minimum window in hinted mode.
267    let src_log = if src_size == 0 {
268        MIN_WINDOW_LOG
269    } else {
270        (64 - (src_size - 1).leading_zeros()) as u8 // ceil_log2
271    };
272    let src_log = src_log.max(MIN_WINDOW_LOG).max(MIN_HINTED_WINDOW_LOG);
273    if src_log < params.window_log {
274        params.window_log = src_log;
275    }
276    // For HC backend: also cap hash_log and chain_log so tables are
277    // proportional to the source, avoiding multi-MB allocations for
278    // tiny inputs.
279    if params.backend == MatcherBackend::HashChain {
280        if (src_log + 2) < params.hc.hash_log as u8 {
281            params.hc.hash_log = (src_log + 2) as usize;
282        }
283        if (src_log + 1) < params.hc.chain_log as u8 {
284            params.hc.chain_log = (src_log + 1) as usize;
285        }
286    } else if params.backend == MatcherBackend::Row {
287        let max_window_size = 1usize << params.window_log;
288        params.row.hash_bits = row_hash_bits_for_window(max_window_size);
289    }
290    params
291}
292
293fn level22_btultra2_params_for_source_size(source_size: Option<u64>) -> LevelParams {
294    let mut hc = match source_size {
295        Some(size) if size <= 16 * 1024 => BTULTRA2_HC_CONFIG_L22_16K,
296        Some(size) if size <= 128 * 1024 => BTULTRA2_HC_CONFIG_L22_128K,
297        Some(size) if size <= 256 * 1024 => BTULTRA2_HC_CONFIG_L22_256K,
298        _ => BTULTRA2_HC_CONFIG_L22,
299    };
300    let mut window_log = match source_size {
301        Some(size) if size <= 16 * 1024 => 14,
302        Some(size) if size <= 128 * 1024 => 17,
303        Some(size) if size <= 256 * 1024 => 18,
304        _ => 27,
305    };
306    if let Some(size) = source_size
307        && size > 256 * 1024
308    {
309        let src_log = if size == 0 {
310            MIN_WINDOW_LOG
311        } else {
312            (64 - (size - 1).leading_zeros()) as u8
313        };
314        window_log = window_log.min(src_log.max(MIN_WINDOW_LOG));
315        let adjusted_table_log = window_log as usize + 1;
316        hc.hash_log = hc.hash_log.min(adjusted_table_log);
317        hc.chain_log = hc.chain_log.min(adjusted_table_log);
318    }
319    LevelParams {
320        backend: MatcherBackend::HashChain,
321        window_log,
322        hash_fill_step: 1,
323        lazy_depth: 2,
324        hc,
325        row: ROW_CONFIG,
326    }
327}
328
329/// Resolve a [`CompressionLevel`] to internal tuning parameters,
330/// optionally adjusted for a known source size.
331fn resolve_level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
332    if matches!(level, CompressionLevel::Level(22)) {
333        return level22_btultra2_params_for_source_size(source_size);
334    }
335    let params = match level {
336        CompressionLevel::Uncompressed => LevelParams {
337            backend: MatcherBackend::Simple,
338            window_log: 17,
339            hash_fill_step: 1,
340            lazy_depth: 0,
341            hc: HC_CONFIG,
342            row: ROW_CONFIG,
343        },
344        CompressionLevel::Fastest => LEVEL_TABLE[0],
345        CompressionLevel::Default => LEVEL_TABLE[2],
346        CompressionLevel::Better => LEVEL_TABLE[6],
347        CompressionLevel::Best => LEVEL_TABLE[10],
348        CompressionLevel::Level(n) => {
349            if n > 0 {
350                let idx = (n as usize).min(CompressionLevel::MAX_LEVEL as usize) - 1;
351                LEVEL_TABLE[idx]
352            } else if n == 0 {
353                // Level 0 = default, matching C zstd semantics.
354                LEVEL_TABLE[CompressionLevel::DEFAULT_LEVEL as usize - 1]
355            } else {
356                // Negative levels: ultra-fast with the Simple backend.
357                // Acceleration grows with magnitude, expressed as larger
358                // hash_fill_step (fewer positions indexed).
359                let acceleration =
360                    (n.saturating_abs() as usize).min((-CompressionLevel::MIN_LEVEL) as usize);
361                let step = (acceleration + 3).min(128);
362                LevelParams {
363                    backend: MatcherBackend::Simple,
364                    window_log: 17,
365                    hash_fill_step: step,
366                    lazy_depth: 0,
367                    hc: HC_CONFIG,
368                    row: ROW_CONFIG,
369                }
370            }
371        }
372    };
373    if let Some(size) = source_size {
374        adjust_params_for_source_size(params, size)
375    } else {
376        params
377    }
378}
379
/// Matcher implementation selected for a compression level.
///
/// Per `LEVEL_TABLE`, level 1 runs on `Simple`, levels 2–3 on `Dfast`,
/// level 4 on `Row`, and levels 5–22 on `HashChain`. `Uncompressed` and
/// negative levels also resolve to `Simple`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum MatcherBackend {
    Simple,
    Dfast,
    Row,
    HashChain,
}
387
/// This is the default implementation of the `Matcher` trait. It allocates and reuses the buffers when possible.
pub struct MatchGeneratorDriver {
    // Recycled data buffers; retired buffers are resized to their full
    // capacity before being pushed here.
    vec_pool: Vec<Vec<u8>>,
    // Recycled suffix stores handed back by the Simple matcher.
    suffix_pool: Vec<SuffixStore>,
    // Simple backend; always allocated.
    match_generator: MatchGenerator,
    // Dfast / Row / HashChain backends; created by `reset()` the first time
    // the corresponding backend is selected.
    dfast_match_generator: Option<DfastMatchGenerator>,
    row_match_generator: Option<RowMatchGenerator>,
    hc_match_generator: Option<HcMatchGenerator>,
    // Backend selected by the most recent `reset()` (Simple at construction).
    active_backend: MatcherBackend,
    // Effective slice size: `base_slice_size` capped at the current window.
    slice_size: usize,
    // Slice size requested at construction.
    base_slice_size: usize,
    // Frame header window size must stay at the configured live-window budget.
    // Dictionary retention expands internal matcher capacity only.
    reported_window_size: usize,
    // Tracks currently retained bytes that originated from primed dictionary
    // history and have not been evicted yet.
    dictionary_retained_budget: usize,
    // Source size hint for next frame (set via set_source_size_hint, cleared on reset).
    source_size_hint: Option<u64>,
}
408
409impl MatchGeneratorDriver {
410    /// `slice_size` sets the base block allocation size used for matcher input chunks.
411    /// `max_slices_in_window` determines the initial window capacity at construction
412    /// time. Effective window sizing is recalculated on every [`reset`](Self::reset)
413    /// from the resolved compression level and optional source-size hint.
414    pub(crate) fn new(slice_size: usize, max_slices_in_window: usize) -> Self {
415        let max_window_size = max_slices_in_window * slice_size;
416        Self {
417            vec_pool: Vec::new(),
418            suffix_pool: Vec::new(),
419            match_generator: MatchGenerator::new(max_window_size),
420            dfast_match_generator: None,
421            row_match_generator: None,
422            hc_match_generator: None,
423            active_backend: MatcherBackend::Simple,
424            slice_size,
425            base_slice_size: slice_size,
426            reported_window_size: max_window_size,
427            dictionary_retained_budget: 0,
428            source_size_hint: None,
429        }
430    }
431
432    fn level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
433        resolve_level_params(level, source_size)
434    }
435
436    fn dfast_matcher(&self) -> &DfastMatchGenerator {
437        self.dfast_match_generator
438            .as_ref()
439            .expect("dfast backend must be initialized by reset() before use")
440    }
441
442    fn dfast_matcher_mut(&mut self) -> &mut DfastMatchGenerator {
443        self.dfast_match_generator
444            .as_mut()
445            .expect("dfast backend must be initialized by reset() before use")
446    }
447
448    fn row_matcher(&self) -> &RowMatchGenerator {
449        self.row_match_generator
450            .as_ref()
451            .expect("row backend must be initialized by reset() before use")
452    }
453
454    fn row_matcher_mut(&mut self) -> &mut RowMatchGenerator {
455        self.row_match_generator
456            .as_mut()
457            .expect("row backend must be initialized by reset() before use")
458    }
459
460    fn hc_matcher(&self) -> &HcMatchGenerator {
461        self.hc_match_generator
462            .as_ref()
463            .expect("hash chain backend must be initialized by reset() before use")
464    }
465
466    fn hc_matcher_mut(&mut self) -> &mut HcMatchGenerator {
467        self.hc_match_generator
468            .as_mut()
469            .expect("hash chain backend must be initialized by reset() before use")
470    }
471
472    fn retire_dictionary_budget(&mut self, evicted_bytes: usize) {
473        let reclaimed = evicted_bytes.min(self.dictionary_retained_budget);
474        if reclaimed == 0 {
475            return;
476        }
477        self.dictionary_retained_budget -= reclaimed;
478        match self.active_backend {
479            MatcherBackend::Simple => {
480                self.match_generator.max_window_size = self
481                    .match_generator
482                    .max_window_size
483                    .saturating_sub(reclaimed);
484            }
485            MatcherBackend::Dfast => {
486                let matcher = self.dfast_matcher_mut();
487                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
488            }
489            MatcherBackend::Row => {
490                let matcher = self.row_matcher_mut();
491                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
492            }
493            MatcherBackend::HashChain => {
494                let matcher = self.hc_matcher_mut();
495                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
496            }
497        }
498    }
499
500    fn trim_after_budget_retire(&mut self) {
501        loop {
502            let mut evicted_bytes = 0usize;
503            match self.active_backend {
504                MatcherBackend::Simple => {
505                    let vec_pool = &mut self.vec_pool;
506                    let suffix_pool = &mut self.suffix_pool;
507                    self.match_generator.reserve(0, |mut data, mut suffixes| {
508                        evicted_bytes += data.len();
509                        data.resize(data.capacity(), 0);
510                        vec_pool.push(data);
511                        suffixes.slots.clear();
512                        suffixes.slots.resize(suffixes.slots.capacity(), None);
513                        suffix_pool.push(suffixes);
514                    });
515                }
516                MatcherBackend::Dfast => {
517                    let mut retired = Vec::new();
518                    self.dfast_matcher_mut().trim_to_window(|data| {
519                        evicted_bytes += data.len();
520                        retired.push(data);
521                    });
522                    for mut data in retired {
523                        data.resize(data.capacity(), 0);
524                        self.vec_pool.push(data);
525                    }
526                }
527                MatcherBackend::Row => {
528                    let mut retired = Vec::new();
529                    self.row_matcher_mut().trim_to_window(|data| {
530                        evicted_bytes += data.len();
531                        retired.push(data);
532                    });
533                    for mut data in retired {
534                        data.resize(data.capacity(), 0);
535                        self.vec_pool.push(data);
536                    }
537                }
538                MatcherBackend::HashChain => {
539                    let mut retired = Vec::new();
540                    self.hc_matcher_mut().trim_to_window(|data| {
541                        evicted_bytes += data.len();
542                        retired.push(data);
543                    });
544                    for mut data in retired {
545                        data.resize(data.capacity(), 0);
546                        self.vec_pool.push(data);
547                    }
548                }
549            }
550            if evicted_bytes == 0 {
551                break;
552            }
553            self.retire_dictionary_budget(evicted_bytes);
554        }
555    }
556
557    fn skip_matching_for_dictionary_priming(&mut self) {
558        match self.active_backend {
559            MatcherBackend::Simple => self.match_generator.skip_matching_with_hint(Some(false)),
560            MatcherBackend::Dfast => self.dfast_matcher_mut().skip_matching_dense(),
561            MatcherBackend::Row => self.row_matcher_mut().skip_matching_with_hint(Some(false)),
562            MatcherBackend::HashChain => self.hc_matcher_mut().skip_matching(Some(false)),
563        }
564    }
565}
566
567impl Matcher for MatchGeneratorDriver {
    /// Reports whether this matcher accepts dictionary priming; this driver
    /// always returns `true`.
    fn supports_dictionary_priming(&self) -> bool {
        true
    }
571
    /// Records `size` as the source-size hint for the next frame.
    ///
    /// The hint is consumed (taken) by the next `reset()` call, which passes
    /// it to the level-parameter resolution.
    fn set_source_size_hint(&mut self, size: u64) {
        self.source_size_hint = Some(size);
    }
575
576    fn reset(&mut self, level: CompressionLevel) {
577        let hint = self.source_size_hint.take();
578        let hinted = hint.is_some();
579        let params = Self::level_params(level, hint);
580        let max_window_size = 1usize << params.window_log;
581        self.dictionary_retained_budget = 0;
582        if self.active_backend != params.backend {
583            match self.active_backend {
584                MatcherBackend::Simple => {
585                    let vec_pool = &mut self.vec_pool;
586                    let suffix_pool = &mut self.suffix_pool;
587                    self.match_generator.reset(|mut data, mut suffixes| {
588                        data.resize(data.capacity(), 0);
589                        vec_pool.push(data);
590                        suffixes.slots.clear();
591                        suffixes.slots.resize(suffixes.slots.capacity(), None);
592                        suffix_pool.push(suffixes);
593                    });
594                }
595                MatcherBackend::Dfast => {
596                    if let Some(dfast) = self.dfast_match_generator.as_mut() {
597                        let vec_pool = &mut self.vec_pool;
598                        dfast.reset(|mut data| {
599                            data.resize(data.capacity(), 0);
600                            vec_pool.push(data);
601                        });
602                    }
603                }
604                MatcherBackend::Row => {
605                    if let Some(row) = self.row_match_generator.as_mut() {
606                        row.row_heads = Vec::new();
607                        row.row_positions = Vec::new();
608                        row.row_tags = Vec::new();
609                        let vec_pool = &mut self.vec_pool;
610                        row.reset(|mut data| {
611                            data.resize(data.capacity(), 0);
612                            vec_pool.push(data);
613                        });
614                    }
615                }
616                MatcherBackend::HashChain => {
617                    if let Some(hc) = self.hc_match_generator.as_mut() {
618                        // Release oversized tables when switching away from
619                        // HashChain so Best's larger allocations don't persist.
620                        hc.hash_table = Vec::new();
621                        hc.chain_table = Vec::new();
622                        let vec_pool = &mut self.vec_pool;
623                        hc.reset(|mut data| {
624                            data.resize(data.capacity(), 0);
625                            vec_pool.push(data);
626                        });
627                    }
628                }
629            }
630        }
631
632        self.active_backend = params.backend;
633        self.slice_size = self.base_slice_size.min(max_window_size);
634        self.reported_window_size = max_window_size;
635        match self.active_backend {
636            MatcherBackend::Simple => {
637                let vec_pool = &mut self.vec_pool;
638                let suffix_pool = &mut self.suffix_pool;
639                self.match_generator.max_window_size = max_window_size;
640                self.match_generator.hash_fill_step = params.hash_fill_step;
641                self.match_generator.reset(|mut data, mut suffixes| {
642                    data.resize(data.capacity(), 0);
643                    vec_pool.push(data);
644                    suffixes.slots.clear();
645                    suffixes.slots.resize(suffixes.slots.capacity(), None);
646                    suffix_pool.push(suffixes);
647                });
648            }
649            MatcherBackend::Dfast => {
650                let dfast = self
651                    .dfast_match_generator
652                    .get_or_insert_with(|| DfastMatchGenerator::new(max_window_size));
653                dfast.max_window_size = max_window_size;
654                dfast.lazy_depth = params.lazy_depth;
655                dfast.use_fast_loop = matches!(
656                    level,
657                    CompressionLevel::Default
658                        | CompressionLevel::Level(0)
659                        | CompressionLevel::Level(3)
660                );
661                dfast.set_hash_bits(if hinted {
662                    dfast_hash_bits_for_window(max_window_size)
663                } else {
664                    DFAST_HASH_BITS
665                });
666                let vec_pool = &mut self.vec_pool;
667                dfast.reset(|mut data| {
668                    data.resize(data.capacity(), 0);
669                    vec_pool.push(data);
670                });
671            }
672            MatcherBackend::Row => {
673                let row = self
674                    .row_match_generator
675                    .get_or_insert_with(|| RowMatchGenerator::new(max_window_size));
676                row.max_window_size = max_window_size;
677                row.lazy_depth = params.lazy_depth;
678                row.configure(params.row);
679                if hinted {
680                    row.set_hash_bits(row_hash_bits_for_window(max_window_size));
681                }
682                let vec_pool = &mut self.vec_pool;
683                row.reset(|mut data| {
684                    data.resize(data.capacity(), 0);
685                    vec_pool.push(data);
686                });
687            }
688            MatcherBackend::HashChain => {
689                let hc = self
690                    .hc_match_generator
691                    .get_or_insert_with(|| HcMatchGenerator::new(max_window_size));
692                hc.max_window_size = max_window_size;
693                hc.lazy_depth = params.lazy_depth;
694                hc.configure(params.hc, params.window_log);
695                let vec_pool = &mut self.vec_pool;
696                hc.reset(|mut data| {
697                    data.resize(data.capacity(), 0);
698                    vec_pool.push(data);
699                });
700            }
701        }
702    }
703
704    fn prime_with_dictionary(&mut self, dict_content: &[u8], offset_hist: [u32; 3]) {
705        match self.active_backend {
706            MatcherBackend::Simple => self.match_generator.offset_hist = offset_hist,
707            MatcherBackend::Dfast => self.dfast_matcher_mut().offset_hist = offset_hist,
708            MatcherBackend::Row => self.row_matcher_mut().offset_hist = offset_hist,
709            MatcherBackend::HashChain => {
710                let matcher = self.hc_matcher_mut();
711                matcher.offset_hist = offset_hist;
712                matcher.mark_dictionary_primed();
713            }
714        }
715
716        if dict_content.is_empty() {
717            return;
718        }
719
720        // Dictionary bytes should stay addressable until produced frame output
721        // itself exceeds the live window size.
722        let retained_dict_budget = dict_content.len();
723        match self.active_backend {
724            MatcherBackend::Simple => {
725                self.match_generator.max_window_size = self
726                    .match_generator
727                    .max_window_size
728                    .saturating_add(retained_dict_budget);
729            }
730            MatcherBackend::Dfast => {
731                let matcher = self.dfast_matcher_mut();
732                matcher.max_window_size =
733                    matcher.max_window_size.saturating_add(retained_dict_budget);
734            }
735            MatcherBackend::Row => {
736                let matcher = self.row_matcher_mut();
737                matcher.max_window_size =
738                    matcher.max_window_size.saturating_add(retained_dict_budget);
739            }
740            MatcherBackend::HashChain => {
741                let matcher = self.hc_matcher_mut();
742                matcher.max_window_size =
743                    matcher.max_window_size.saturating_add(retained_dict_budget);
744            }
745        }
746
747        let mut start = 0usize;
748        let mut committed_dict_budget = 0usize;
749        // insert_position needs 4 bytes of lookahead for hashing;
750        // backfill_boundary_positions re-visits tail positions once the
751        // next slice extends history, but cannot hash <4 byte fragments.
752        let min_primed_tail = match self.active_backend {
753            MatcherBackend::Simple => MIN_MATCH_LEN,
754            MatcherBackend::Dfast | MatcherBackend::Row | MatcherBackend::HashChain => 4,
755        };
756        while start < dict_content.len() {
757            let end = (start + self.slice_size).min(dict_content.len());
758            if end - start < min_primed_tail {
759                break;
760            }
761            let mut space = self.get_next_space();
762            space.clear();
763            space.extend_from_slice(&dict_content[start..end]);
764            self.commit_space(space);
765            self.skip_matching_for_dictionary_priming();
766            committed_dict_budget += end - start;
767            start = end;
768        }
769
770        let uncommitted_tail_budget = retained_dict_budget.saturating_sub(committed_dict_budget);
771        if uncommitted_tail_budget > 0 {
772            match self.active_backend {
773                MatcherBackend::Simple => {
774                    self.match_generator.max_window_size = self
775                        .match_generator
776                        .max_window_size
777                        .saturating_sub(uncommitted_tail_budget);
778                }
779                MatcherBackend::Dfast => {
780                    let matcher = self.dfast_matcher_mut();
781                    matcher.max_window_size = matcher
782                        .max_window_size
783                        .saturating_sub(uncommitted_tail_budget);
784                }
785                MatcherBackend::Row => {
786                    let matcher = self.row_matcher_mut();
787                    matcher.max_window_size = matcher
788                        .max_window_size
789                        .saturating_sub(uncommitted_tail_budget);
790                }
791                MatcherBackend::HashChain => {
792                    let matcher = self.hc_matcher_mut();
793                    matcher.max_window_size = matcher
794                        .max_window_size
795                        .saturating_sub(uncommitted_tail_budget);
796                }
797            }
798        }
799        if committed_dict_budget > 0 {
800            self.dictionary_retained_budget = self
801                .dictionary_retained_budget
802                .saturating_add(committed_dict_budget);
803        }
804        if self.active_backend == MatcherBackend::HashChain {
805            self.hc_matcher_mut()
806                .set_dictionary_limit_from_primed_bytes(committed_dict_budget);
807        }
808    }
809
810    fn seed_dictionary_entropy(
811        &mut self,
812        huff: Option<&crate::huff0::huff0_encoder::HuffmanTable>,
813        ll: Option<&crate::fse::fse_encoder::FSETable>,
814        ml: Option<&crate::fse::fse_encoder::FSETable>,
815        of: Option<&crate::fse::fse_encoder::FSETable>,
816    ) {
817        if self.active_backend == MatcherBackend::HashChain {
818            self.hc_matcher_mut()
819                .seed_dictionary_entropy(huff, ll, ml, of);
820        }
821    }
822
823    fn window_size(&self) -> u64 {
824        self.reported_window_size as u64
825    }
826
827    fn get_next_space(&mut self) -> Vec<u8> {
828        if let Some(mut space) = self.vec_pool.pop() {
829            if space.len() > self.slice_size {
830                space.truncate(self.slice_size);
831            }
832            if space.len() < self.slice_size {
833                space.resize(self.slice_size, 0);
834            }
835            return space;
836        }
837        alloc::vec![0; self.slice_size]
838    }
839
840    fn get_last_space(&mut self) -> &[u8] {
841        match self.active_backend {
842            MatcherBackend::Simple => self.match_generator.window.last().unwrap().data.as_slice(),
843            MatcherBackend::Dfast => self.dfast_matcher().get_last_space(),
844            MatcherBackend::Row => self.row_matcher().get_last_space(),
845            MatcherBackend::HashChain => self.hc_matcher().get_last_space(),
846        }
847    }
848
849    fn commit_space(&mut self, space: Vec<u8>) {
850        match self.active_backend {
851            MatcherBackend::Simple => {
852                let vec_pool = &mut self.vec_pool;
853                let mut evicted_bytes = 0usize;
854                let suffixes = match self.suffix_pool.pop() {
855                    Some(store) if store.slots.len() >= space.len() => store,
856                    _ => SuffixStore::with_capacity(space.len()),
857                };
858                let suffix_pool = &mut self.suffix_pool;
859                self.match_generator
860                    .add_data(space, suffixes, |mut data, mut suffixes| {
861                        evicted_bytes += data.len();
862                        data.resize(data.capacity(), 0);
863                        vec_pool.push(data);
864                        suffixes.slots.clear();
865                        suffixes.slots.resize(suffixes.slots.capacity(), None);
866                        suffix_pool.push(suffixes);
867                    });
868                self.retire_dictionary_budget(evicted_bytes);
869                self.trim_after_budget_retire();
870            }
871            MatcherBackend::Dfast => {
872                let vec_pool = &mut self.vec_pool;
873                let mut evicted_bytes = 0usize;
874                self.dfast_match_generator
875                    .as_mut()
876                    .expect("dfast backend must be initialized by reset() before use")
877                    .add_data(space, |mut data| {
878                        evicted_bytes += data.len();
879                        data.resize(data.capacity(), 0);
880                        vec_pool.push(data);
881                    });
882                self.retire_dictionary_budget(evicted_bytes);
883                self.trim_after_budget_retire();
884            }
885            MatcherBackend::Row => {
886                let vec_pool = &mut self.vec_pool;
887                let mut evicted_bytes = 0usize;
888                self.row_match_generator
889                    .as_mut()
890                    .expect("row backend must be initialized by reset() before use")
891                    .add_data(space, |mut data| {
892                        evicted_bytes += data.len();
893                        data.resize(data.capacity(), 0);
894                        vec_pool.push(data);
895                    });
896                self.retire_dictionary_budget(evicted_bytes);
897                self.trim_after_budget_retire();
898            }
899            MatcherBackend::HashChain => {
900                let vec_pool = &mut self.vec_pool;
901                let mut evicted_bytes = 0usize;
902                self.hc_match_generator
903                    .as_mut()
904                    .expect("hash chain backend must be initialized by reset() before use")
905                    .add_data(space, |mut data| {
906                        evicted_bytes += data.len();
907                        data.resize(data.capacity(), 0);
908                        vec_pool.push(data);
909                    });
910                self.retire_dictionary_budget(evicted_bytes);
911                self.trim_after_budget_retire();
912            }
913        }
914    }
915
916    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
917        match self.active_backend {
918            MatcherBackend::Simple => {
919                while self.match_generator.next_sequence(&mut handle_sequence) {}
920            }
921            MatcherBackend::Dfast => self
922                .dfast_matcher_mut()
923                .start_matching(&mut handle_sequence),
924            MatcherBackend::Row => self.row_matcher_mut().start_matching(&mut handle_sequence),
925            MatcherBackend::HashChain => self.hc_matcher_mut().start_matching(&mut handle_sequence),
926        }
927    }
928
929    fn skip_matching(&mut self) {
930        self.skip_matching_with_hint(None);
931    }
932
933    fn skip_matching_with_hint(&mut self, incompressible_hint: Option<bool>) {
934        match self.active_backend {
935            MatcherBackend::Simple => self
936                .match_generator
937                .skip_matching_with_hint(incompressible_hint),
938            MatcherBackend::Dfast => self.dfast_matcher_mut().skip_matching(incompressible_hint),
939            MatcherBackend::Row => self
940                .row_matcher_mut()
941                .skip_matching_with_hint(incompressible_hint),
942            MatcherBackend::HashChain => self.hc_matcher_mut().skip_matching(incompressible_hint),
943        }
944    }
945}
946
/// Stores the index of a suffix of a string, keyed by a hash of the first
/// five bytes of that suffix.
///
/// Collisions simply overwrite the previous entry, so callers must validate a
/// hit (for example by comparing the bytes) after a `get`.
struct SuffixStore {
    // We use NonZeroUsize to enable niche optimization here.
    // On store we do +1 and on get -1
    // This is ok since usize::MAX is never a valid offset
    slots: Vec<Option<NonZeroUsize>>,
    // Floor of log2 of the slot count chosen at construction; `key` keeps this
    // many top bits of the hash.
    len_log: u32,
}

impl SuffixStore {
    /// Creates a store sized for `capacity` suffixes.
    ///
    /// A `capacity` of zero is clamped to a single slot: `ilog2` panics on
    /// zero, and a table without slots would make every lookup index out of
    /// bounds.
    fn with_capacity(capacity: usize) -> Self {
        let slot_count = capacity.max(1);
        Self {
            slots: alloc::vec![None; slot_count],
            len_log: slot_count.ilog2(),
        }
    }

    /// Records `idx` as the position of `suffix`, overwriting whatever was in
    /// the slot. `suffix` must be at least five bytes long unless the store
    /// has a `len_log` of zero.
    #[inline(always)]
    fn insert(&mut self, suffix: &[u8], idx: usize) {
        let key = self.key(suffix);
        self.slots[key] = Some(NonZeroUsize::new(idx + 1).unwrap());
    }

    /// Returns whether the slot `suffix` hashes to is occupied (possibly by a
    /// colliding suffix).
    #[inline(always)]
    fn contains_key(&self, suffix: &[u8]) -> bool {
        let key = self.key(suffix);
        self.slots[key].is_some()
    }

    /// Returns the index stored in the slot `suffix` hashes to, if any.
    #[inline(always)]
    fn get(&self, suffix: &[u8]) -> Option<usize> {
        let key = self.key(suffix);
        // Undo the +1 applied on insert.
        self.slots[key].map(|stored| stored.get() - 1)
    }

    /// Hashes the first five bytes of `suffix` to a slot index.
    #[inline(always)]
    fn key(&self, suffix: &[u8]) -> usize {
        // Capacity=1 yields len_log=0; shifting by 64 would panic.
        if self.len_log == 0 {
            return 0;
        }

        let s0 = suffix[0] as u64;
        let s1 = suffix[1] as u64;
        let s2 = suffix[2] as u64;
        let s3 = suffix[3] as u64;
        let s4 = suffix[4] as u64;

        const POLY: u64 = 0xCF3BCCDCABu64;

        let s0 = (s0 << 24).wrapping_mul(POLY);
        let s1 = (s1 << 32).wrapping_mul(POLY);
        let s2 = (s2 << 40).wrapping_mul(POLY);
        let s3 = (s3 << 48).wrapping_mul(POLY);
        let s4 = (s4 << 56).wrapping_mul(POLY);

        // Keep the top `len_log` bits, then fold into the actual slot count
        // (which may differ from the power of two implied by `len_log`).
        let index = s0 ^ s1 ^ s2 ^ s3 ^ s4;
        let index = index >> (64 - self.len_log);
        index as usize % self.slots.len()
    }
}
1009
/// One committed slice of input together with its suffix index.
///
/// The matcher keeps a window of a few of these entries; every entry in the
/// window is a valid target for a match.
struct WindowEntry {
    /// Raw bytes of this slice.
    data: Vec<u8>,
    /// Stores indexes into `data`
    suffixes: SuffixStore,
    /// Makes offset calculations efficient: a new entry starts at 0, and each
    /// time another entry is pushed every existing entry's value grows by the
    /// length of the entry that was last before the push.
    base_offset: usize,
}
1019
/// Matcher that searches a window of [`WindowEntry`] slices through their
/// per-entry suffix stores.
pub(crate) struct MatchGenerator {
    /// Upper bound on `window_size`; older entries are evicted in `reserve`
    /// until a new slice fits.
    max_window_size: usize,
    /// Data window we are operating on to find matches
    /// The data we want to find matches for is in the last slice
    window: Vec<WindowEntry>,
    /// Total number of data bytes currently held by `window`.
    window_size: usize,
    /// Debug-only concatenation of all window data, used to cross-check the
    /// offsets computed in `next_sequence`.
    #[cfg(debug_assertions)]
    concat_window: Vec<u8>,
    /// Index in the last slice that we already processed
    suffix_idx: usize,
    /// Gets updated when a new sequence is returned to point right behind that sequence
    last_idx_in_sequence: usize,
    /// Stride passed to `add_suffixes_till` when registering the positions
    /// covered by a found match.
    hash_fill_step: usize,
    /// Repeat-offset history, updated via `encode_offset_with_history` after
    /// every emitted match.
    offset_hist: [u32; 3],
}
1035
1036impl MatchGenerator {
1037    /// max_size defines how many bytes will be used at most in the window used for matching
1038    fn new(max_size: usize) -> Self {
1039        Self {
1040            max_window_size: max_size,
1041            window: Vec::new(),
1042            window_size: 0,
1043            #[cfg(debug_assertions)]
1044            concat_window: Vec::new(),
1045            suffix_idx: 0,
1046            last_idx_in_sequence: 0,
1047            hash_fill_step: 1,
1048            offset_hist: [1, 4, 8],
1049        }
1050    }
1051
1052    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
1053        self.window_size = 0;
1054        #[cfg(debug_assertions)]
1055        self.concat_window.clear();
1056        self.suffix_idx = 0;
1057        self.last_idx_in_sequence = 0;
1058        self.offset_hist = [1, 4, 8];
1059        self.window.drain(..).for_each(|entry| {
1060            reuse_space(entry.data, entry.suffixes);
1061        });
1062    }
1063
    /// Processes bytes in the last window entry until either a match is found
    /// or no more matches can be found.
    ///
    /// * If a match is found, `handle_sequence` is called with
    ///   `Sequence::Triple` and this returns `true`.
    /// * If no more matches can be found but unreported bytes are left,
    ///   `handle_sequence` is called with `Sequence::Literals` and this
    ///   returns `true`.
    /// * If no more matches can be found and no bytes are left, this returns
    ///   `false` without calling `handle_sequence`.
    ///
    /// Panics if the window is empty.
    fn next_sequence(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) -> bool {
        loop {
            let last_entry = self.window.last().unwrap();
            let data_slice = &last_entry.data;

            // We already reached the end of the window, check if we need to return a Literals{}
            if self.suffix_idx >= data_slice.len() {
                if self.last_idx_in_sequence != self.suffix_idx {
                    let literals = &data_slice[self.last_idx_in_sequence..];
                    self.last_idx_in_sequence = self.suffix_idx;
                    handle_sequence(Sequence::Literals { literals });
                    return true;
                } else {
                    return false;
                }
            }

            // If the remaining data is smaller than the minimum match length we can stop and return a Literals{}
            let data_slice = &data_slice[self.suffix_idx..];
            if data_slice.len() < MIN_MATCH_LEN {
                let last_idx_in_sequence = self.last_idx_in_sequence;
                self.last_idx_in_sequence = last_entry.data.len();
                self.suffix_idx = last_entry.data.len();
                handle_sequence(Sequence::Literals {
                    literals: &last_entry.data[last_idx_in_sequence..],
                });
                return true;
            }

            // This is the key we are looking to find a match for
            let key = &data_slice[..MIN_MATCH_LEN];
            let literals_len = self.suffix_idx - self.last_idx_in_sequence;

            // Start from the best repeat-offset match (only considered when no
            // literals are pending), then look in each window entry.
            let mut candidate = self.repcode_candidate(data_slice, literals_len);
            for match_entry in self.window.iter() {
                if let Some(match_index) = match_entry.suffixes.get(key) {
                    let match_slice = &match_entry.data[match_index..];

                    // Check how long the common prefix actually is
                    let match_len = Self::common_prefix_len(match_slice, data_slice);

                    // Collisions in the suffix store might make this check fail
                    if match_len >= MIN_MATCH_LEN {
                        let offset = match_entry.base_offset + self.suffix_idx - match_index;

                        // If we are in debug/tests make sure the match we found is actually at the offset we calculated
                        #[cfg(debug_assertions)]
                        {
                            let unprocessed = last_entry.data.len() - self.suffix_idx;
                            let start = self.concat_window.len() - unprocessed - offset;
                            let end = start + match_len;
                            let check_slice = &self.concat_window[start..end];
                            debug_assert_eq!(check_slice, &match_slice[..match_len]);
                        }

                        // Prefer the longer match; on equal length prefer the smaller offset.
                        if let Some((old_offset, old_match_len)) = candidate {
                            if match_len > old_match_len
                                || (match_len == old_match_len && offset < old_offset)
                            {
                                candidate = Some((offset, match_len));
                            }
                        } else {
                            candidate = Some((offset, match_len));
                        }
                    }
                }
            }

            if let Some((offset, match_len)) = candidate {
                // For each index in the match we found we do not need to look for another match
                // But we still want them registered in the suffix store
                self.add_suffixes_till(self.suffix_idx + match_len, self.hash_fill_step);

                // All literals that were not included between this match and the last are now included here
                let last_entry = self.window.last().unwrap();
                let literals = &last_entry.data[self.last_idx_in_sequence..self.suffix_idx];

                // Update the indexes, all indexes up to and including the current index have been included in a sequence now
                self.suffix_idx += match_len;
                self.last_idx_in_sequence = self.suffix_idx;
                // Called only for its effect on `offset_hist`; the returned value is discarded.
                let _ = encode_offset_with_history(
                    offset as u32,
                    literals.len() as u32,
                    &mut self.offset_hist,
                );
                handle_sequence(Sequence::Triple {
                    literals,
                    offset,
                    match_len,
                });

                return true;
            }

            // No match at this position: register it (without overwriting an
            // occupied slot) and advance by one byte.
            let last_entry = self.window.last_mut().unwrap();
            let key = &last_entry.data[self.suffix_idx..self.suffix_idx + MIN_MATCH_LEN];
            if !last_entry.suffixes.contains_key(key) {
                last_entry.suffixes.insert(key, self.suffix_idx);
            }
            self.suffix_idx += 1;
        }
    }
1171
1172    /// Find the common prefix length between two byte slices. Delegates to
1173    /// the fastpath dispatcher so kernel selection (NEON / SSE4.2 /
1174    /// AVX2+BMI2 / scalar) lives in one place. See
1175    /// [`crate::encoding::fastpath`] for the per-CPU implementations.
1176    #[inline(always)]
1177    fn common_prefix_len(a: &[u8], b: &[u8]) -> usize {
1178        let max = a.len().min(b.len());
1179        // SAFETY: slice `a` / `b` guarantee at least their `len()` initialized
1180        // bytes; `max` is the minimum so both pointers are valid for `max`
1181        // bytes.
1182        unsafe {
1183            crate::encoding::fastpath::dispatch_common_prefix_len_ptr(a.as_ptr(), b.as_ptr(), max)
1184        }
1185    }
1186
1187    /// Process bytes and add the suffixes to the suffix store up to a specific index
1188    #[inline(always)]
1189    fn add_suffixes_till(&mut self, idx: usize, fill_step: usize) {
1190        let start = self.suffix_idx;
1191        let last_entry = self.window.last_mut().unwrap();
1192        if last_entry.data.len() < MIN_MATCH_LEN {
1193            return;
1194        }
1195        let insert_limit = idx.saturating_sub(MIN_MATCH_LEN - 1);
1196        if insert_limit > start {
1197            let data = last_entry.data.as_slice();
1198            let suffixes = &mut last_entry.suffixes;
1199            if fill_step == FAST_HASH_FILL_STEP {
1200                Self::add_suffixes_interleaved_fast(data, suffixes, start, insert_limit);
1201            } else {
1202                let mut pos = start;
1203                while pos < insert_limit {
1204                    Self::insert_suffix_if_absent(data, suffixes, pos);
1205                    pos += fill_step;
1206                }
1207            }
1208        }
1209
1210        if idx >= start + MIN_MATCH_LEN {
1211            let tail_start = idx - MIN_MATCH_LEN;
1212            let tail_key = &last_entry.data[tail_start..tail_start + MIN_MATCH_LEN];
1213            if !last_entry.suffixes.contains_key(tail_key) {
1214                last_entry.suffixes.insert(tail_key, tail_start);
1215            }
1216        }
1217    }
1218
1219    #[inline(always)]
1220    fn insert_suffix_if_absent(data: &[u8], suffixes: &mut SuffixStore, pos: usize) {
1221        debug_assert!(
1222            pos + MIN_MATCH_LEN <= data.len(),
1223            "insert_suffix_if_absent: pos {} + MIN_MATCH_LEN {} exceeds data.len() {}",
1224            pos,
1225            MIN_MATCH_LEN,
1226            data.len()
1227        );
1228        let key = &data[pos..pos + MIN_MATCH_LEN];
1229        if !suffixes.contains_key(key) {
1230            suffixes.insert(key, pos);
1231        }
1232    }
1233
1234    #[inline(always)]
1235    fn add_suffixes_interleaved_fast(
1236        data: &[u8],
1237        suffixes: &mut SuffixStore,
1238        start: usize,
1239        insert_limit: usize,
1240    ) {
1241        let lane = FAST_HASH_FILL_STEP;
1242        let mut pos = start;
1243
1244        // Pipeline-ish fill: compute and retire several hash positions per loop
1245        // so the fastest path keeps multiple independent hash lookups in flight.
1246        while pos + lane * 3 < insert_limit {
1247            let p0 = pos;
1248            let p1 = pos + lane;
1249            let p2 = pos + lane * 2;
1250            let p3 = pos + lane * 3;
1251
1252            Self::insert_suffix_if_absent(data, suffixes, p0);
1253            Self::insert_suffix_if_absent(data, suffixes, p1);
1254            Self::insert_suffix_if_absent(data, suffixes, p2);
1255            Self::insert_suffix_if_absent(data, suffixes, p3);
1256
1257            pos += lane * 4;
1258        }
1259
1260        while pos < insert_limit {
1261            Self::insert_suffix_if_absent(data, suffixes, pos);
1262            pos += lane;
1263        }
1264    }
1265
1266    fn repcode_candidate(&self, data_slice: &[u8], literals_len: usize) -> Option<(usize, usize)> {
1267        if literals_len != 0 {
1268            return None;
1269        }
1270
1271        let reps = [
1272            Some(self.offset_hist[1] as usize),
1273            Some(self.offset_hist[2] as usize),
1274            (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
1275        ];
1276
1277        let mut best: Option<(usize, usize)> = None;
1278        let mut seen = [0usize; 3];
1279        let mut seen_len = 0usize;
1280        for offset in reps.into_iter().flatten() {
1281            if offset == 0 {
1282                continue;
1283            }
1284            if seen[..seen_len].contains(&offset) {
1285                continue;
1286            }
1287            seen[seen_len] = offset;
1288            seen_len += 1;
1289
1290            let Some(match_len) = self.offset_match_len(offset, data_slice) else {
1291                continue;
1292            };
1293            if match_len < MIN_MATCH_LEN {
1294                continue;
1295            }
1296
1297            if best.is_none_or(|(old_offset, old_len)| {
1298                match_len > old_len || (match_len == old_len && offset < old_offset)
1299            }) {
1300                best = Some((offset, match_len));
1301            }
1302        }
1303        best
1304    }
1305
1306    fn offset_match_len(&self, offset: usize, data_slice: &[u8]) -> Option<usize> {
1307        if offset == 0 {
1308            return None;
1309        }
1310
1311        let last_idx = self.window.len().checked_sub(1)?;
1312        let last_entry = &self.window[last_idx];
1313        let searchable_prefix = self.window_size - (last_entry.data.len() - self.suffix_idx);
1314        if offset > searchable_prefix {
1315            return None;
1316        }
1317
1318        let mut remaining = offset;
1319        let (entry_idx, match_index) = if remaining <= self.suffix_idx {
1320            (last_idx, self.suffix_idx - remaining)
1321        } else {
1322            remaining -= self.suffix_idx;
1323            let mut found = None;
1324            for entry_idx in (0..last_idx).rev() {
1325                let len = self.window[entry_idx].data.len();
1326                if remaining <= len {
1327                    found = Some((entry_idx, len - remaining));
1328                    break;
1329                }
1330                remaining -= len;
1331            }
1332            found?
1333        };
1334
1335        let match_entry = &self.window[entry_idx];
1336        let match_slice = &match_entry.data[match_index..];
1337
1338        Some(Self::common_prefix_len(match_slice, data_slice))
1339    }
1340
1341    /// Skip matching for the whole current window entry.
1342    ///
1343    /// When callers already know the block is incompressible, index positions
1344    /// sparsely and keep a dense tail so the next block still gets boundary
1345    /// matches.
1346    fn skip_matching_with_hint(&mut self, incompressible_hint: Option<bool>) {
1347        let len = self.window.last().unwrap().data.len();
1348        if incompressible_hint == Some(true) {
1349            let dense_tail = MIN_MATCH_LEN + INCOMPRESSIBLE_SKIP_STEP;
1350            let sparse_end = len.saturating_sub(dense_tail);
1351            self.add_suffixes_till(sparse_end, INCOMPRESSIBLE_SKIP_STEP);
1352            self.suffix_idx = sparse_end;
1353            self.add_suffixes_till(len, 1);
1354        } else {
1355            self.add_suffixes_till(len, 1);
1356        }
1357        self.suffix_idx = len;
1358        self.last_idx_in_sequence = len;
1359    }
1360
1361    /// Backward-compatible dense path used by tests.
1362    #[cfg(test)]
1363    fn skip_matching(&mut self) {
1364        self.skip_matching_with_hint(None);
1365    }
1366
1367    /// Add a new window entry. Will panic if the last window entry hasn't been processed properly.
1368    /// If any resources are released by pushing the new entry they are returned via the callback
1369    fn add_data(
1370        &mut self,
1371        data: Vec<u8>,
1372        suffixes: SuffixStore,
1373        reuse_space: impl FnMut(Vec<u8>, SuffixStore),
1374    ) {
1375        assert!(
1376            self.window.is_empty() || self.suffix_idx == self.window.last().unwrap().data.len()
1377        );
1378        self.reserve(data.len(), reuse_space);
1379        #[cfg(debug_assertions)]
1380        self.concat_window.extend_from_slice(&data);
1381
1382        if let Some(last_len) = self.window.last().map(|last| last.data.len()) {
1383            for entry in self.window.iter_mut() {
1384                entry.base_offset += last_len;
1385            }
1386        }
1387
1388        let len = data.len();
1389        self.window.push(WindowEntry {
1390            data,
1391            suffixes,
1392            base_offset: 0,
1393        });
1394        self.window_size += len;
1395        self.suffix_idx = 0;
1396        self.last_idx_in_sequence = 0;
1397    }
1398
1399    /// Reserve space for a new window entry
1400    /// If any resources are released by pushing the new entry they are returned via the callback
1401    fn reserve(&mut self, amount: usize, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
1402        assert!(self.max_window_size >= amount);
1403        while self.window_size + amount > self.max_window_size {
1404            let removed = self.window.remove(0);
1405            self.window_size -= removed.data.len();
1406            #[cfg(debug_assertions)]
1407            self.concat_window.drain(0..removed.data.len());
1408
1409            let WindowEntry {
1410                suffixes,
1411                data: leaked_vec,
1412                base_offset: _,
1413            } = removed;
1414            reuse_space(leaked_vec, suffixes);
1415        }
1416    }
1417}
1418
/// State of the `Dfast` matcher backend.
///
/// NOTE(review): only the field layout is visible next to this definition;
/// field notes that do not come from pre-existing comments describe how the
/// driver touches the field and should be confirmed against the impl.
struct DfastMatchGenerator {
    // Adjusted by the driver when a dictionary is primed: grown by the
    // dictionary length, then shrunk by any tail that was not committed.
    max_window_size: usize,
    // Presumably the committed block buffers, oldest first — TODO confirm.
    window: VecDeque<Vec<u8>>,
    // Presumably the total byte count held in `window` — TODO confirm.
    window_size: usize,
    // We keep a contiguous searchable history to avoid rebuilding and reseeding
    // the matcher state from disjoint block buffers on every block.
    history: Vec<u8>,
    history_start: usize,
    history_abs_start: usize,
    // Repeat-offset history; overwritten by the driver when a dictionary is primed.
    offset_hist: [u32; 3],
    // Two hash tables with `DFAST_SEARCH_DEPTH` entries per bucket.
    short_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
    long_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
    // Presumably the value installed through `set_hash_bits` — TODO confirm.
    hash_bits: usize,
    /// Cached fastpath kernel for `hash_mix_u64`. Resolved once at `new()`
    /// and reused on every `hash_index` call so we skip the per-call
    /// `OnceLock` atomic load that `dispatch_hash_mix_u64` would pay.
    hash_kernel: crate::encoding::fastpath::FastpathKernel,
    // Set by the driver from the compression level (true for `Default`,
    // `Level(0)` and `Level(3)`).
    use_fast_loop: bool,
    // Lazy match lookahead depth (internal tuning parameter).
    lazy_depth: u8,
}
1440
/// A match found by one of the matchers: where it starts, how far back its
/// source lies, and how many bytes it covers.
#[derive(Copy, Clone, Debug)]
struct MatchCandidate {
    start: usize,
    offset: usize,
    match_len: usize,
}

/// Picks the better of two optional candidates.
///
/// The longer match wins; on equal length the smaller offset wins; on a full
/// tie `lhs` is kept. If only one side is present it is returned as is.
fn best_len_offset_candidate(
    lhs: Option<MatchCandidate>,
    rhs: Option<MatchCandidate>,
) -> Option<MatchCandidate> {
    let (left, right) = match (lhs, rhs) {
        (Some(left), Some(right)) => (left, right),
        (None, other) | (other, None) => return other,
    };

    let right_is_longer = right.match_len > left.match_len;
    let right_is_closer = right.match_len == left.match_len && right.offset < left.offset;
    Some(if right_is_longer || right_is_closer {
        right
    } else {
        left
    })
}
1465
1466#[inline]
1467fn extend_backwards_shared(
1468    concat: &[u8],
1469    history_abs_start: usize,
1470    mut candidate_pos: usize,
1471    mut abs_pos: usize,
1472    mut match_len: usize,
1473    lit_len: usize,
1474) -> MatchCandidate {
1475    let min_abs_pos = abs_pos - lit_len;
1476    let concat_ptr = concat.as_ptr();
1477    let concat_len = concat.len();
1478    // SAFETY: loop guard `candidate_pos > history_abs_start` and
1479    // `abs_pos > min_abs_pos` keep both `candidate_pos - history_abs_start - 1`
1480    // and `abs_pos - history_abs_start - 1` strictly positive (no underflow).
1481    // Their upper bound is `concat.len() - 1` because both `candidate_pos` and
1482    // `abs_pos` point at currently-live history. Asserted in debug builds.
1483    while abs_pos > min_abs_pos && candidate_pos > history_abs_start {
1484        let cand_off = candidate_pos - history_abs_start - 1;
1485        let cur_off = abs_pos - history_abs_start - 1;
1486        debug_assert!(cand_off < concat_len && cur_off < concat_len);
1487        let cand_byte = unsafe { *concat_ptr.add(cand_off) };
1488        let cur_byte = unsafe { *concat_ptr.add(cur_off) };
1489        if cand_byte != cur_byte {
1490            break;
1491        }
1492        candidate_pos -= 1;
1493        abs_pos -= 1;
1494        match_len += 1;
1495    }
1496    MatchCandidate {
1497        start: abs_pos,
1498        offset: abs_pos - candidate_pos,
1499        match_len,
1500    }
1501}
1502
1503#[inline]
1504fn repcode_candidate_shared(
1505    concat: &[u8],
1506    history_abs_start: usize,
1507    offset_hist: [u32; 3],
1508    abs_pos: usize,
1509    lit_len: usize,
1510    min_match_len: usize,
1511) -> Option<MatchCandidate> {
1512    let current_idx = abs_pos - history_abs_start;
1513    if current_idx + min_match_len > concat.len() {
1514        return None;
1515    }
1516
1517    // Called once per input byte (10% exclusive on default-level profile).
1518    // The previous form built an `[Option<usize>; 3]` and walked it via
1519    // `into_iter().flatten()`, which the compiler couldn't always unroll
1520    // through the conditional `then_some` on the last slot. Unroll the
1521    // 3-rep probe by hand: each branch is a couple of compares + one
1522    // `common_prefix_len` (already SIMD).
1523    let mut best: Option<MatchCandidate> = None;
1524
1525    let (rep0, rep1, rep2_opt) = if lit_len == 0 {
1526        let r2 = if offset_hist[0] > 1 {
1527            Some(offset_hist[0] as usize - 1)
1528        } else {
1529            None
1530        };
1531        (offset_hist[1] as usize, offset_hist[2] as usize, r2)
1532    } else {
1533        (
1534            offset_hist[0] as usize,
1535            offset_hist[1] as usize,
1536            Some(offset_hist[2] as usize),
1537        )
1538    };
1539
1540    macro_rules! probe {
1541        ($rep:expr) => {{
1542            let rep = $rep;
1543            if rep != 0 && rep <= abs_pos {
1544                let candidate_pos = abs_pos - rep;
1545                if candidate_pos >= history_abs_start {
1546                    let candidate_idx = candidate_pos - history_abs_start;
1547                    let match_len = MatchGenerator::common_prefix_len(
1548                        &concat[candidate_idx..],
1549                        &concat[current_idx..],
1550                    );
1551                    if match_len >= min_match_len {
1552                        let candidate = extend_backwards_shared(
1553                            concat,
1554                            history_abs_start,
1555                            candidate_pos,
1556                            abs_pos,
1557                            match_len,
1558                            lit_len,
1559                        );
1560                        best = best_len_offset_candidate(best, Some(candidate));
1561                    }
1562                }
1563            }
1564        }};
1565    }
1566
1567    probe!(rep0);
1568    probe!(rep1);
1569    if let Some(rep2) = rep2_opt {
1570        probe!(rep2);
1571    }
1572    best
1573}
1574
/// Parameters consumed by `pick_lazy_match_shared`.
#[derive(Copy, Clone)]
struct LazyMatchConfig {
    // A match at least this long is accepted without looking ahead.
    target_len: usize,
    // Minimum match length; a lookahead position is only evaluated when this
    // many bytes still fit before `history_abs_end`.
    min_match_len: usize,
    // A value of 2 or more enables the second lookahead (position + 2).
    lazy_depth: u8,
    // Absolute position one past the last byte of the searchable history.
    history_abs_end: usize,
}
1582
1583fn pick_lazy_match_shared(
1584    abs_pos: usize,
1585    lit_len: usize,
1586    best: Option<MatchCandidate>,
1587    config: LazyMatchConfig,
1588    mut best_match_at: impl FnMut(usize, usize) -> Option<MatchCandidate>,
1589) -> Option<MatchCandidate> {
1590    let best = best?;
1591    if best.match_len >= config.target_len
1592        || abs_pos + 1 + config.min_match_len > config.history_abs_end
1593    {
1594        return Some(best);
1595    }
1596
1597    let next = best_match_at(abs_pos + 1, lit_len + 1);
1598    if let Some(next) = next
1599        && (next.match_len > best.match_len
1600            || (next.match_len == best.match_len && next.offset < best.offset))
1601    {
1602        return None;
1603    }
1604
1605    if config.lazy_depth >= 2 && abs_pos + 2 + config.min_match_len <= config.history_abs_end {
1606        let next2 = best_match_at(abs_pos + 2, lit_len + 2);
1607        if let Some(next2) = next2
1608            && next2.match_len > best.match_len + 1
1609        {
1610            return None;
1611        }
1612    }
1613
1614    Some(best)
1615}
1616
1617impl DfastMatchGenerator {
    // Number of bytes before a block boundary that are (re)inserted densely
    // into the hash tables; with `DFAST_MIN_MATCH_LEN == 6` this is 9 bytes.
    //
    // Keep a short dense tail at block boundaries for two related reasons:
    // 1) insert_position() needs short (4-byte) and long (8-byte) lookahead,
    //    so appending a new block can make starts from the previous block newly
    //    hashable and require backfill;
    // 2) we also need enough trailing bytes from the previous block to preserve
    //    cross-block matching for the minimum match length.
    const BOUNDARY_DENSE_TAIL_LEN: usize = DFAST_MIN_MATCH_LEN + 3;
1625
1626    fn new(max_window_size: usize) -> Self {
1627        Self {
1628            max_window_size,
1629            window: VecDeque::new(),
1630            window_size: 0,
1631            history: Vec::new(),
1632            history_start: 0,
1633            history_abs_start: 0,
1634            offset_hist: [1, 4, 8],
1635            short_hash: Vec::new(),
1636            long_hash: Vec::new(),
1637            hash_bits: DFAST_HASH_BITS,
1638            hash_kernel: crate::encoding::fastpath::select_kernel(),
1639            use_fast_loop: false,
1640            lazy_depth: 1,
1641        }
1642    }
1643
1644    fn set_hash_bits(&mut self, bits: usize) {
1645        let clamped = bits.clamp(MIN_WINDOW_LOG as usize, DFAST_HASH_BITS);
1646        if self.hash_bits != clamped {
1647            self.hash_bits = clamped;
1648            self.short_hash = Vec::new();
1649            self.long_hash = Vec::new();
1650        }
1651    }
1652
1653    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
1654        self.window_size = 0;
1655        self.history.clear();
1656        self.history_start = 0;
1657        self.history_abs_start = 0;
1658        self.offset_hist = [1, 4, 8];
1659        if !self.short_hash.is_empty() {
1660            self.short_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
1661            self.long_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
1662        }
1663        for mut data in self.window.drain(..) {
1664            data.resize(data.capacity(), 0);
1665            reuse_space(data);
1666        }
1667    }
1668
1669    fn get_last_space(&self) -> &[u8] {
1670        self.window.back().unwrap().as_slice()
1671    }
1672
1673    fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
1674        assert!(data.len() <= self.max_window_size);
1675        while self.window_size + data.len() > self.max_window_size {
1676            let removed = self.window.pop_front().unwrap();
1677            self.window_size -= removed.len();
1678            self.history_start += removed.len();
1679            self.history_abs_start += removed.len();
1680            reuse_space(removed);
1681        }
1682        self.compact_history();
1683        self.history.extend_from_slice(&data);
1684        self.window_size += data.len();
1685        self.window.push_back(data);
1686    }
1687
1688    fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
1689        while self.window_size > self.max_window_size {
1690            let removed = self.window.pop_front().unwrap();
1691            self.window_size -= removed.len();
1692            self.history_start += removed.len();
1693            self.history_abs_start += removed.len();
1694            reuse_space(removed);
1695        }
1696    }
1697
1698    fn skip_matching(&mut self, incompressible_hint: Option<bool>) {
1699        self.ensure_hash_tables();
1700        let current_len = self.window.back().unwrap().len();
1701        let current_abs_start = self.history_abs_start + self.window_size - current_len;
1702        let current_abs_end = current_abs_start + current_len;
1703        let tail_start = current_abs_start.saturating_sub(Self::BOUNDARY_DENSE_TAIL_LEN);
1704        if tail_start < current_abs_start {
1705            self.insert_positions(tail_start, current_abs_start);
1706        }
1707
1708        let used_sparse = incompressible_hint
1709            .unwrap_or_else(|| self.block_looks_incompressible(current_abs_start, current_abs_end));
1710        if used_sparse {
1711            self.insert_positions_with_step(
1712                current_abs_start,
1713                current_abs_end,
1714                DFAST_INCOMPRESSIBLE_SKIP_STEP,
1715            );
1716        } else {
1717            self.insert_positions(current_abs_start, current_abs_end);
1718        }
1719
1720        // Seed the tail densely only after sparse insertion so the next block
1721        // can match across the boundary without rehashing the full block twice.
1722        if used_sparse {
1723            let tail_start = current_abs_end
1724                .saturating_sub(Self::BOUNDARY_DENSE_TAIL_LEN)
1725                .max(current_abs_start);
1726            if tail_start < current_abs_end {
1727                self.insert_positions(tail_start, current_abs_end);
1728            }
1729        }
1730    }
1731
1732    fn skip_matching_dense(&mut self) {
1733        self.ensure_hash_tables();
1734        let current_len = self.window.back().unwrap().len();
1735        let current_abs_start = self.history_abs_start + self.window_size - current_len;
1736        let current_abs_end = current_abs_start + current_len;
1737        let backfill_start = current_abs_start
1738            .saturating_sub(Self::BOUNDARY_DENSE_TAIL_LEN)
1739            .max(self.history_abs_start);
1740        if backfill_start < current_abs_start {
1741            self.insert_positions(backfill_start, current_abs_start);
1742        }
1743        self.insert_positions(current_abs_start, current_abs_end);
1744    }
1745
1746    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
1747        self.ensure_hash_tables();
1748
1749        let current_len = self.window.back().unwrap().len();
1750        if current_len == 0 {
1751            return;
1752        }
1753
1754        let current_abs_start = self.history_abs_start + self.window_size - current_len;
1755        if self.use_fast_loop {
1756            self.start_matching_fast_loop(current_abs_start, current_len, &mut handle_sequence);
1757            return;
1758        }
1759        self.start_matching_general(current_abs_start, current_len, &mut handle_sequence);
1760    }
1761
1762    fn start_matching_general(
1763        &mut self,
1764        current_abs_start: usize,
1765        current_len: usize,
1766        handle_sequence: &mut impl for<'a> FnMut(Sequence<'a>),
1767    ) {
1768        let use_adaptive_skip =
1769            self.block_looks_incompressible(current_abs_start, current_abs_start + current_len);
1770        let mut pos = 1usize;
1771        let mut literals_start = 0usize;
1772        let mut skip_step = 1usize;
1773        let mut next_skip_growth_pos = DFAST_SKIP_STEP_GROWTH_INTERVAL;
1774        let mut miss_run = 0usize;
1775        while pos + DFAST_MIN_MATCH_LEN <= current_len {
1776            let abs_pos = current_abs_start + pos;
1777            let lit_len = pos - literals_start;
1778
1779            let best = self.best_match(abs_pos, lit_len);
1780            if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
1781                let start = self.emit_candidate(
1782                    current_abs_start,
1783                    &mut literals_start,
1784                    candidate,
1785                    handle_sequence,
1786                );
1787                pos = start + candidate.match_len;
1788                skip_step = 1;
1789                next_skip_growth_pos = pos.saturating_add(DFAST_SKIP_STEP_GROWTH_INTERVAL);
1790                miss_run = 0;
1791            } else {
1792                self.insert_position(abs_pos);
1793                miss_run = miss_run.saturating_add(1);
1794                let use_local_adaptive_skip = miss_run >= DFAST_LOCAL_SKIP_TRIGGER;
1795                if use_adaptive_skip || use_local_adaptive_skip {
1796                    let skip_cap = if use_adaptive_skip {
1797                        DFAST_MAX_SKIP_STEP
1798                    } else {
1799                        2
1800                    };
1801                    if pos >= next_skip_growth_pos {
1802                        skip_step = (skip_step + 1).min(skip_cap);
1803                        next_skip_growth_pos =
1804                            next_skip_growth_pos.saturating_add(DFAST_SKIP_STEP_GROWTH_INTERVAL);
1805                    }
1806                    pos = pos.saturating_add(skip_step);
1807                } else {
1808                    pos += 1;
1809                }
1810            }
1811        }
1812
1813        self.seed_remaining_hashable_starts(current_abs_start, current_len, pos);
1814        self.emit_trailing_literals(literals_start, handle_sequence);
1815    }
1816
1817    fn start_matching_fast_loop(
1818        &mut self,
1819        current_abs_start: usize,
1820        current_len: usize,
1821        handle_sequence: &mut impl for<'a> FnMut(Sequence<'a>),
1822    ) {
1823        let block_is_strict_incompressible = self
1824            .block_looks_incompressible_strict(current_abs_start, current_abs_start + current_len);
1825        let mut pos = 1usize;
1826        let mut literals_start = 0usize;
1827        let mut skip_step = 1usize;
1828        let mut next_skip_growth_pos = DFAST_SKIP_STEP_GROWTH_INTERVAL;
1829        let mut miss_run = 0usize;
1830        while pos + DFAST_MIN_MATCH_LEN <= current_len {
1831            let ip0 = pos;
1832            let ip1 = ip0.saturating_add(1);
1833            let ip2 = ip0.saturating_add(2);
1834            let ip3 = ip0.saturating_add(3);
1835
1836            let abs_ip0 = current_abs_start + ip0;
1837            let lit_len_ip0 = ip0 - literals_start;
1838
1839            if ip2 + DFAST_MIN_MATCH_LEN <= current_len {
1840                let abs_ip2 = current_abs_start + ip2;
1841                let lit_len_ip2 = ip2 - literals_start;
1842                if let Some(rep) = self.repcode_candidate(abs_ip2, lit_len_ip2)
1843                    && rep.start >= current_abs_start + literals_start
1844                    && rep.start <= abs_ip2
1845                {
1846                    let start = self.emit_candidate(
1847                        current_abs_start,
1848                        &mut literals_start,
1849                        rep,
1850                        handle_sequence,
1851                    );
1852                    pos = start + rep.match_len;
1853                    skip_step = 1;
1854                    next_skip_growth_pos = pos.saturating_add(DFAST_SKIP_STEP_GROWTH_INTERVAL);
1855                    miss_run = 0;
1856                    continue;
1857                }
1858            }
1859
1860            let best = self.best_match(abs_ip0, lit_len_ip0);
1861            if let Some(candidate) = best {
1862                let start = self.emit_candidate(
1863                    current_abs_start,
1864                    &mut literals_start,
1865                    candidate,
1866                    handle_sequence,
1867                );
1868                pos = start + candidate.match_len;
1869                skip_step = 1;
1870                next_skip_growth_pos = pos.saturating_add(DFAST_SKIP_STEP_GROWTH_INTERVAL);
1871                miss_run = 0;
1872            } else {
1873                self.insert_position(abs_ip0);
1874                if ip1 + 4 <= current_len {
1875                    self.insert_position(current_abs_start + ip1);
1876                }
1877                if ip2 + 4 <= current_len {
1878                    self.insert_position(current_abs_start + ip2);
1879                }
1880                if ip3 + 4 <= current_len {
1881                    self.insert_position(current_abs_start + ip3);
1882                }
1883                miss_run = miss_run.saturating_add(1);
1884                if block_is_strict_incompressible || miss_run >= DFAST_LOCAL_SKIP_TRIGGER {
1885                    let skip_cap = DFAST_MAX_SKIP_STEP;
1886                    if pos >= next_skip_growth_pos {
1887                        skip_step = (skip_step + 1).min(skip_cap);
1888                        next_skip_growth_pos =
1889                            next_skip_growth_pos.saturating_add(DFAST_SKIP_STEP_GROWTH_INTERVAL);
1890                    }
1891                    pos = pos.saturating_add(skip_step);
1892                } else {
1893                    skip_step = 1;
1894                    next_skip_growth_pos = pos.saturating_add(DFAST_SKIP_STEP_GROWTH_INTERVAL);
1895                    pos += 1;
1896                }
1897            }
1898        }
1899
1900        self.seed_remaining_hashable_starts(current_abs_start, current_len, pos);
1901        self.emit_trailing_literals(literals_start, handle_sequence);
1902    }
1903
1904    fn seed_remaining_hashable_starts(
1905        &mut self,
1906        current_abs_start: usize,
1907        current_len: usize,
1908        pos: usize,
1909    ) {
1910        let boundary_tail_start = current_len.saturating_sub(Self::BOUNDARY_DENSE_TAIL_LEN);
1911        let mut seed_pos = pos.min(current_len).min(boundary_tail_start);
1912        while seed_pos + DFAST_SHORT_HASH_LOOKAHEAD <= current_len {
1913            self.insert_position(current_abs_start + seed_pos);
1914            seed_pos += 1;
1915        }
1916    }
1917
1918    fn emit_candidate(
1919        &mut self,
1920        current_abs_start: usize,
1921        literals_start: &mut usize,
1922        candidate: MatchCandidate,
1923        handle_sequence: &mut impl for<'a> FnMut(Sequence<'a>),
1924    ) -> usize {
1925        self.insert_positions(
1926            current_abs_start + *literals_start,
1927            candidate.start + candidate.match_len,
1928        );
1929        let current = self.window.back().unwrap().as_slice();
1930        let start = candidate.start - current_abs_start;
1931        let literals = &current[*literals_start..start];
1932        handle_sequence(Sequence::Triple {
1933            literals,
1934            offset: candidate.offset,
1935            match_len: candidate.match_len,
1936        });
1937        let _ = encode_offset_with_history(
1938            candidate.offset as u32,
1939            literals.len() as u32,
1940            &mut self.offset_hist,
1941        );
1942        *literals_start = start + candidate.match_len;
1943        start
1944    }
1945
1946    fn emit_trailing_literals(
1947        &self,
1948        literals_start: usize,
1949        handle_sequence: &mut impl for<'a> FnMut(Sequence<'a>),
1950    ) {
1951        if literals_start < self.window.back().unwrap().len() {
1952            let current = self.window.back().unwrap().as_slice();
1953            handle_sequence(Sequence::Literals {
1954                literals: &current[literals_start..],
1955            });
1956        }
1957    }
1958
1959    fn ensure_hash_tables(&mut self) {
1960        let table_len = 1usize << self.hash_bits;
1961        if self.short_hash.len() != table_len {
1962            // This is intentionally lazy so Fastest/Uncompressed never pay the
1963            // ~dfast-level memory cost. The current size tracks the issue's
1964            // zstd level-3 style parameters rather than a generic low-memory preset.
1965            self.short_hash = alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; table_len];
1966            self.long_hash = alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; table_len];
1967        }
1968    }
1969
1970    fn compact_history(&mut self) {
1971        if self.history_start == 0 {
1972            return;
1973        }
1974        if self.history_start >= self.max_window_size
1975            || self.history_start * 2 >= self.history.len()
1976        {
1977            self.history.drain(..self.history_start);
1978            self.history_start = 0;
1979        }
1980    }
1981
1982    fn live_history(&self) -> &[u8] {
1983        &self.history[self.history_start..]
1984    }
1985
1986    fn history_abs_end(&self) -> usize {
1987        self.history_abs_start + self.live_history().len()
1988    }
1989
1990    fn best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
1991        let rep = self.repcode_candidate(abs_pos, lit_len);
1992        let hash = self.hash_candidate(abs_pos, lit_len);
1993        best_len_offset_candidate(rep, hash)
1994    }
1995
1996    fn pick_lazy_match(
1997        &self,
1998        abs_pos: usize,
1999        lit_len: usize,
2000        best: Option<MatchCandidate>,
2001    ) -> Option<MatchCandidate> {
2002        pick_lazy_match_shared(
2003            abs_pos,
2004            lit_len,
2005            best,
2006            LazyMatchConfig {
2007                target_len: DFAST_TARGET_LEN,
2008                min_match_len: DFAST_MIN_MATCH_LEN,
2009                lazy_depth: self.lazy_depth,
2010                history_abs_end: self.history_abs_end(),
2011            },
2012            |next_pos, next_lit_len| self.best_match(next_pos, next_lit_len),
2013        )
2014    }
2015
2016    fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
2017        repcode_candidate_shared(
2018            self.live_history(),
2019            self.history_abs_start,
2020            self.offset_hist,
2021            abs_pos,
2022            lit_len,
2023            DFAST_MIN_MATCH_LEN,
2024        )
2025    }
2026
2027    fn hash_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
2028        // Hoist all the per-loop invariants out of the combinator chains.
2029        // `short_candidates`/`long_candidates` each re-fetch `live_history`
2030        // and recompute `idx` from scratch inside their Option/flatten/filter
2031        // adapters; on a per-byte hot path (32% exclusive on default-level
2032        // profile) that's measurable Option/Iterator scaffolding the
2033        // compiler can't always erase.
2034        let concat = self.live_history();
2035        let current_idx = abs_pos - self.history_abs_start;
2036        let history_abs_start = self.history_abs_start;
2037        let mut best = None;
2038
2039        // Long-hash probes first (8-byte hash → longer matches more likely).
2040        if current_idx + 8 <= concat.len() {
2041            let long_hash = self.hash8(&concat[current_idx..]);
2042            // SAFETY: `hash_index` masks to `hash_bits` and `long_hash.len()
2043            // == 1 << hash_bits` (`ensure_hash_tables`).
2044            debug_assert!(long_hash < self.long_hash.len());
2045            let bucket = unsafe { self.long_hash.get_unchecked(long_hash) };
2046            for &candidate_pos in bucket {
2047                if candidate_pos == DFAST_EMPTY_SLOT
2048                    || candidate_pos < history_abs_start
2049                    || candidate_pos >= abs_pos
2050                {
2051                    continue;
2052                }
2053                let candidate_idx = candidate_pos - history_abs_start;
2054                let match_len = MatchGenerator::common_prefix_len(
2055                    &concat[candidate_idx..],
2056                    &concat[current_idx..],
2057                );
2058                if match_len >= DFAST_MIN_MATCH_LEN {
2059                    let candidate =
2060                        self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
2061                    best = best_len_offset_candidate(best, Some(candidate));
2062                    if best.is_some_and(|b| b.match_len >= DFAST_TARGET_LEN) {
2063                        return best;
2064                    }
2065                }
2066            }
2067        }
2068
2069        if current_idx + 4 <= concat.len() {
2070            let short_hash = self.hash4(&concat[current_idx..]);
2071            debug_assert!(short_hash < self.short_hash.len());
2072            let bucket = unsafe { self.short_hash.get_unchecked(short_hash) };
2073            for &candidate_pos in bucket {
2074                if candidate_pos == DFAST_EMPTY_SLOT
2075                    || candidate_pos < history_abs_start
2076                    || candidate_pos >= abs_pos
2077                {
2078                    continue;
2079                }
2080                let candidate_idx = candidate_pos - history_abs_start;
2081                let match_len = MatchGenerator::common_prefix_len(
2082                    &concat[candidate_idx..],
2083                    &concat[current_idx..],
2084                );
2085                if match_len >= DFAST_MIN_MATCH_LEN {
2086                    let candidate =
2087                        self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
2088                    best = best_len_offset_candidate(best, Some(candidate));
2089                    if best.is_some_and(|b| b.match_len >= DFAST_TARGET_LEN) {
2090                        return best;
2091                    }
2092                }
2093            }
2094        }
2095        best
2096    }
2097
2098    fn extend_backwards(
2099        &self,
2100        candidate_pos: usize,
2101        abs_pos: usize,
2102        match_len: usize,
2103        lit_len: usize,
2104    ) -> MatchCandidate {
2105        extend_backwards_shared(
2106            self.live_history(),
2107            self.history_abs_start,
2108            candidate_pos,
2109            abs_pos,
2110            match_len,
2111            lit_len,
2112        )
2113    }
2114
2115    fn insert_positions(&mut self, start: usize, end: usize) {
2116        let start = start.max(self.history_abs_start);
2117        let end = end.min(self.history_abs_end());
2118        for pos in start..end {
2119            self.insert_position(pos);
2120        }
2121    }
2122
2123    fn insert_positions_with_step(&mut self, start: usize, end: usize, step: usize) {
2124        let start = start.max(self.history_abs_start);
2125        let end = end.min(self.history_abs_end());
2126        if step <= 1 {
2127            self.insert_positions(start, end);
2128            return;
2129        }
2130        let mut pos = start;
2131        while pos < end {
2132            self.insert_position(pos);
2133            pos = pos.saturating_add(step);
2134        }
2135    }
2136
2137    #[inline]
2138    fn insert_position(&mut self, pos: usize) {
2139        let idx = pos.wrapping_sub(self.history_abs_start);
2140        let concat_len = self.history.len() - self.history_start;
2141        // SAFETY: `hash_index` masks the mixed hash to `hash_bits` bits and
2142        // both tables are sized to `1 << hash_bits` in `ensure_hash_tables`,
2143        // so every index produced here is provably below the table length.
2144        // Eliding the bounds check on this per-byte hot path saves ~4
2145        // instructions and one branch per call.
2146        if idx + 4 <= concat_len {
2147            let concat = &self.history[self.history_start..];
2148            let short = self.hash4(&concat[idx..]);
2149            debug_assert!(short < self.short_hash.len());
2150            let bucket = unsafe { self.short_hash.get_unchecked_mut(short) };
2151            if bucket[0] != pos {
2152                bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
2153                bucket[0] = pos;
2154            }
2155        }
2156
2157        if idx + 8 <= concat_len {
2158            let concat = &self.history[self.history_start..];
2159            let long = self.hash8(&concat[idx..]);
2160            debug_assert!(long < self.long_hash.len());
2161            let bucket = unsafe { self.long_hash.get_unchecked_mut(long) };
2162            if bucket[0] != pos {
2163                bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
2164                bucket[0] = pos;
2165            }
2166        }
2167    }
2168
2169    fn hash4(&self, data: &[u8]) -> usize {
2170        let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64;
2171        self.hash_index(value)
2172    }
2173
2174    fn hash8(&self, data: &[u8]) -> usize {
2175        let value = u64::from_le_bytes(data[..8].try_into().unwrap());
2176        self.hash_index(value)
2177    }
2178
2179    fn block_looks_incompressible(&self, start: usize, end: usize) -> bool {
2180        let live = self.live_history();
2181        if start >= end || start < self.history_abs_start {
2182            return false;
2183        }
2184        let start_idx = start - self.history_abs_start;
2185        let end_idx = end - self.history_abs_start;
2186        if end_idx > live.len() {
2187            return false;
2188        }
2189        let block = &live[start_idx..end_idx];
2190        block_looks_incompressible(block)
2191    }
2192
2193    fn block_looks_incompressible_strict(&self, start: usize, end: usize) -> bool {
2194        let live = self.live_history();
2195        if start >= end || start < self.history_abs_start {
2196            return false;
2197        }
2198        let start_idx = start - self.history_abs_start;
2199        let end_idx = end - self.history_abs_start;
2200        if end_idx > live.len() {
2201            return false;
2202        }
2203        let block = &live[start_idx..end_idx];
2204        block_looks_incompressible_strict(block)
2205    }
2206
2207    fn hash_index(&self, value: u64) -> usize {
2208        let mixed = crate::encoding::fastpath::hash_mix_u64_with_kernel(self.hash_kernel, value);
2209        (mixed >> (64 - self.hash_bits)) as usize
2210    }
2211}
2212
/// Row-based match generator state.
///
/// NOTE(review): the window/history fields carry the same names and types as
/// in `DfastMatchGenerator` and presumably serve the same purpose; confirm
/// against the methods of this type.
struct RowMatchGenerator {
    max_window_size: usize,
    window: VecDeque<Vec<u8>>,
    window_size: usize,
    history: Vec<u8>,
    history_start: usize,
    history_abs_start: usize,
    // Repeat-offset history; initialised and reset to `[1, 4, 8]`.
    offset_hist: [u32; 3],
    // Effective hash bits minus `row_log` (see `set_hash_bits`).
    row_hash_log: usize,
    // Kept within 4..=6 by `configure`; presumably log2 of the entries per
    // row — TODO confirm.
    row_log: usize,
    // Taken verbatim from `RowConfig::search_depth` in `configure`.
    search_depth: usize,
    // Taken verbatim from `RowConfig::target_len` in `configure`.
    target_len: usize,
    lazy_depth: u8,
    /// Cached fastpath kernel for `hash_mix_u64`; see Dfast for rationale.
    hash_kernel: crate::encoding::fastpath::FastpathKernel,
    // Row tables. They are cleared when `row_hash_log` changes; `reset` fills
    // `row_heads` and `row_tags` with 0 and `row_positions` with
    // `ROW_EMPTY_SLOT`.
    row_heads: Vec<u8>,
    row_positions: Vec<usize>,
    row_tags: Vec<u8>,
}
2232
2233impl RowMatchGenerator {
2234    fn new(max_window_size: usize) -> Self {
2235        Self {
2236            max_window_size,
2237            window: VecDeque::new(),
2238            window_size: 0,
2239            history: Vec::new(),
2240            history_start: 0,
2241            history_abs_start: 0,
2242            offset_hist: [1, 4, 8],
2243            row_hash_log: ROW_HASH_BITS - ROW_LOG,
2244            row_log: ROW_LOG,
2245            search_depth: ROW_SEARCH_DEPTH,
2246            target_len: ROW_TARGET_LEN,
2247            lazy_depth: 1,
2248            hash_kernel: crate::encoding::fastpath::select_kernel(),
2249            row_heads: Vec::new(),
2250            row_positions: Vec::new(),
2251            row_tags: Vec::new(),
2252        }
2253    }
2254
2255    fn set_hash_bits(&mut self, bits: usize) {
2256        let clamped = bits.clamp(self.row_log + 1, ROW_HASH_BITS);
2257        let row_hash_log = clamped.saturating_sub(self.row_log);
2258        if self.row_hash_log != row_hash_log {
2259            self.row_hash_log = row_hash_log;
2260            self.row_heads.clear();
2261            self.row_positions.clear();
2262            self.row_tags.clear();
2263        }
2264    }
2265
2266    fn configure(&mut self, config: RowConfig) {
2267        self.row_log = config.row_log.clamp(4, 6);
2268        self.search_depth = config.search_depth;
2269        self.target_len = config.target_len;
2270        self.set_hash_bits(config.hash_bits.max(self.row_log + 1));
2271    }
2272
2273    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
2274        self.window_size = 0;
2275        self.history.clear();
2276        self.history_start = 0;
2277        self.history_abs_start = 0;
2278        self.offset_hist = [1, 4, 8];
2279        self.row_heads.fill(0);
2280        self.row_positions.fill(ROW_EMPTY_SLOT);
2281        self.row_tags.fill(0);
2282        for mut data in self.window.drain(..) {
2283            data.resize(data.capacity(), 0);
2284            reuse_space(data);
2285        }
2286    }
2287
2288    fn get_last_space(&self) -> &[u8] {
2289        self.window.back().unwrap().as_slice()
2290    }
2291
2292    fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
2293        assert!(data.len() <= self.max_window_size);
2294        while self.window_size + data.len() > self.max_window_size {
2295            let removed = self.window.pop_front().unwrap();
2296            self.window_size -= removed.len();
2297            self.history_start += removed.len();
2298            self.history_abs_start += removed.len();
2299            reuse_space(removed);
2300        }
2301        self.compact_history();
2302        self.history.extend_from_slice(&data);
2303        self.window_size += data.len();
2304        self.window.push_back(data);
2305    }
2306
2307    fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
2308        while self.window_size > self.max_window_size {
2309            let removed = self.window.pop_front().unwrap();
2310            self.window_size -= removed.len();
2311            self.history_start += removed.len();
2312            self.history_abs_start += removed.len();
2313            reuse_space(removed);
2314        }
2315    }
2316
2317    fn skip_matching_with_hint(&mut self, incompressible_hint: Option<bool>) {
2318        self.ensure_tables();
2319        let current_len = self.window.back().unwrap().len();
2320        let current_abs_start = self.history_abs_start + self.window_size - current_len;
2321        let current_abs_end = current_abs_start + current_len;
2322        let backfill_start = self.backfill_start(current_abs_start);
2323        if backfill_start < current_abs_start {
2324            self.insert_positions(backfill_start, current_abs_start);
2325        }
2326        if incompressible_hint == Some(true) {
2327            self.insert_positions_with_step(
2328                current_abs_start,
2329                current_abs_end,
2330                INCOMPRESSIBLE_SKIP_STEP,
2331            );
2332            let dense_tail = ROW_MIN_MATCH_LEN + INCOMPRESSIBLE_SKIP_STEP;
2333            let tail_start = current_abs_end
2334                .saturating_sub(dense_tail)
2335                .max(current_abs_start);
2336            for pos in tail_start..current_abs_end {
2337                if !(pos - current_abs_start).is_multiple_of(INCOMPRESSIBLE_SKIP_STEP) {
2338                    self.insert_position(pos);
2339                }
2340            }
2341        } else {
2342            self.insert_positions(current_abs_start, current_abs_end);
2343        }
2344    }
2345
2346    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
2347        self.ensure_tables();
2348
2349        let current_len = self.window.back().unwrap().len();
2350        if current_len == 0 {
2351            return;
2352        }
2353        let current_abs_start = self.history_abs_start + self.window_size - current_len;
2354        let backfill_start = self.backfill_start(current_abs_start);
2355        if backfill_start < current_abs_start {
2356            self.insert_positions(backfill_start, current_abs_start);
2357        }
2358
2359        let mut pos = 0usize;
2360        let mut literals_start = 0usize;
2361        while pos + ROW_MIN_MATCH_LEN <= current_len {
2362            let abs_pos = current_abs_start + pos;
2363            let lit_len = pos - literals_start;
2364
2365            let best = self.best_match(abs_pos, lit_len);
2366            if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
2367                self.insert_positions(abs_pos, candidate.start + candidate.match_len);
2368                let current = self.window.back().unwrap().as_slice();
2369                let start = candidate.start - current_abs_start;
2370                let literals = &current[literals_start..start];
2371                handle_sequence(Sequence::Triple {
2372                    literals,
2373                    offset: candidate.offset,
2374                    match_len: candidate.match_len,
2375                });
2376                let _ = encode_offset_with_history(
2377                    candidate.offset as u32,
2378                    literals.len() as u32,
2379                    &mut self.offset_hist,
2380                );
2381                pos = start + candidate.match_len;
2382                literals_start = pos;
2383            } else {
2384                self.insert_position(abs_pos);
2385                pos += 1;
2386            }
2387        }
2388
2389        while pos + ROW_HASH_KEY_LEN <= current_len {
2390            self.insert_position(current_abs_start + pos);
2391            pos += 1;
2392        }
2393
2394        if literals_start < current_len {
2395            let current = self.window.back().unwrap().as_slice();
2396            handle_sequence(Sequence::Literals {
2397                literals: &current[literals_start..],
2398            });
2399        }
2400    }
2401
2402    fn ensure_tables(&mut self) {
2403        let row_count = 1usize << self.row_hash_log;
2404        let row_entries = 1usize << self.row_log;
2405        let total = row_count * row_entries;
2406        if self.row_positions.len() != total {
2407            self.row_heads = alloc::vec![0; row_count];
2408            self.row_positions = alloc::vec![ROW_EMPTY_SLOT; total];
2409            self.row_tags = alloc::vec![0; total];
2410        }
2411    }
2412
2413    fn compact_history(&mut self) {
2414        if self.history_start == 0 {
2415            return;
2416        }
2417        if self.history_start >= self.max_window_size
2418            || self.history_start * 2 >= self.history.len()
2419        {
2420            self.history.drain(..self.history_start);
2421            self.history_start = 0;
2422        }
2423    }
2424
2425    fn live_history(&self) -> &[u8] {
2426        &self.history[self.history_start..]
2427    }
2428
2429    fn history_abs_end(&self) -> usize {
2430        self.history_abs_start + self.live_history().len()
2431    }
2432
2433    fn hash_and_row(&self, abs_pos: usize) -> Option<(usize, u8)> {
2434        let idx = abs_pos - self.history_abs_start;
2435        let concat = self.live_history();
2436        if idx + ROW_HASH_KEY_LEN > concat.len() {
2437            return None;
2438        }
2439        let value =
2440            u32::from_le_bytes(concat[idx..idx + ROW_HASH_KEY_LEN].try_into().unwrap()) as u64;
2441        let hash = crate::encoding::fastpath::hash_mix_u64_with_kernel(self.hash_kernel, value);
2442        let total_bits = self.row_hash_log + ROW_TAG_BITS;
2443        let combined = hash >> (u64::BITS as usize - total_bits);
2444        let row_mask = (1usize << self.row_hash_log) - 1;
2445        let row = ((combined >> ROW_TAG_BITS) as usize) & row_mask;
2446        let tag = combined as u8;
2447        Some((row, tag))
2448    }
2449
2450    fn backfill_start(&self, current_abs_start: usize) -> usize {
2451        current_abs_start
2452            .saturating_sub(ROW_HASH_KEY_LEN - 1)
2453            .max(self.history_abs_start)
2454    }
2455
2456    fn best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
2457        let rep = self.repcode_candidate(abs_pos, lit_len);
2458        let row = self.row_candidate(abs_pos, lit_len);
2459        best_len_offset_candidate(rep, row)
2460    }
2461
2462    fn pick_lazy_match(
2463        &self,
2464        abs_pos: usize,
2465        lit_len: usize,
2466        best: Option<MatchCandidate>,
2467    ) -> Option<MatchCandidate> {
2468        pick_lazy_match_shared(
2469            abs_pos,
2470            lit_len,
2471            best,
2472            LazyMatchConfig {
2473                target_len: self.target_len,
2474                min_match_len: ROW_MIN_MATCH_LEN,
2475                lazy_depth: self.lazy_depth,
2476                history_abs_end: self.history_abs_end(),
2477            },
2478            |next_pos, next_lit_len| self.best_match(next_pos, next_lit_len),
2479        )
2480    }
2481
2482    fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
2483        repcode_candidate_shared(
2484            self.live_history(),
2485            self.history_abs_start,
2486            self.offset_hist,
2487            abs_pos,
2488            lit_len,
2489            ROW_MIN_MATCH_LEN,
2490        )
2491    }
2492
2493    fn row_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
2494        let concat = self.live_history();
2495        let current_idx = abs_pos - self.history_abs_start;
2496        if current_idx + ROW_MIN_MATCH_LEN > concat.len() {
2497            return None;
2498        }
2499
2500        let (row, tag) = self.hash_and_row(abs_pos)?;
2501        let row_entries = 1usize << self.row_log;
2502        let row_mask = row_entries - 1;
2503        let row_base = row << self.row_log;
2504        let head = self.row_heads[row] as usize;
2505        let max_walk = self.search_depth.min(row_entries);
2506
2507        let mut best = None;
2508        for i in 0..max_walk {
2509            let slot = (head + i) & row_mask;
2510            let idx = row_base + slot;
2511            if self.row_tags[idx] != tag {
2512                continue;
2513            }
2514            let candidate_pos = self.row_positions[idx];
2515            if candidate_pos == ROW_EMPTY_SLOT
2516                || candidate_pos < self.history_abs_start
2517                || candidate_pos >= abs_pos
2518            {
2519                continue;
2520            }
2521            let candidate_idx = candidate_pos - self.history_abs_start;
2522            let match_len =
2523                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
2524            if match_len >= ROW_MIN_MATCH_LEN {
2525                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
2526                best = best_len_offset_candidate(best, Some(candidate));
2527                if best.is_some_and(|best| best.match_len >= self.target_len) {
2528                    return best;
2529                }
2530            }
2531        }
2532        best
2533    }
2534
2535    fn extend_backwards(
2536        &self,
2537        candidate_pos: usize,
2538        abs_pos: usize,
2539        match_len: usize,
2540        lit_len: usize,
2541    ) -> MatchCandidate {
2542        extend_backwards_shared(
2543            self.live_history(),
2544            self.history_abs_start,
2545            candidate_pos,
2546            abs_pos,
2547            match_len,
2548            lit_len,
2549        )
2550    }
2551
2552    fn insert_positions(&mut self, start: usize, end: usize) {
2553        for pos in start..end {
2554            self.insert_position(pos);
2555        }
2556    }
2557
2558    fn insert_positions_with_step(&mut self, start: usize, end: usize, step: usize) {
2559        if step <= 1 {
2560            self.insert_positions(start, end);
2561            return;
2562        }
2563        let mut pos = start;
2564        while pos < end {
2565            self.insert_position(pos);
2566            let next = pos.saturating_add(step);
2567            if next <= pos {
2568                break;
2569            }
2570            pos = next;
2571        }
2572    }
2573
2574    #[inline]
2575    fn insert_position(&mut self, abs_pos: usize) {
2576        let Some((row, tag)) = self.hash_and_row(abs_pos) else {
2577            return;
2578        };
2579        let row_entries = 1usize << self.row_log;
2580        let row_mask = row_entries - 1;
2581        let row_base = row << self.row_log;
2582        // SAFETY: `hash_and_row` masks `row` to `row_hash_log` bits and
2583        // `row_heads.len() == 1 << row_hash_log` by `ensure_tables`.
2584        // `row_base = row << row_log = row * row_entries` and
2585        // `next < row_entries`, so `row_base + next < row_count *
2586        // row_entries == row_positions.len() == row_tags.len()`. Both
2587        // index pairs are provably in bounds; per-byte hot path on
2588        // fast/dfast/row levels saves ~6 instructions and 3 branches.
2589        debug_assert!(row < self.row_heads.len());
2590        debug_assert!(row_base + row_entries <= self.row_positions.len());
2591        unsafe {
2592            let head = *self.row_heads.get_unchecked(row) as usize;
2593            let next = head.wrapping_sub(1) & row_mask;
2594            *self.row_heads.get_unchecked_mut(row) = next as u8;
2595            *self.row_tags.get_unchecked_mut(row_base + next) = tag;
2596            *self.row_positions.get_unchecked_mut(row_base + next) = abs_pos;
2597        }
2598    }
2599}
2600
/// State of the `Hc` match generator.
///
/// NOTE(review): the implementation of this type is outside the reviewed
/// range, so the grouping notes below are inferred from field names and types
/// only — confirm against the impl before relying on them.
struct HcMatchGenerator {
    // Window / history bookkeeping; same field names as `RowMatchGenerator`.
    max_window_size: usize,
    window: VecDeque<Vec<u8>>,
    window_size: usize,
    history: Vec<u8>,
    history_start: usize,
    history_abs_start: usize,
    position_base: usize,
    index_shift: usize,
    offset_hist: [u32; 3],
    // Index tables storing 32-bit entries.
    hash_table: Vec<u32>,
    hash3_table: Vec<u32>,
    chain_table: Vec<u32>,
    // Search parameters.
    lazy_depth: u8,
    hash_log: usize,
    chain_log: usize,
    search_depth: usize,
    target_len: usize,
    parse_mode: HcParseMode,
    ldm_sequences: Vec<HcRawSeq>,
    next_to_update3: usize,
    hash3_log: usize,
    skip_insert_until_abs: usize,
    dictionary_limit_abs: Option<usize>,
    dictionary_primed_for_frame: bool,
    allow_zero_relative_position: bool,
    // Price statistics plus scratch buffers — presumably for the optimal
    // parser; TODO confirm.
    opt_state: HcOptState,
    opt_nodes_scratch: Vec<HcOptimalNode>,
    opt_candidates_scratch: Vec<MatchCandidate>,
    opt_store_scratch: Vec<HcOptimalNode>,
    opt_segment_plan_scratch: Vec<HcOptimalSequence>,
    opt_seed_plan_scratch: Vec<HcOptimalSequence>,
    opt_ll_price_scratch: Vec<u32>,
    opt_ll_price_generation: Vec<u32>,
    opt_ll_price_stamp: u32,
    opt_lit_price_scratch: [u32; HC_MAX_LIT + 1],
    opt_lit_price_generation: [u32; HC_MAX_LIT + 1],
    opt_lit_price_stamp: u32,
    opt_ml_price_scratch: Vec<u32>,
    opt_ml_price_generation: Vec<u32>,
    opt_ml_price_stamp: u32,
}
2643
/// One slot of the optimal-parse table.
///
/// NOTE(review): the consumers of this type are outside the reviewed range;
/// field meanings beyond what `Default` shows should be confirmed there.
#[derive(Copy, Clone, Debug)]
struct HcOptimalNode {
    price: u32,
    off: u32,
    mlen: u32,
    litlen: u32,
    reps: [u32; 3],
}

impl Default for HcOptimalNode {
    /// An untouched slot: maximal price, no match, and the initial repeat
    /// offsets `[1, 4, 8]`.
    fn default() -> Self {
        // Donor parity: uninitialized DP slots use litlen != 0
        // (C code uses !0) so they are never treated as end-of-match.
        const UNSET_LITLEN: u32 = u32::MAX;
        HcOptimalNode {
            price: u32::MAX,
            litlen: UNSET_LITLEN,
            mlen: 0,
            off: 0,
            reps: [1, 4, 8],
        }
    }
}
2666
/// A planned sequence as an (offset, match length, literal length) triple of
/// 32-bit values.
///
/// NOTE(review): producers and consumers are outside the reviewed range —
/// confirm whether `offset` is a raw distance or an encoded offset code.
#[derive(Copy, Clone)]
struct HcOptimalSequence {
    offset: u32,
    match_len: u32,
    lit_len: u32,
}
2673
/// A raw sequence record: literal length, offset and match length.
///
/// NOTE(review): stored in `HcMatchGenerator::ldm_sequences`; presumably
/// produced by long-distance matching — confirm against the code that fills it.
#[derive(Copy, Clone)]
struct HcRawSeq {
    lit_length: usize,
    offset: usize,
    match_length: usize,
}
2680
/// Cursor into a list of raw sequences; all counters default to zero.
///
/// NOTE(review): the code that advances this cursor is outside the reviewed
/// range — confirm the exact meaning of each counter there.
#[derive(Copy, Clone, Default)]
struct HcRawSeqStore {
    pos: usize,
    pos_in_sequence: usize,
    size: usize,
}

/// Raw-sequence cursor plus one block-relative range and an offset.
///
/// NOTE(review): consumers are outside the reviewed range; presumably this
/// tracks the current long-distance match candidate — confirm.
#[derive(Copy, Clone)]
struct HcOptLdmState {
    seq_store: HcRawSeqStore,
    start_pos_in_block: usize,
    end_pos_in_block: usize,
    offset: usize,
}

impl Default for HcOptLdmState {
    /// Empty cursor, both range bounds set to `usize::MAX`, offset 0.
    fn default() -> Self {
        let unset = usize::MAX;
        HcOptLdmState {
            seq_store: Default::default(),
            start_pos_in_block: unset,
            end_pos_in_block: unset,
            offset: 0,
        }
    }
}
2706
/// Inputs bundled for a candidate lookup: repeat offsets, the pending
/// literal length, and an optional extra candidate.
///
/// NOTE(review): the function taking this query is outside the reviewed
/// range — confirm how `ldm_candidate` is merged with the other candidates.
#[derive(Copy, Clone)]
struct HcCandidateQuery {
    reps: [u32; 3],
    lit_len: usize,
    ldm_candidate: Option<MatchCandidate>,
}
2713
/// Repeat offsets, a literal length and a cost profile, bundled together.
///
/// NOTE(review): consumers are outside the reviewed range; presumably the
/// state carried into an optimal-parse plan — confirm.
#[derive(Copy, Clone)]
struct HcOptimalPlanState {
    reps: [u32; 3],
    litlen: usize,
    profile: HcOptimalCostProfile,
}
2720
/// Owned buffers whose element types mirror the `opt_*_scratch` and
/// `opt_*_price_*` vectors of `HcMatchGenerator`.
///
/// NOTE(review): presumably the scratch buffers are moved in and out of the
/// generator through this struct — confirm against the code that builds it.
struct HcOptimalPlanBuffers {
    nodes: Vec<HcOptimalNode>,
    candidates: Vec<MatchCandidate>,
    store: Vec<HcOptimalNode>,
    ll_prices: Vec<u32>,
    ll_price_generations: Vec<u32>,
    ml_prices: Vec<u32>,
    ml_price_generations: Vec<u32>,
}
2730
/// Pricing mode recorded in `HcOptState::price_type`.
///
/// `rescale_freqs` selects `Dynamic` by default and switches to `Predefined`
/// only for the first block when the source is at most `HC_PREDEF_THRESHOLD`
/// bytes and no dictionary seed is available.
#[derive(Copy, Clone)]
enum HcOptPriceType {
    Dynamic,
    Predefined,
}
2736
/// Per-symbol bit costs captured from dictionary entropy tables, used once
/// by `HcOptState::rescale_freqs` to seed the frequency statistics.
#[derive(Clone)]
struct HcDictEntropySeed {
    // Whether the corresponding table was supplied when the seed was built.
    has_lit: bool,
    has_ll: bool,
    has_ml: bool,
    has_of: bool,
    // Bit cost per symbol; 0 where no table or no cost was available.
    lit_bits: [u8; HC_MAX_LIT + 1],
    ll_bits: [u8; HC_MAX_LL + 1],
    ml_bits: [u8; HC_MAX_ML + 1],
    of_bits: [u8; HC_MAX_OFF + 1],
}
2748
// Highest symbol value of each statistics alphabet (literals, literal-length
// codes, match-length codes, offset codes); the frequency and bit-cost
// tables are sized `HC_MAX_* + 1`.
const HC_MAX_LIT: usize = 255;
const HC_MAX_LL: usize = 35;
const HC_MAX_ML: usize = 52;
const HC_MAX_OFF: usize = 31;
// NOTE(review): not referenced in the reviewed range; presumably the
// increment applied per literal when statistics are updated — confirm.
const HC_LITFREQ_ADD: u32 = 2;
// First-block sources of at most this many bytes use predefined pricing
// (see `HcOptState::rescale_freqs`).
const HC_PREDEF_THRESHOLD: usize = 8;
// Fixed-point scale of prices: one whole bit costs this many units.
const HC_BITCOST_MULTIPLIER: u32 = 1 << 8;
const HC_BLOCKSIZE_MAX: usize = crate::common::MAX_BLOCK_SIZE as usize;
// NOTE(review): the next two are not referenced in the reviewed range;
// confirm their role against the optimal parser.
const HC_OPT_NUM: usize = 1 << 12;
const HC_FORMAT_MINMATCH: usize = 3;
/// Symbol statistics and derived base prices used to price literals and
/// sequences.
#[derive(Clone)]
struct HcOptState {
    // Per-symbol frequencies for literals, literal-length codes,
    // match-length codes and offset codes.
    lit_freq: [u32; HC_MAX_LIT + 1],
    lit_length_freq: [u32; HC_MAX_LL + 1],
    match_length_freq: [u32; HC_MAX_ML + 1],
    off_code_freq: [u32; HC_MAX_OFF + 1],
    // Totals of the tables above, as maintained by `rescale_freqs`.
    lit_sum: u32,
    lit_length_sum: u32,
    match_length_sum: u32,
    off_code_sum: u32,
    // Weight of each total, refreshed by `set_base_prices`.
    lit_sum_base_price: u32,
    lit_length_sum_base_price: u32,
    match_length_sum_base_price: u32,
    off_code_sum_base_price: u32,
    price_type: HcOptPriceType,
    // When false, literal statistics are kept at zero and the literal base
    // price is 0.
    literals_compressed: bool,
    // Dictionary-derived bit costs; consumed by the first `rescale_freqs`
    // call that finds empty statistics.
    dictionary_seed: Option<HcDictEntropySeed>,
}
2777
2778impl HcOptState {
2779    fn new() -> Self {
2780        Self {
2781            lit_freq: [0; HC_MAX_LIT + 1],
2782            lit_length_freq: [0; HC_MAX_LL + 1],
2783            match_length_freq: [0; HC_MAX_ML + 1],
2784            off_code_freq: [0; HC_MAX_OFF + 1],
2785            lit_sum: 0,
2786            lit_length_sum: 0,
2787            match_length_sum: 0,
2788            off_code_sum: 0,
2789            lit_sum_base_price: 0,
2790            lit_length_sum_base_price: 0,
2791            match_length_sum_base_price: 0,
2792            off_code_sum_base_price: 0,
2793            price_type: HcOptPriceType::Dynamic,
2794            literals_compressed: true,
2795            dictionary_seed: None,
2796        }
2797    }
2798
2799    fn reset(&mut self) {
2800        *self = Self::new();
2801    }
2802
2803    fn bit_weight(stat: u32) -> u32 {
2804        // Donor parity: stat+1 ≥ 1 ⇒ leading_zeros ≤ 31 ⇒ 31-lz ≥ 0.
2805        // hb ≤ 31, MULTIPLIER = 256 ⇒ product ≤ 7936, no overflow.
2806        let hb = 31 - (stat + 1).leading_zeros();
2807        hb * HC_BITCOST_MULTIPLIER
2808    }
2809
2810    fn frac_weight(raw_stat: u32) -> u32 {
2811        // Donor parity (ZSTD_fracWeight). All operands bounded: stat fits u32,
2812        // hb ∈ 0..=31, BWeight ≤ 7936, FWeight ≤ 65535 ⇒ sum no overflow.
2813        let stat = raw_stat + 1;
2814        let hb = 31 - stat.leading_zeros();
2815        let b_weight = hb * HC_BITCOST_MULTIPLIER;
2816        let f_weight = (stat << 8) >> hb;
2817        b_weight + f_weight
2818    }
2819
2820    fn weight(stat: u32, accurate: bool) -> u32 {
2821        if accurate {
2822            Self::frac_weight(stat)
2823        } else {
2824            Self::bit_weight(stat)
2825        }
2826    }
2827
2828    fn downscale_stats(table: &mut [u32], shift: u32, base1: bool) -> u32 {
2829        // Donor parity: table sums bounded by block size (≤ 256K) ⇒ no overflow.
2830        let mut sum = 0u32;
2831        for stat in table {
2832            let base = if base1 { 1 } else { u32::from(*stat > 0) };
2833            let new_stat = base + (*stat >> shift);
2834            *stat = new_stat;
2835            sum += new_stat;
2836        }
2837        sum
2838    }
2839
2840    fn scale_stats(table: &mut [u32], log_target: u32) -> u32 {
2841        let prev_sum = table.iter().copied().sum::<u32>();
2842        let factor = prev_sum >> log_target;
2843        if factor <= 1 {
2844            return prev_sum;
2845        }
2846        // factor ≥ 2 ⇒ leading_zeros ≤ 30 ⇒ 31-lz ≥ 1.
2847        let shift = 31 - factor.leading_zeros();
2848        Self::downscale_stats(table, shift, true)
2849    }
2850
2851    fn set_base_prices(&mut self, accurate: bool) {
2852        self.lit_sum_base_price = if self.literals_compressed() {
2853            Self::weight(self.lit_sum, accurate)
2854        } else {
2855            0
2856        };
2857        self.lit_length_sum_base_price = Self::weight(self.lit_length_sum, accurate);
2858        self.match_length_sum_base_price = Self::weight(self.match_length_sum, accurate);
2859        self.off_code_sum_base_price = Self::weight(self.off_code_sum, accurate);
2860    }
2861
2862    fn literals_compressed(&self) -> bool {
2863        self.literals_compressed
2864    }
2865
2866    #[cfg(test)]
2867    fn set_literals_compressed_for_tests(&mut self, enabled: bool) {
2868        self.literals_compressed = enabled;
2869    }
2870
2871    fn seed_dictionary_entropy(
2872        &mut self,
2873        huff: Option<&crate::huff0::huff0_encoder::HuffmanTable>,
2874        ll: Option<&crate::fse::fse_encoder::FSETable>,
2875        ml: Option<&crate::fse::fse_encoder::FSETable>,
2876        of: Option<&crate::fse::fse_encoder::FSETable>,
2877    ) {
2878        if huff.is_none() && ll.is_none() && ml.is_none() && of.is_none() {
2879            self.dictionary_seed = None;
2880            return;
2881        }
2882        let mut lit_bits = [0u8; HC_MAX_LIT + 1];
2883        if let Some(huff) = huff {
2884            for (sym, slot) in lit_bits.iter_mut().enumerate() {
2885                *slot = huff.num_bits_for_symbol(sym as u8).unwrap_or(0);
2886            }
2887        }
2888        let mut ll_bits = [0u8; HC_MAX_LL + 1];
2889        if let Some(ll) = ll {
2890            for (sym, slot) in ll_bits.iter_mut().enumerate() {
2891                *slot = ll.max_num_bits_for_symbol(sym as u8).unwrap_or(0);
2892            }
2893        }
2894        let mut ml_bits = [0u8; HC_MAX_ML + 1];
2895        if let Some(ml) = ml {
2896            for (sym, slot) in ml_bits.iter_mut().enumerate() {
2897                *slot = ml.max_num_bits_for_symbol(sym as u8).unwrap_or(0);
2898            }
2899        }
2900        let mut of_bits = [0u8; HC_MAX_OFF + 1];
2901        if let Some(of) = of {
2902            for (sym, slot) in of_bits.iter_mut().enumerate() {
2903                *slot = of.max_num_bits_for_symbol(sym as u8).unwrap_or(0);
2904            }
2905        }
2906        self.dictionary_seed = Some(HcDictEntropySeed {
2907            has_lit: huff.is_some(),
2908            has_ll: ll.is_some(),
2909            has_ml: ml.is_some(),
2910            has_of: of.is_some(),
2911            lit_bits,
2912            ll_bits,
2913            ml_bits,
2914            of_bits,
2915        });
2916    }
2917
2918    fn apply_seeded_table<const N: usize>(
2919        table: &mut [u32; N],
2920        bits: &[u8; N],
2921        scale_log: u8,
2922    ) -> u32 {
2923        let mut sum = 0u32;
2924        for (slot, &bit_cost) in table.iter_mut().zip(bits.iter()) {
2925            let value = if bit_cost == 0 || bit_cost >= scale_log {
2926                1
2927            } else {
2928                1u32 << (scale_log - bit_cost)
2929            };
2930            *slot = value;
2931            sum += value;
2932        }
2933        sum
2934    }
2935
2936    #[inline(always)]
2937    fn lit_code_and_bits(lit_len: usize) -> (usize, u32) {
2938        let ll = lit_len.min(131_071) as u32;
2939        let (code, _, extra_bits) = match ll {
2940            0..=15 => (ll as u8, 0, 0),
2941            16..=17 => (16, ll - 16, 1),
2942            18..=19 => (17, ll - 18, 1),
2943            20..=21 => (18, ll - 20, 1),
2944            22..=23 => (19, ll - 22, 1),
2945            24..=27 => (20, ll - 24, 2),
2946            28..=31 => (21, ll - 28, 2),
2947            32..=39 => (22, ll - 32, 3),
2948            40..=47 => (23, ll - 40, 3),
2949            48..=63 => (24, ll - 48, 4),
2950            64..=127 => (25, ll - 64, 6),
2951            128..=255 => (26, ll - 128, 7),
2952            256..=511 => (27, ll - 256, 8),
2953            512..=1023 => (28, ll - 512, 9),
2954            1024..=2047 => (29, ll - 1024, 10),
2955            2048..=4095 => (30, ll - 2048, 11),
2956            4096..=8191 => (31, ll - 4096, 12),
2957            8192..=16383 => (32, ll - 8192, 13),
2958            16384..=32767 => (33, ll - 16384, 14),
2959            32768..=65535 => (34, ll - 32768, 15),
2960            _ => (35, ll - 65536, 16),
2961        };
2962        (code as usize, extra_bits as u32)
2963    }
2964
2965    #[inline(always)]
2966    fn ml_code_and_bits(match_len: usize) -> (usize, u32) {
2967        let ml = match_len.clamp(3, 131_074) as u32;
2968        let (code, _, extra_bits) = match ml {
2969            3..=34 => (ml as u8 - 3, 0, 0),
2970            35..=36 => (32, ml - 35, 1),
2971            37..=38 => (33, ml - 37, 1),
2972            39..=40 => (34, ml - 39, 1),
2973            41..=42 => (35, ml - 41, 1),
2974            43..=46 => (36, ml - 43, 2),
2975            47..=50 => (37, ml - 47, 2),
2976            51..=58 => (38, ml - 51, 3),
2977            59..=66 => (39, ml - 59, 3),
2978            67..=82 => (40, ml - 67, 4),
2979            83..=98 => (41, ml - 83, 4),
2980            99..=130 => (42, ml - 99, 5),
2981            131..=258 => (43, ml - 131, 7),
2982            259..=514 => (44, ml - 259, 8),
2983            515..=1026 => (45, ml - 515, 9),
2984            1027..=2050 => (46, ml - 1027, 10),
2985            2051..=4098 => (47, ml - 2051, 11),
2986            4099..=8194 => (48, ml - 4099, 12),
2987            8195..=16386 => (49, ml - 8195, 13),
2988            16387..=32770 => (50, ml - 16387, 14),
2989            32771..=65538 => (51, ml - 32771, 15),
2990            _ => (52, ml - 65539, 16),
2991        };
2992        (code as usize, extra_bits as u32)
2993    }
2994
2995    fn rescale_freqs(&mut self, src: &[u8], profile: HcOptimalCostProfile) {
2996        self.price_type = HcOptPriceType::Dynamic;
2997        if self.lit_length_sum == 0 {
2998            if src.len() <= HC_PREDEF_THRESHOLD {
2999                self.price_type = HcOptPriceType::Predefined;
3000            }
3001            if let Some(seed) = self.dictionary_seed.take() {
3002                if seed.has_lit || seed.has_ll || seed.has_ml || seed.has_of {
3003                    self.price_type = HcOptPriceType::Dynamic;
3004                }
3005                if seed.has_lit && self.literals_compressed() {
3006                    self.lit_sum = Self::apply_seeded_table(&mut self.lit_freq, &seed.lit_bits, 11);
3007                } else {
3008                    if self.literals_compressed() {
3009                        self.lit_freq.fill(0);
3010                        for &byte in src {
3011                            self.lit_freq[byte as usize] =
3012                                self.lit_freq[byte as usize].saturating_add(1);
3013                        }
3014                        self.lit_sum = Self::downscale_stats(&mut self.lit_freq, 8, false);
3015                        if self.lit_sum == 0 {
3016                            self.lit_freq[0] = 1;
3017                            self.lit_sum = 1;
3018                        }
3019                    } else {
3020                        self.lit_freq.fill(0);
3021                        self.lit_sum = 0;
3022                    }
3023                }
3024
3025                if seed.has_ll {
3026                    self.lit_length_sum =
3027                        Self::apply_seeded_table(&mut self.lit_length_freq, &seed.ll_bits, 10);
3028                } else {
3029                    let base_ll_freqs: [u32; HC_MAX_LL + 1] = [
3030                        4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3031                        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3032                    ];
3033                    self.lit_length_freq = base_ll_freqs;
3034                    self.lit_length_sum = base_ll_freqs.iter().copied().sum();
3035                }
3036
3037                if seed.has_ml {
3038                    self.match_length_sum =
3039                        Self::apply_seeded_table(&mut self.match_length_freq, &seed.ml_bits, 10);
3040                } else {
3041                    self.match_length_freq.fill(1);
3042                    self.match_length_sum = (HC_MAX_ML + 1) as u32;
3043                }
3044
3045                if seed.has_of {
3046                    self.off_code_sum =
3047                        Self::apply_seeded_table(&mut self.off_code_freq, &seed.of_bits, 10);
3048                } else {
3049                    let base_off_freqs: [u32; HC_MAX_OFF + 1] = [
3050                        6, 2, 1, 1, 2, 3, 4, 4, 4, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3051                        1, 1, 1, 1, 1, 1, 1,
3052                    ];
3053                    self.off_code_freq = base_off_freqs;
3054                    self.off_code_sum = base_off_freqs.iter().copied().sum();
3055                }
3056            } else {
3057                if self.literals_compressed() {
3058                    self.lit_freq.fill(0);
3059                    for &byte in src {
3060                        self.lit_freq[byte as usize] =
3061                            self.lit_freq[byte as usize].saturating_add(1);
3062                    }
3063                    self.lit_sum = Self::downscale_stats(&mut self.lit_freq, 8, false);
3064                    if self.lit_sum == 0 {
3065                        self.lit_freq[0] = 1;
3066                        self.lit_sum = 1;
3067                    }
3068                } else {
3069                    self.lit_freq.fill(0);
3070                    self.lit_sum = 0;
3071                }
3072
3073                let base_ll_freqs: [u32; HC_MAX_LL + 1] = [
3074                    4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3075                    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3076                ];
3077                self.lit_length_freq = base_ll_freqs;
3078                self.lit_length_sum = base_ll_freqs.iter().copied().sum();
3079
3080                self.match_length_freq.fill(1);
3081                self.match_length_sum = (HC_MAX_ML + 1) as u32;
3082
3083                let base_off_freqs: [u32; HC_MAX_OFF + 1] = [
3084                    6, 2, 1, 1, 2, 3, 4, 4, 4, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3085                    1, 1, 1, 1, 1, 1,
3086                ];
3087                self.off_code_freq = base_off_freqs;
3088                self.off_code_sum = base_off_freqs.iter().copied().sum();
3089            }
3090        } else {
3091            if self.literals_compressed() {
3092                self.lit_sum = Self::scale_stats(&mut self.lit_freq, 12);
3093            }
3094            self.lit_length_sum = Self::scale_stats(&mut self.lit_length_freq, 11);
3095            self.match_length_sum = Self::scale_stats(&mut self.match_length_freq, 11);
3096            self.off_code_sum = Self::scale_stats(&mut self.off_code_freq, 11);
3097        }
3098        self.set_base_prices(profile.accurate);
3099    }
3100
3101    fn update_stats(&mut self, lit_len: usize, literals: &[u8], off_base: u32, match_len: usize) {
3102        // Donor parity (ZSTD_updateStats). All freq sums bounded by
3103        // block_size * LITFREQ_ADD ≤ 256K * 2 = 512K ≪ u32::MAX.
3104        if self.literals_compressed() {
3105            for &byte in literals.iter().take(lit_len) {
3106                self.lit_freq[byte as usize] += HC_LITFREQ_ADD;
3107            }
3108            self.lit_sum += (lit_len as u32) * HC_LITFREQ_ADD;
3109        }
3110
3111        let (ll_code, _) = Self::lit_code_and_bits(lit_len);
3112        self.lit_length_freq[ll_code] += 1;
3113        self.lit_length_sum += 1;
3114
3115        // off_base ≥ 1 ⇒ leading_zeros ≤ 31 ⇒ 31-lz ≥ 0.
3116        let off_code = ((31 - off_base.max(1).leading_zeros()) as usize).min(HC_MAX_OFF);
3117        self.off_code_freq[off_code] += 1;
3118        self.off_code_sum += 1;
3119
3120        let (ml_code, _) = Self::ml_code_and_bits(match_len);
3121        self.match_length_freq[ml_code] += 1;
3122        self.match_length_sum += 1;
3123    }
3124}
3125
/// Per-parse-mode settings for the optimal-parse cost model.
///
/// Instances are produced by `HcOptimalCostProfile::for_mode`; the pricing
/// methods on this type read `accurate`, and the optimal-plan macro body
/// copies the profile and adjusts `sufficient_match_len` per pass.
#[derive(Copy, Clone)]
struct HcOptimalCostProfile {
    /// Search-depth budget chosen per mode (8 / 32 / 32 / 512 in `for_mode`).
    /// NOTE(review): presumably caps how many chain/tree candidates are
    /// visited per position — confirm at the use sites.
    max_chain_depth: usize,
    /// Match-length threshold. `build_optimal_plan_impl_body` overwrites it
    /// with `sufficient_match_len_for_pass(profile)` and takes its forced
    /// path when the longest candidate is strictly longer than this value.
    sufficient_match_len: usize,
    /// Forwarded to `HcOptState::weight` by the price methods and to
    /// `set_base_prices` to select the pricing variant.
    accurate: bool,
    /// Set for `Lazy2`/`BtOpt`, cleared for `BtUltra`/`BtUltra2`.
    /// NOTE(review): presumably selects the `FAVOR_SMALL_OFFSETS` const
    /// generic of `offset_price_for` — confirm at the call sites.
    favor_small_offsets: bool,
}
3133
3134impl HcOptimalCostProfile {
3135    fn for_mode(mode: HcParseMode, pass2: bool) -> Self {
3136        match mode {
3137            HcParseMode::Lazy2 => Self {
3138                max_chain_depth: 8,
3139                sufficient_match_len: 32,
3140                accurate: false,
3141                favor_small_offsets: true,
3142            },
3143            HcParseMode::BtOpt => Self {
3144                max_chain_depth: 32,
3145                sufficient_match_len: usize::MAX,
3146                accurate: false,
3147                favor_small_offsets: true,
3148            },
3149            HcParseMode::BtUltra => Self {
3150                max_chain_depth: 32,
3151                sufficient_match_len: usize::MAX,
3152                accurate: true,
3153                favor_small_offsets: false,
3154            },
3155            HcParseMode::BtUltra2 => {
3156                let _ = pass2;
3157                Self {
3158                    max_chain_depth: 512,
3159                    sufficient_match_len: usize::MAX,
3160                    accurate: true,
3161                    // Donor opt2 path doesn't apply the small-offset
3162                    // decompression-speed handicap.
3163                    favor_small_offsets: false,
3164                }
3165            }
3166        }
3167    }
3168
3169    fn literal_price(&self, stats: &HcOptState, byte: u8) -> u32 {
3170        if !stats.literals_compressed() {
3171            return 8 * HC_BITCOST_MULTIPLIER;
3172        }
3173        if matches!(stats.price_type, HcOptPriceType::Predefined) {
3174            return 6 * HC_BITCOST_MULTIPLIER;
3175        }
3176        // Donor parity: ZSTD_rawLiteralsCost asserts lit_sum_base_price ≥
3177        // BITCOST_MULTIPLIER, then clamps lit_weight to that range, so the
3178        // final subtract never underflows.
3179        debug_assert!(stats.lit_sum_base_price >= HC_BITCOST_MULTIPLIER);
3180        let lit_max = stats.lit_sum_base_price - HC_BITCOST_MULTIPLIER;
3181        let mut lit_weight = HcOptState::weight(stats.lit_freq[byte as usize], self.accurate);
3182        if lit_weight > lit_max {
3183            lit_weight = lit_max;
3184        }
3185        stats.lit_sum_base_price - lit_weight
3186    }
3187
3188    fn lit_length_price(&self, stats: &HcOptState, lit_len: usize) -> u32 {
3189        if lit_len == HC_BLOCKSIZE_MAX {
3190            // Donor parity: ZSTD_litLengthPrice() handles the non-representable
3191            // BLOCKSIZE_MAX literal-length by charging one extra bit over the
3192            // largest encodable litLength symbol.
3193            return HC_BITCOST_MULTIPLIER
3194                + self.lit_length_price(stats, HC_BLOCKSIZE_MAX.saturating_sub(1));
3195        }
3196        if matches!(stats.price_type, HcOptPriceType::Predefined) {
3197            return HcOptState::weight(lit_len as u32, self.accurate);
3198        }
3199        // ll_bits ≤ 16 ⇒ ll_bits * 256 ≤ 4096, sum no overflow.
3200        let (ll_code, ll_bits) = HcOptState::lit_code_and_bits(lit_len);
3201        ll_bits * HC_BITCOST_MULTIPLIER + stats.lit_length_sum_base_price
3202            - HcOptState::weight(stats.lit_length_freq[ll_code], self.accurate)
3203    }
3204
3205    #[inline(always)]
3206    fn offset_price_for<const ACCURATE_PRICE: bool, const FAVOR_SMALL_OFFSETS: bool>(
3207        &self,
3208        stats: &HcOptState,
3209        off_base: u32,
3210    ) -> u32 {
3211        // Donor parity (ZSTD_getMatchPrice). off_base ≥ 1 ⇒ leading_zeros ≤ 31.
3212        // off_code ≤ 31, (16 + off_code) * 256 ≤ 12032, sums no overflow.
3213        let off_code = 31 - off_base.max(1).leading_zeros();
3214        if matches!(stats.price_type, HcOptPriceType::Predefined) {
3215            return (16 + off_code) * HC_BITCOST_MULTIPLIER;
3216        }
3217        let mut price = off_code * HC_BITCOST_MULTIPLIER
3218            + (stats.off_code_sum_base_price
3219                - HcOptState::weight(stats.off_code_freq[off_code as usize], ACCURATE_PRICE));
3220        if FAVOR_SMALL_OFFSETS && off_code >= 20 {
3221            price += (off_code - 19) * 2 * HC_BITCOST_MULTIPLIER;
3222        }
3223        price
3224    }
3225
3226    #[inline(always)]
3227    fn match_length_price(&self, stats: &HcOptState, match_len: usize) -> u32 {
3228        // Donor parity: mlBase = match_len - MINMATCH; callers guarantee
3229        // match_len ≥ HC_FORMAT_MINMATCH. ml_bits ≤ 16, * 256 ≤ 4096.
3230        debug_assert!(match_len >= HC_FORMAT_MINMATCH);
3231        let ml_base = match_len - HC_FORMAT_MINMATCH;
3232        if matches!(stats.price_type, HcOptPriceType::Predefined) {
3233            return HcOptState::weight(ml_base as u32, self.accurate);
3234        }
3235        let (ml_code, ml_bits) = HcOptState::ml_code_and_bits(match_len);
3236        ml_bits * HC_BITCOST_MULTIPLIER
3237            + (stats.match_length_sum_base_price
3238                - HcOptState::weight(stats.match_length_freq[ml_code], self.accurate))
3239    }
3240
3241    #[inline(always)]
3242    fn match_price_from_parts(&self, off_price: u32, ml_price: u32, stats: &HcOptState) -> u32 {
3243        let mut price = off_price + ml_price;
3244        debug_assert!(price >= off_price);
3245        if !matches!(stats.price_type, HcOptPriceType::Predefined) {
3246            price += HC_BITCOST_MULTIPLIER / 5;
3247            debug_assert!(price >= off_price);
3248        }
3249        price
3250    }
3251}
3252
/// `bt_insert_step_no_rebase` body parameterized over the per-CPU
/// `count_match_from_indices` symbol. Each kernel-specific wrapper invokes
/// the macro with its own `fastpath::<kernel>::count_match_from_indices`
/// path so the call resolves inside the wrapper's `#[target_feature]`
/// umbrella and inlines instead of paying the function-call ABI per BT walk
/// iteration. Used only by `HcMatchGenerator` BT walk wrappers below.
///
/// Arguments:
/// * `$self` – the matcher whose `history`, `hash_table` and `chain_table`
///   are read and updated.
/// * `$abs_pos` – absolute position being inserted.
/// * `$current_abs_end` – absolute end used to bound match extension.
/// * `$target_abs` – passed to `window_low_abs_for_target` to obtain the
///   lowest acceptable candidate position.
/// * `$cmf` – path of the match-length counting function (called in an
///   `unsafe` block).
///
/// The expansion contains `return 1;` statements, so it must be used as the
/// body of a function returning an integer: it returns `1` early when fewer
/// than 8 bytes are available at the position or when the position has no
/// relative index. Otherwise it evaluates to
/// `max(speed_positions, match_end_abs - (abs_pos + 8))`.
macro_rules! bt_insert_step_no_rebase_body {
    ($self:expr, $abs_pos:ident, $current_abs_end:ident, $target_abs:ident, $cmf:path) => {{
        // Index of the inserted position inside the live history slice.
        let idx = $abs_pos - $self.history_abs_start;
        let concat = &$self.history[$self.history_start..];
        // At least 8 bytes must be available at `idx`; otherwise leave the
        // enclosing function with a result of 1.
        if idx + 8 > concat.len() {
            return 1;
        }
        // Upper bound on any match length: distance to the current end.
        let tail_limit = $current_abs_end.saturating_sub($abs_pos);
        let hash =
            HcMatchGenerator::hash_position_at(concat, idx, $self.hash_log, $self.bt_hash_mls());
        let Some(relative_pos) = $self.relative_position($abs_pos) else {
            return 1;
        };
        // Table entries hold `relative_pos + 1`.
        // NOTE(review): presumably so that 0 stays free as the empty marker —
        // confirm against `HC_EMPTY`.
        let stored = relative_pos + 1;
        let bt_mask = $self.bt_mask();
        // Candidates at or below `bt_low` end the walk (see the checks in the
        // loop below).
        let bt_low = $abs_pos.saturating_sub(bt_mask);
        let window_low = $self.window_low_abs_for_target($target_abs);
        // With these initial values the final result is at least 1:
        // (abs_pos + 9) - (abs_pos + 8).
        let mut match_end_abs = $abs_pos.saturating_add(9);
        let mut best_len = 8usize;
        let mut compares_left = $self.search_depth;
        // Match lengths recorded for the most recent candidate on the
        // "smaller" and "larger" side; their minimum seeds the next comparison.
        let mut common_length_smaller = 0usize;
        let mut common_length_larger = 0usize;
        // Each position owns two adjacent `chain_table` slots: `pair_idx` and
        // `pair_idx + 1`.
        let pair_idx = $self.bt_pair_index_for_abs($abs_pos);
        let mut smaller_slot = pair_idx;
        let mut larger_slot = pair_idx + 1;
        // Read the previous head for this hash, then make the inserted
        // position the new head.
        let mut match_stored = $self.hash_table[hash];
        $self.hash_table[hash] = stored;

        while compares_left > 0 {
            // Stop when the stored value does not decode to a position.
            let Some(candidate_abs) = HcMatchGenerator::stored_abs_position_fast(
                match_stored,
                $self.position_base,
                $self.index_shift,
            ) else {
                break;
            };
            // Only candidates in `[window_low, abs_pos)` are followed.
            if candidate_abs < window_low || candidate_abs >= $abs_pos {
                break;
            }
            compares_left -= 1;

            // Read the candidate's two links before any slot is overwritten.
            let next_pair_idx = $self.bt_pair_index_for_abs(candidate_abs);
            let next_smaller = $self.chain_table[next_pair_idx];
            let next_larger = $self.chain_table[next_pair_idx + 1];
            let seed_len = common_length_smaller.min(common_length_larger);
            let candidate_idx = candidate_abs - $self.history_abs_start;
            // SAFETY: BT walk invariant — `candidate_idx + tail_limit ≤
            // concat.len()` since the candidate is within
            // `[history_abs_start, abs_pos)` and `tail_limit ≤
            // current_abs_end - abs_pos`.
            let match_len = unsafe { $cmf(concat, idx, candidate_idx, tail_limit, seed_len) };

            // Track the longest match and the furthest candidate end seen.
            if match_len > best_len {
                best_len = match_len;
                let candidate_end = candidate_abs.saturating_add(match_len);
                if candidate_end > match_end_abs {
                    match_end_abs = candidate_end;
                }
            }

            // The match reached the tail limit: there is no following byte to
            // compare, so stop without linking this candidate.
            if match_len >= tail_limit {
                break;
            }

            // Compare the first differing byte to pick the side for this
            // candidate.
            let candidate_next = candidate_idx + match_len;
            let current_next = idx + match_len;
            if concat[candidate_next] < concat[current_next] {
                // Candidate sorts below the inserted position.
                $self.chain_table[smaller_slot] = match_stored;
                common_length_smaller = match_len;
                if candidate_abs <= bt_low {
                    // `usize::MAX` marks the slot as "do not terminate" below.
                    smaller_slot = usize::MAX;
                    break;
                }
                // Continue through the candidate's second slot / larger link.
                smaller_slot = next_pair_idx + 1;
                match_stored = next_larger;
            } else {
                // Candidate sorts at or above the inserted position.
                $self.chain_table[larger_slot] = match_stored;
                common_length_larger = match_len;
                if candidate_abs <= bt_low {
                    larger_slot = usize::MAX;
                    break;
                }
                // Continue through the candidate's first slot / smaller link.
                larger_slot = next_pair_idx;
                match_stored = next_smaller;
            }
        }

        // Terminate whichever pending slots are still valid.
        if smaller_slot != usize::MAX {
            $self.chain_table[smaller_slot] = HC_EMPTY;
        }
        if larger_slot != usize::MAX {
            $self.chain_table[larger_slot] = HC_EMPTY;
        }

        // Matches longer than 384 bytes contribute up to 192 positions; the
        // result is the larger of that and the distance from `abs_pos + 8` to
        // the furthest match end.
        let speed_positions = if best_len > 384 {
            (best_len - 384).min(192)
        } else {
            0
        };
        speed_positions.max(match_end_abs.saturating_sub($abs_pos.saturating_add(8)))
    }};
}
3361
3362/// `build_optimal_plan_impl` body parameterized over the per-CPU
3363/// `collect_optimal_candidates_initialized_<kernel>` method name. Caller
/// passes its `&mut self`, the six DP entry-point arguments, and the
3365/// kernel-specific collect method. Each per-kernel wrapper invokes this
3366/// macro inside its own `#[target_feature]` umbrella so the per-position
3367/// `$collect` call inlines and the entire DP loop runs as one straight-line
3368/// hot path without an ABI barrier between the DP and the match-gathering
3369/// pipeline.
3370///
3371/// Body is ~730 lines but mechanically identical across kernels — the macro
3372/// keeps a single source of truth. The two const generics
3373/// (`ACCURATE_PRICE`, `FAVOR_SMALL_OFFSETS`) come from the wrapper's
3374/// generic parameter list and are referenced as bare identifiers; macro
3375/// hygiene resolves them at the expansion site.
3376macro_rules! build_optimal_plan_impl_body {
3377    (
3378        $self:expr,
3379        $current:ident,
3380        $current_abs_start:ident,
3381        $current_len:ident,
3382        $initial_state:ident,
3383        $stats:ident,
3384        $out:ident,
3385        $collect:ident $(,)?
3386    ) => {{
3387        let current_abs_end = $current_abs_start + $current_len;
3388        let min_match_len = HC_OPT_MIN_MATCH_LEN;
3389        let frontier_limit = $current_len.min(HC_OPT_NUM.saturating_sub(1));
3390        let initial_reps = $initial_state.reps;
3391        let initial_litlen = $initial_state.litlen;
3392        let mut profile = $initial_state.profile;
3393        profile.sufficient_match_len = $self.sufficient_match_len_for_pass(profile);
3394        let abort_on_worse_match = $self.parse_mode == HcParseMode::BtOpt;
3395        let opt_level = matches!(
3396            $self.parse_mode,
3397            HcParseMode::BtUltra | HcParseMode::BtUltra2
3398        );
3399        let mut nodes = core::mem::take(&mut $self.opt_nodes_scratch);
3400        if nodes.len() < frontier_limit.saturating_add(2) {
3401            nodes.resize(frontier_limit.saturating_add(2), HcOptimalNode::default());
3402        }
3403        let mut candidates = core::mem::take(&mut $self.opt_candidates_scratch);
3404        candidates.clear();
3405        if candidates.capacity() < MAX_HC_SEARCH_DEPTH {
3406            candidates.reserve_exact(MAX_HC_SEARCH_DEPTH - candidates.capacity());
3407        }
3408        let mut store = core::mem::take(&mut $self.opt_store_scratch);
3409        store.clear();
3410        let mut ll_prices = core::mem::take(&mut $self.opt_ll_price_scratch);
3411        let mut ll_price_generations = core::mem::take(&mut $self.opt_ll_price_generation);
3412        if ll_prices.len() <= frontier_limit {
3413            ll_prices.resize(frontier_limit + 1, 0);
3414            ll_price_generations.resize(frontier_limit + 1, 0);
3415        }
3416        $self.opt_ll_price_stamp = $self.opt_ll_price_stamp.wrapping_add(1).max(1);
3417        let ll_price_stamp = $self.opt_ll_price_stamp;
3418        $self.opt_lit_price_stamp = $self.opt_lit_price_stamp.wrapping_add(1).max(1);
3419        let lit_price_stamp = $self.opt_lit_price_stamp;
3420        let mut ml_prices = core::mem::take(&mut $self.opt_ml_price_scratch);
3421        let mut ml_price_generations = core::mem::take(&mut $self.opt_ml_price_generation);
3422        if ml_prices.len() <= frontier_limit {
3423            ml_prices.resize(frontier_limit + 1, 0);
3424            ml_price_generations.resize(frontier_limit + 1, 0);
3425        }
3426        $self.opt_ml_price_stamp = $self.opt_ml_price_stamp.wrapping_add(1).max(1);
3427        let ml_price_stamp = $self.opt_ml_price_stamp;
3428        nodes[0] = HcOptimalNode {
3429            price: HcMatchGenerator::cached_lit_length_price(
3430                profile,
3431                $stats,
3432                initial_litlen,
3433                &mut ll_prices,
3434                &mut ll_price_generations,
3435                ll_price_stamp,
3436            ),
3437            litlen: initial_litlen as u32,
3438            reps: initial_reps,
3439            ..HcOptimalNode::default()
3440        };
3441        let sufficient_len = profile.sufficient_match_len;
3442        let ll0_price = HcMatchGenerator::cached_lit_length_price(
3443            profile,
3444            $stats,
3445            0,
3446            &mut ll_prices,
3447            &mut ll_price_generations,
3448            ll_price_stamp,
3449        );
3450        let ll1_price = HcMatchGenerator::cached_lit_length_price(
3451            profile,
3452            $stats,
3453            1,
3454            &mut ll_prices,
3455            &mut ll_price_generations,
3456            ll_price_stamp,
3457        );
3458        let mut pos = 1usize;
3459        let mut last_pos = 0usize;
3460        let mut forced_end: Option<usize> = None;
3461        let mut forced_end_state: Option<HcOptimalNode> = None;
3462        let mut seed_forced_shortest_path = false;
3463        let mut opt_ldm = HcOptLdmState {
3464            seq_store: HcRawSeqStore {
3465                pos: 0,
3466                pos_in_sequence: 0,
3467                size: $self.ldm_sequences.len(),
3468            },
3469            ..HcOptLdmState::default()
3470        };
3471        let has_ldm = !$self.ldm_sequences.is_empty();
3472        if has_ldm {
3473            $self.ldm_get_next_match_and_update_seq_store(&mut opt_ldm, 0, $current_len);
3474        }
3475
3476        // Donor-like seed at rPos=0: initialize frontier with matches starting
3477        // at current position before entering the generic forward DP loop.
3478        if $current_len >= min_match_len {
3479            let seed_ldm = if has_ldm {
3480                $self.ldm_process_match_candidate(&mut opt_ldm, 0, $current_len, min_match_len)
3481            } else {
3482                None
3483            };
3484            candidates.clear();
3485            // SAFETY: wrapper is in the same target_feature umbrella as the
3486            // `$collect` kernel variant; the runtime kernel detector already
3487            // gated entry into the wrapper.
3488            unsafe {
3489                $self.$collect::<true>(
3490                    $current_abs_start,
3491                    current_abs_end,
3492                    profile,
3493                    HcCandidateQuery {
3494                        reps: initial_reps,
3495                        lit_len: initial_litlen,
3496                        ldm_candidate: seed_ldm,
3497                    },
3498                    &mut candidates,
3499                )
3500            };
3501            if !candidates.is_empty() {
3502                last_pos = min_match_len.saturating_sub(1).min(frontier_limit);
3503                for p in 1..min_match_len.min(nodes.len()) {
3504                    HcMatchGenerator::reset_opt_node(&mut nodes[p]);
3505                    nodes[p].litlen = initial_litlen.saturating_add(p) as u32;
3506                }
3507            }
3508
3509            if let Some(candidate) = candidates.last() {
3510                let longest_len = candidate.match_len.min($current_len);
3511                if longest_len > sufficient_len {
3512                    let off_base = HcMatchGenerator::encode_offset_base_with_reps(
3513                        candidate.offset as u32,
3514                        initial_litlen,
3515                        initial_reps,
3516                    );
3517                    let off_price = profile
3518                        .offset_price_for::<ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>($stats, off_base);
3519                    let ml_price = HcMatchGenerator::cached_match_length_price(
3520                        profile,
3521                        $stats,
3522                        longest_len,
3523                        &mut ml_prices,
3524                        &mut ml_price_generations,
3525                        ml_price_stamp,
3526                    );
3527                    let seq_cost = HcMatchGenerator::add_prices(
3528                        ll0_price,
3529                        profile.match_price_from_parts(off_price, ml_price, $stats),
3530                    );
3531                    let forced_price = HcMatchGenerator::add_prices(nodes[0].price, seq_cost);
3532                    let forced_state = HcOptimalNode {
3533                        price: forced_price,
3534                        off: candidate.offset as u32,
3535                        mlen: longest_len as u32,
3536                        litlen: 0,
3537                        reps: initial_reps,
3538                    };
3539                    if longest_len < nodes.len() && forced_price < nodes[longest_len].price {
3540                        nodes[longest_len] = forced_state;
3541                    }
3542                    forced_end = Some(longest_len);
3543                    forced_end_state = Some(forced_state);
3544                    seed_forced_shortest_path = true;
3545                }
3546            }
3547            if !seed_forced_shortest_path {
3548                let mut prev_max_len = min_match_len.saturating_sub(1);
3549                for candidate in candidates.iter() {
3550                    let max_match_len = candidate.match_len.min(frontier_limit);
3551                    if max_match_len < min_match_len {
3552                        continue;
3553                    }
3554                    let start_len = prev_max_len.saturating_add(1).max(min_match_len);
3555                    if start_len > max_match_len {
3556                        prev_max_len = prev_max_len.max(max_match_len);
3557                        continue;
3558                    }
3559                    if max_match_len > last_pos {
3560                        HcMatchGenerator::reset_opt_nodes(&mut nodes, last_pos + 1, max_match_len);
3561                    }
3562                    let off_base = HcMatchGenerator::encode_offset_base_with_reps(
3563                        candidate.offset as u32,
3564                        initial_litlen,
3565                        initial_reps,
3566                    );
3567                    let off_price = profile
3568                        .offset_price_for::<ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>($stats, off_base);
3569                    debug_assert!(max_match_len < nodes.len());
3570                    let nodes0_price = nodes[0].price;
3571                    for match_len in (start_len..=max_match_len).rev() {
3572                        let ml_price = HcMatchGenerator::cached_match_length_price(
3573                            profile,
3574                            $stats,
3575                            match_len,
3576                            &mut ml_prices,
3577                            &mut ml_price_generations,
3578                            ml_price_stamp,
3579                        );
3580                        let seq_cost = HcMatchGenerator::add_prices(
3581                            ll0_price,
3582                            profile.match_price_from_parts(off_price, ml_price, $stats),
3583                        );
3584                        let next_cost = HcMatchGenerator::add_prices(nodes0_price, seq_cost);
3585                        let node_price = unsafe { nodes.get_unchecked(match_len).price };
3586                        if match_len > last_pos || next_cost < node_price {
3587                            let slot = unsafe { nodes.get_unchecked_mut(match_len) };
3588                            *slot = HcOptimalNode {
3589                                price: next_cost,
3590                                off: candidate.offset as u32,
3591                                mlen: match_len as u32,
3592                                litlen: 0,
3593                                reps: initial_reps,
3594                            };
3595                            if match_len > last_pos {
3596                                last_pos = match_len;
3597                            }
3598                        } else if abort_on_worse_match {
3599                            break;
3600                        }
3601                    }
3602                    prev_max_len = prev_max_len.max(max_match_len);
3603                }
3604                if last_pos + 1 < nodes.len() {
3605                    nodes[last_pos + 1].price = u32::MAX;
3606                }
3607            }
3608        }
3609        while !seed_forced_shortest_path && pos <= last_pos && pos <= frontier_limit {
3610            debug_assert!(pos + 1 < nodes.len());
3611            let prev_node = unsafe { *nodes.get_unchecked(pos - 1) };
3612            if prev_node.price != u32::MAX {
3613                let lit_len = prev_node.litlen as usize + 1;
3614                let lit_price = HcMatchGenerator::cached_literal_price(
3615                    profile,
3616                    $stats,
3617                    $current[pos - 1],
3618                    &mut $self.opt_lit_price_scratch,
3619                    &mut $self.opt_lit_price_generation,
3620                    lit_price_stamp,
3621                );
3622                let ll_delta = HcMatchGenerator::cached_lit_length_delta_price(
3623                    profile,
3624                    $stats,
3625                    lit_len,
3626                    &mut ll_prices,
3627                    &mut ll_price_generations,
3628                    ll_price_stamp,
3629                );
3630                let lit_cost =
3631                    HcMatchGenerator::add_price_delta(prev_node.price, lit_price, ll_delta);
3632                let node_pos_price = unsafe { nodes.get_unchecked(pos).price };
3633                if lit_cost <= node_pos_price {
3634                    let prev_match = unsafe { *nodes.get_unchecked(pos) };
3635                    let slot = unsafe { nodes.get_unchecked_mut(pos) };
3636                    *slot = prev_node;
3637                    slot.litlen = lit_len as u32;
3638                    slot.price = lit_cost;
3639                    #[allow(clippy::collapsible_if)]
3640                    if opt_level
3641                        && prev_match.mlen > 0
3642                        && prev_match.litlen == 0
3643                        && pos < $current_len
3644                    {
3645                        if ll1_price < ll0_price {
3646                            let next_lit_price = HcMatchGenerator::cached_literal_price(
3647                                profile,
3648                                $stats,
3649                                $current[pos],
3650                                &mut $self.opt_lit_price_scratch,
3651                                &mut $self.opt_lit_price_generation,
3652                                lit_price_stamp,
3653                            );
3654                            let with1literal = HcMatchGenerator::add_price_delta(
3655                                prev_match.price,
3656                                next_lit_price,
3657                                ll1_price as i32 - ll0_price as i32,
3658                            );
3659                            let ll_delta_next = HcMatchGenerator::cached_lit_length_delta_price(
3660                                profile,
3661                                $stats,
3662                                lit_len.saturating_add(1),
3663                                &mut ll_prices,
3664                                &mut ll_price_generations,
3665                                ll_price_stamp,
3666                            );
3667                            let with_more_literals = HcMatchGenerator::add_price_delta(
3668                                lit_cost,
3669                                next_lit_price,
3670                                ll_delta_next,
3671                            );
3672                            let next = pos + 1;
3673                            let next_price = unsafe { nodes.get_unchecked(next).price };
3674                            if with1literal < with_more_literals && with1literal < next_price {
3675                                // Donor parity (zstd_opt.c:1232): `cur >= prevMatch.mlen`.
3676                                debug_assert!(pos >= prev_match.mlen as usize);
3677                                let prev_pos = pos - prev_match.mlen as usize;
3678                                {
3679                                    let prev_state = unsafe { *nodes.get_unchecked(prev_pos) };
3680                                    let (_, reps_after_match) =
3681                                        HcMatchGenerator::encode_offset_with_reps(
3682                                            prev_match.off,
3683                                            prev_state.litlen as usize,
3684                                            prev_state.reps,
3685                                        );
3686                                    let slot = unsafe { nodes.get_unchecked_mut(next) };
3687                                    *slot = prev_match;
3688                                    slot.reps = reps_after_match;
3689                                    slot.litlen = 1;
3690                                    slot.price = with1literal;
3691                                    if next > last_pos {
3692                                        last_pos = next;
3693                                    }
3694                                }
3695                            }
3696                        }
3697                    }
3698                }
3699            }
3700
3701            let mut base_node = unsafe { *nodes.get_unchecked(pos) };
3702            if base_node.price == u32::MAX {
3703                pos += 1;
3704                continue;
3705            }
3706            if base_node.mlen > 0 && base_node.litlen == 0 {
3707                // Donor parity (zstd_opt.c:1255): `cur >= opt[cur].mlen`.
3708                debug_assert!(pos >= base_node.mlen as usize);
3709                let prev_pos = pos - base_node.mlen as usize;
3710                {
3711                    let prev_state = unsafe { *nodes.get_unchecked(prev_pos) };
3712                    let (_, reps_after_match) = HcMatchGenerator::encode_offset_with_reps(
3713                        base_node.off,
3714                        prev_state.litlen as usize,
3715                        prev_state.reps,
3716                    );
3717                    base_node.reps = reps_after_match;
3718                    unsafe { nodes.get_unchecked_mut(pos).reps = reps_after_match };
3719                }
3720            }
3721            let base_cost = base_node.price;
3722
3723            if pos + 8 > $current_len {
3724                pos += 1;
3725                continue;
3726            }
3727
3728            if pos == last_pos {
3729                break;
3730            }
3731
3732            let next_price = unsafe { nodes.get_unchecked(pos + 1).price };
3733            if abort_on_worse_match
3734                && next_price <= base_cost.saturating_add(HC_BITCOST_MULTIPLIER / 2)
3735            {
3736                pos += 1;
3737                continue;
3738            }
3739
3740            let abs_pos = $current_abs_start + pos;
3741            let ldm_candidate = if has_ldm {
3742                $self.ldm_process_match_candidate(
3743                    &mut opt_ldm,
3744                    pos,
3745                    $current_len - pos,
3746                    min_match_len,
3747                )
3748            } else {
3749                None
3750            };
3751            candidates.clear();
3752            // SAFETY: same umbrella as `$collect`.
3753            unsafe {
3754                $self.$collect::<true>(
3755                    abs_pos,
3756                    current_abs_end,
3757                    profile,
3758                    HcCandidateQuery {
3759                        reps: base_node.reps,
3760                        lit_len: base_node.litlen as usize,
3761                        ldm_candidate,
3762                    },
3763                    &mut candidates,
3764                )
3765            };
3766            if let Some(candidate) = candidates.last() {
3767                let longest_len = candidate.match_len.min($current_len - pos);
3768                if longest_len > sufficient_len
3769                    || pos + longest_len >= HC_OPT_NUM
3770                    || pos + longest_len >= $current_len
3771                {
3772                    let lit_len = base_node.litlen as usize;
3773                    let off_base = HcMatchGenerator::encode_offset_base_with_reps(
3774                        candidate.offset as u32,
3775                        lit_len,
3776                        base_node.reps,
3777                    );
3778                    let off_price = profile
3779                        .offset_price_for::<ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>($stats, off_base);
3780                    let ml_price = HcMatchGenerator::cached_match_length_price(
3781                        profile,
3782                        $stats,
3783                        longest_len,
3784                        &mut ml_prices,
3785                        &mut ml_price_generations,
3786                        ml_price_stamp,
3787                    );
3788                    let seq_cost = HcMatchGenerator::add_prices(
3789                        ll0_price,
3790                        profile.match_price_from_parts(off_price, ml_price, $stats),
3791                    );
3792                    let forced_price = HcMatchGenerator::add_prices(base_cost, seq_cost);
3793                    let end_pos = (pos + longest_len).min($current_len);
3794                    forced_end = Some(end_pos);
3795                    forced_end_state = Some(HcOptimalNode {
3796                        price: forced_price,
3797                        off: candidate.offset as u32,
3798                        mlen: longest_len as u32,
3799                        litlen: 0,
3800                        reps: base_node.reps,
3801                    });
3802                    break;
3803                }
3804            }
3805            let mut prev_max_len = min_match_len.saturating_sub(1);
3806            for candidate in candidates.iter() {
3807                let max_match_len = candidate
3808                    .match_len
3809                    .min($current_len - pos)
3810                    .min(frontier_limit.saturating_sub(pos));
3811                let min_len = min_match_len;
3812                if max_match_len < min_len {
3813                    continue;
3814                }
3815                let start_len = prev_max_len.saturating_add(1).max(min_len);
3816                if start_len > max_match_len {
3817                    prev_max_len = prev_max_len.max(max_match_len);
3818                    continue;
3819                }
3820                let max_next = pos + max_match_len;
3821                if max_next > last_pos {
3822                    HcMatchGenerator::reset_opt_nodes(&mut nodes, last_pos + 1, max_next);
3823                }
3824                let lit_len = base_node.litlen as usize;
3825                let off_base = HcMatchGenerator::encode_offset_base_with_reps(
3826                    candidate.offset as u32,
3827                    lit_len,
3828                    base_node.reps,
3829                );
3830                let off_price = profile
3831                    .offset_price_for::<ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>($stats, off_base);
3832                debug_assert!(pos + max_match_len < nodes.len());
3833                for match_len in (start_len..=max_match_len).rev() {
3834                    let next = pos + match_len;
3835                    let ml_price = HcMatchGenerator::cached_match_length_price(
3836                        profile,
3837                        $stats,
3838                        match_len,
3839                        &mut ml_prices,
3840                        &mut ml_price_generations,
3841                        ml_price_stamp,
3842                    );
3843                    let seq_cost = HcMatchGenerator::add_prices(
3844                        ll0_price,
3845                        profile.match_price_from_parts(off_price, ml_price, $stats),
3846                    );
3847                    let next_cost = HcMatchGenerator::add_prices(base_cost, seq_cost);
3848                    let node_next_price = unsafe { nodes.get_unchecked(next).price };
3849                    let improved = next > last_pos || next_cost < node_next_price;
3850                    if improved {
3851                        let slot = unsafe { nodes.get_unchecked_mut(next) };
3852                        *slot = HcOptimalNode {
3853                            price: next_cost,
3854                            off: candidate.offset as u32,
3855                            mlen: match_len as u32,
3856                            litlen: 0,
3857                            reps: base_node.reps,
3858                        };
3859                        if next > last_pos {
3860                            last_pos = next;
3861                        }
3862                    } else if abort_on_worse_match {
3863                        break;
3864                    }
3865                }
3866                prev_max_len = prev_max_len.max(max_match_len);
3867            }
3868
3869            if last_pos + 1 < nodes.len() {
3870                unsafe {
3871                    nodes.get_unchecked_mut(last_pos + 1).price = u32::MAX;
3872                }
3873            }
3874            pos += 1;
3875        }
3876
3877        if last_pos == 0 {
3878            if $current_len == 0 {
3879                let price = nodes[0].price;
3880                return $self.finish_optimal_plan(
3881                    HcOptimalPlanBuffers {
3882                        nodes,
3883                        candidates,
3884                        store,
3885                        ll_prices,
3886                        ll_price_generations,
3887                        ml_prices,
3888                        ml_price_generations,
3889                    },
3890                    (price, initial_reps, initial_litlen, 0),
3891                );
3892            }
3893            let lit_price = HcMatchGenerator::cached_literal_price(
3894                profile,
3895                $stats,
3896                $current[0],
3897                &mut $self.opt_lit_price_scratch,
3898                &mut $self.opt_lit_price_generation,
3899                lit_price_stamp,
3900            );
3901            let next_litlen = initial_litlen.saturating_add(1);
3902            let ll_delta = HcMatchGenerator::cached_lit_length_delta_price(
3903                profile,
3904                $stats,
3905                next_litlen,
3906                &mut ll_prices,
3907                &mut ll_price_generations,
3908                ll_price_stamp,
3909            );
3910            let price = HcMatchGenerator::add_price_delta(nodes[0].price, lit_price, ll_delta);
3911            return $self.finish_optimal_plan(
3912                HcOptimalPlanBuffers {
3913                    nodes,
3914                    candidates,
3915                    store,
3916                    ll_prices,
3917                    ll_price_generations,
3918                    ml_prices,
3919                    ml_price_generations,
3920                },
3921                (price, initial_reps, next_litlen, 1),
3922            );
3923        }
3924
3925        let target_pos = forced_end.unwrap_or(last_pos.min(frontier_limit));
3926        let last_stretch = if let Some(forced_state) = forced_end_state {
3927            forced_state
3928        } else {
3929            nodes[target_pos]
3930        };
3931        if last_stretch.price == u32::MAX {
3932            return $self.finish_optimal_plan(
3933                HcOptimalPlanBuffers {
3934                    nodes,
3935                    candidates,
3936                    store,
3937                    ll_prices,
3938                    ll_price_generations,
3939                    ml_prices,
3940                    ml_price_generations,
3941                },
3942                (u32::MAX, initial_reps, initial_litlen, $current_len),
3943            );
3944        }
3945
3946        if last_stretch.mlen == 0 {
3947            return $self.finish_optimal_plan(
3948                HcOptimalPlanBuffers {
3949                    nodes,
3950                    candidates,
3951                    store,
3952                    ll_prices,
3953                    ll_price_generations,
3954                    ml_prices,
3955                    ml_price_generations,
3956                },
3957                (
3958                    last_stretch.price,
3959                    last_stretch.reps,
3960                    last_stretch.litlen as usize,
3961                    target_pos.min($current_len),
3962                ),
3963            );
3964        }
3965
3966        let mut cur = target_pos.saturating_sub(last_stretch.mlen as usize);
3967        let end_reps = if last_stretch.litlen == 0 {
3968            let prev_state = nodes[cur];
3969            let (_, reps_after_match) = HcMatchGenerator::encode_offset_with_reps(
3970                last_stretch.off,
3971                prev_state.litlen as usize,
3972                prev_state.reps,
3973            );
3974            reps_after_match
3975        } else {
3976            let tail_literals = last_stretch.litlen as usize;
3977            if cur < tail_literals {
3978                return $self.finish_optimal_plan(
3979                    HcOptimalPlanBuffers {
3980                        nodes,
3981                        candidates,
3982                        store,
3983                        ll_prices,
3984                        ll_price_generations,
3985                        ml_prices,
3986                        ml_price_generations,
3987                    },
3988                    (
3989                        last_stretch.price,
3990                        last_stretch.reps,
3991                        tail_literals,
3992                        target_pos.min($current_len),
3993                    ),
3994                );
3995            }
3996            cur -= tail_literals;
3997            last_stretch.reps
3998        };
3999        let store_end = cur + 2;
4000        if store.len() <= store_end {
4001            store.resize(store_end + 1, HcOptimalNode::default());
4002        }
4003        let mut store_start;
4004        let mut stretch_pos = cur;
4005
4006        if last_stretch.litlen > 0 {
4007            store[store_end] = HcOptimalNode {
4008                litlen: last_stretch.litlen,
4009                mlen: 0,
4010                ..HcOptimalNode::default()
4011            };
4012            store_start = store_end.saturating_sub(1);
4013            store[store_start] = last_stretch;
4014        }
4015        store[store_end] = last_stretch;
4016        store_start = store_end;
4017
4018        loop {
4019            let next_stretch = nodes[stretch_pos];
4020            store[store_start].litlen = next_stretch.litlen;
4021            if next_stretch.mlen == 0 {
4022                break;
4023            }
4024            if store_start == 0 {
4025                break;
4026            }
4027            store_start -= 1;
4028            store[store_start] = next_stretch;
4029            let step = (next_stretch.litlen as usize).saturating_add(next_stretch.mlen as usize);
4030            if step == 0 || stretch_pos < step {
4031                break;
4032            }
4033            stretch_pos -= step;
4034        }
4035
4036        let mut tail_literals = initial_litlen;
4037        let mut store_pos = store_start;
4038        while store_pos <= store_end {
4039            let stretch = store[store_pos];
4040            let llen = stretch.litlen as usize;
4041            let mlen = stretch.mlen as usize;
4042            if mlen == 0 {
4043                tail_literals = llen;
4044                store_pos += 1;
4045                continue;
4046            }
4047            $out.push(HcOptimalSequence {
4048                offset: stretch.off,
4049                match_len: mlen as u32,
4050                lit_len: llen as u32,
4051            });
4052            tail_literals = 0;
4053            store_pos += 1;
4054        }
4055        let result = (
4056            last_stretch.price,
4057            end_reps,
4058            if last_stretch.litlen > 0 {
4059                last_stretch.litlen as usize
4060            } else {
4061                tail_literals
4062            },
4063            target_pos.min($current_len),
4064        );
4065        $self.finish_optimal_plan(
4066            HcOptimalPlanBuffers {
4067                nodes,
4068                candidates,
4069                store,
4070                ll_prices,
4071                ll_price_generations,
4072                ml_prices,
4073                ml_price_generations,
4074            },
4075            result,
4076        )
4077    }};
4078}
4079
/// Body of `collect_optimal_candidates_initialized`, parameterized over the
/// per-CPU kernel so that every helper it calls can sit under the same
/// `target_feature` umbrella as the expanding wrapper.
///
/// For `$abs_pos` the expansion clears `$out` and then gathers candidates in
/// this order: repcode probes (`$for_each_rep`), an optional hash3 probe
/// (`$hash3`), then either the binary-tree finder (`$bt_insert`, when
/// `$bt_matchfinder` is true) or a hash-chain walk that compares prefixes
/// with `$cpl`, and finally the LDM candidate carried by `$query`, if any.
/// Rep, hash3, chain and LDM candidates are appended through
/// `HcMatchGenerator::push_candidate_ladder`.
///
/// Two early exits report only the LDM candidate: when `$abs_pos` is below
/// `skip_insert_until_abs`, and when fewer than 4 bytes of live history
/// remain at `$abs_pos`.
///
/// The expansion contains bare `return;` statements, so it must be expanded
/// directly inside a function returning `()`.
macro_rules! collect_optimal_candidates_initialized_body {
    (
        $self:expr,
        $abs_pos:ident,
        $current_abs_end:ident,
        $profile:ident,
        $query:ident,
        $out:ident,
        $bt_matchfinder:ident,
        $bt_update:ident,
        $bt_insert:ident,
        $for_each_rep:ident,
        $hash3:ident,
        $cpl:path $(,)?
    ) => {{
        debug_assert!(!$self.hash_table.is_empty());
        debug_assert!($self.hash3_log == 0 || !$self.hash3_table.is_empty());
        debug_assert!(!$self.chain_table.is_empty());
        let min_match_len = HC_OPT_MIN_MATCH_LEN;
        let reps = $query.reps;
        let lit_len = $query.lit_len;
        let ldm_candidate = $query.ldm_candidate;
        // Threaded through every `push_candidate_ladder` call below; it starts
        // at zero on every path, including the two early exits.
        let mut best_len_for_skip = 0usize;
        $out.clear();

        // Positions below `skip_insert_until_abs` are not searched at all;
        // only a pending LDM candidate is reported.
        if $abs_pos < $self.skip_insert_until_abs {
            if let Some(ldm) = ldm_candidate {
                let _ = HcMatchGenerator::push_candidate_ladder(
                    $out,
                    &mut best_len_for_skip,
                    ldm,
                    min_match_len,
                );
            }
            return;
        }
        if $bt_matchfinder {
            // SAFETY: caller is in the same target_feature umbrella as
            // `$bt_update`; the runtime kernel detector already gated entry.
            unsafe { $self.$bt_update($abs_pos, $current_abs_end) };
        }
        let current_idx = $abs_pos - $self.history_abs_start;
        // Fewer than 4 bytes of live history left: nothing to probe.
        if current_idx + 4 > $self.live_history().len() {
            if let Some(ldm) = ldm_candidate {
                let _ = HcMatchGenerator::push_candidate_ladder(
                    $out,
                    &mut best_len_for_skip,
                    ldm,
                    min_match_len,
                );
            }
            return;
        }

        let mut skip_further_match_search = false;
        let mut rep_len_candidate_found = false;
        // SAFETY: same umbrella; closure capture is monomorphized per call.
        unsafe {
            $self.$for_each_rep(
                $abs_pos,
                lit_len,
                reps,
                $current_abs_end,
                min_match_len,
                |rep| {
                    let rep_len = rep.match_len;
                    if rep_len >= min_match_len {
                        rep_len_candidate_found = true;
                    }
                    let _ = HcMatchGenerator::push_candidate_ladder(
                        $out,
                        &mut best_len_for_skip,
                        rep,
                        min_match_len,
                    );
                    // A rep match that is long enough, or that reaches the end
                    // of the current input, ends the search for this position.
                    if rep_len > $profile.sufficient_match_len
                        || $abs_pos.saturating_add(rep_len) >= $current_abs_end
                    {
                        skip_further_match_search = true;
                    }
                },
            )
        };

        // The hash3 probe only runs while nothing of at least
        // `min_match_len` has been recorded in `best_len_for_skip`.
        if !skip_further_match_search && best_len_for_skip < min_match_len {
            $self.update_hash3_until($abs_pos);
            // SAFETY: same umbrella for hash3_candidate.
            if let Some(h3) = unsafe { $self.$hash3($abs_pos, $current_abs_end, min_match_len) } {
                let _ = HcMatchGenerator::push_candidate_ladder(
                    $out,
                    &mut best_len_for_skip,
                    h3,
                    min_match_len,
                );
                if !rep_len_candidate_found
                    && (h3.match_len > $profile.sufficient_match_len
                        || $abs_pos.saturating_add(h3.match_len) >= $current_abs_end)
                {
                    $self.skip_insert_until_abs = $abs_pos.saturating_add(1);
                    skip_further_match_search = true;
                }
            }
        }

        if !skip_further_match_search {
            if $bt_matchfinder {
                // SAFETY: same umbrella for bt_insert_and_collect_matches.
                unsafe {
                    $self.$bt_insert(
                        $abs_pos,
                        $current_abs_end,
                        $profile,
                        min_match_len,
                        &mut best_len_for_skip,
                        $out,
                    )
                };
            } else {
                $self.insert_position($abs_pos);
                let max_chain_depth = $profile.max_chain_depth.min($self.search_depth);
                let concat = &$self.history[$self.history_start..];
                let base = concat.as_ptr();
                // Loop-invariant scan bound: never past the end of the current
                // input and never past the bytes that remain in `concat` after
                // `current_idx`, mirroring the clamp used by the repcode probe.
                let scan_limit = $current_abs_end
                    .saturating_sub($abs_pos)
                    .min(concat.len().saturating_sub(current_idx));
                let mut match_end_abs = $abs_pos.saturating_add(9);
                if max_chain_depth > 0 {
                    // `take` caps the number of chain entries visited,
                    // counting the ones rejected by the range check too.
                    for candidate_abs in $self
                        .chain_candidates($abs_pos)
                        .into_iter()
                        .take(max_chain_depth)
                    {
                        if candidate_abs == usize::MAX {
                            break;
                        }
                        if candidate_abs < $self.history_abs_start || candidate_abs >= $abs_pos {
                            continue;
                        }
                        let candidate_idx = candidate_abs - $self.history_abs_start;
                        // SAFETY: `candidate_abs` lies in
                        // `[history_abs_start, abs_pos)`, so
                        // `candidate_idx < current_idx`; `scan_limit` never
                        // exceeds the bytes left in `concat` after
                        // `current_idx`, so neither pointer is read past the
                        // slice. `$cpl` is the kernel-specific
                        // common_prefix_len_ptr and shares the wrapper's
                        // target_feature set.
                        let match_len = unsafe {
                            $cpl(base.add(candidate_idx), base.add(current_idx), scan_limit)
                        };
                        if match_len < min_match_len {
                            continue;
                        }
                        let accepted = HcMatchGenerator::push_candidate_ladder(
                            $out,
                            &mut best_len_for_skip,
                            MatchCandidate {
                                start: $abs_pos,
                                offset: $abs_pos - candidate_abs,
                                match_len,
                            },
                            min_match_len,
                        );
                        if accepted {
                            match_end_abs =
                                match_end_abs.max(candidate_abs.saturating_add(match_len));
                        }
                        if match_len > HC_OPT_NUM
                            || $abs_pos.saturating_add(match_len) >= $current_abs_end
                        {
                            break;
                        }
                    }
                }
                // Only ever moves the skip marker forward.
                $self.skip_insert_until_abs = $self
                    .skip_insert_until_abs
                    .max(match_end_abs.saturating_sub(8));
            }
        }

        if let Some(ldm) = ldm_candidate {
            let _ = HcMatchGenerator::push_candidate_ladder(
                $out,
                &mut best_len_for_skip,
                ldm,
                min_match_len,
            );
        }
    }};
}
4269
/// Body of `hash3_candidate`, parameterized over the per-CPU
/// `common_prefix_len_ptr` symbol (`$cpl`).
///
/// Looks up the single hash3-table entry for the bytes at `$abs_pos` and
/// evaluates to `Some(MatchCandidate)` when that entry points at an earlier
/// in-window position whose common prefix with `$abs_pos` is at least
/// `$min_match_len` bytes long. It yields `None` when:
/// - hash3 is disabled (`hash3_log == 0`);
/// - `$abs_pos` precedes `history_abs_start`;
/// - fewer than 4 bytes of live history remain at `$abs_pos`;
/// - the table slot is missing or does not decode to a position;
/// - the stored position is outside `[history_abs_start, $abs_pos)`;
/// - the offset is `HC3_MAX_OFFSET` or more;
/// - the match is shorter than `$min_match_len`.
///
/// The expansion uses `?` and `return None`, so it must be expanded directly
/// inside a function returning `Option<MatchCandidate>`.
macro_rules! hash3_candidate_body {
    (
        $self:expr,
        $abs_pos:ident,
        $current_abs_end:ident,
        $min_match_len:ident,
        $cpl:path $(,)?
    ) => {{
        if $self.hash3_log == 0 {
            return None;
        }
        let idx = $abs_pos.checked_sub($self.history_abs_start)?;
        let concat = $self.live_history();
        if idx + 4 > concat.len() {
            return None;
        }
        let hash3 = HcMatchGenerator::hash_position_at(concat, idx, $self.hash3_log, 3);
        let entry = $self.hash3_table.get(hash3).copied().unwrap_or(HC_EMPTY);
        let candidate_abs = HcMatchGenerator::stored_abs_position_fast(
            entry,
            $self.position_base,
            $self.index_shift,
        )?;
        if candidate_abs < $self.history_abs_start || candidate_abs >= $abs_pos {
            return None;
        }
        let offset = $abs_pos - candidate_abs;
        if offset >= HC3_MAX_OFFSET {
            return None;
        }
        let candidate_idx = candidate_abs - $self.history_abs_start;
        // Bound the scan by the end of the current input AND by the bytes
        // left in `concat` after `idx`, the same clamp the repcode probe
        // applies. `idx + 4 <= concat.len()` was checked above, so the
        // subtraction cannot underflow.
        let scan_limit = $current_abs_end
            .saturating_sub($abs_pos)
            .min(concat.len() - idx);
        let base = concat.as_ptr();
        // SAFETY: `candidate_idx < idx` because `candidate_abs < abs_pos`, and
        // `idx + scan_limit <= concat.len()`, so both pointers stay inside
        // `concat` for the whole scan.
        let match_len = unsafe { $cpl(base.add(candidate_idx), base.add(idx), scan_limit) };
        (match_len >= $min_match_len).then_some(MatchCandidate {
            start: $abs_pos,
            offset,
            match_len,
        })
    }};
}
4317
/// Body of `for_each_repcode_candidate_with_reps`, parameterized over the
/// per-CPU `common_prefix_len_ptr` symbol (`$cpl`) so the prefix probe can
/// inline under the expanding wrapper's `target_feature` set.
///
/// Up to three repeat offsets are probed at `$abs_pos`. With `$lit_len == 0`
/// they are `reps[1]`, `reps[2]` and `reps[0] - 1` (the last only when
/// `reps[0] > 1`); otherwise they are `reps[0]`, `reps[1]`, `reps[2]`. An
/// offset is skipped when it is zero, larger than `$abs_pos`, or reaches
/// before `history_abs_start`. Each remaining offset whose common prefix is
/// at least `$min_match_len` bytes is handed to `$f` as a `MatchCandidate`,
/// in probe order.
///
/// The expansion contains a bare `return;` (taken when fewer than 4 bytes of
/// live history remain), so it must be expanded directly inside a function
/// returning `()`. `$f` is invoked by name, so an `FnMut` binding works.
macro_rules! for_each_repcode_candidate_body {
    (
        $self:expr,
        $abs_pos:ident,
        $lit_len:ident,
        $reps:ident,
        $current_abs_end:ident,
        $min_match_len:ident,
        $f:ident,
        $cpl:path $(,)?
    ) => {{
        // Probe order depends on whether the previous sequence had literals.
        let probe_offsets: [Option<usize>; 3] = match $lit_len {
            0 => [
                Some($reps[1] as usize),
                Some($reps[2] as usize),
                if $reps[0] > 1 {
                    Some(($reps[0] - 1) as usize)
                } else {
                    None
                },
            ],
            _ => [
                Some($reps[0] as usize),
                Some($reps[1] as usize),
                Some($reps[2] as usize),
            ],
        };

        let history = $self.live_history();
        let history_len = history.len();
        let history_ptr = history.as_ptr();
        let window_start = $self.history_abs_start;
        let cursor_idx = $abs_pos - window_start;
        if cursor_idx + 4 > history_len {
            return;
        }
        let bytes_to_end = $current_abs_end.saturating_sub($abs_pos);

        for rep in probe_offsets.iter().copied().flatten() {
            if rep == 0 {
                continue;
            }
            // Rejects offsets that reach before position zero or before the
            // start of the history window.
            let Some(source_idx) = $abs_pos
                .checked_sub(rep)
                .and_then(|source_abs| source_abs.checked_sub(window_start))
            else {
                continue;
            };
            // SAFETY: `source_idx < cursor_idx < history_len` (the offset is
            // non-zero and the early return above guarantees
            // `cursor_idx + 4 <= history_len`). `limit` is clamped to the
            // shorter remaining run, so neither pointer overruns `history`.
            let limit = bytes_to_end
                .min(history_len - cursor_idx)
                .min(history_len - source_idx);
            let found_len =
                unsafe { $cpl(history_ptr.add(source_idx), history_ptr.add(cursor_idx), limit) };
            if found_len >= $min_match_len {
                $f(MatchCandidate {
                    start: $abs_pos,
                    offset: rep,
                    match_len: found_len,
                });
            }
        }
    }};
}
4386
4387/// `bt_insert_and_collect_matches` body parameterized over the per-CPU
4388/// `count_match_from_indices` symbol. Same shape as
4389/// [`bt_insert_step_no_rebase_body`] — picks up the matching kernel through
4390/// `$cmf` so the per-iteration vector probe inlines under the wrapper's
4391/// `target_feature` umbrella. Returns nothing (matches the original method).
4392macro_rules! bt_insert_and_collect_matches_body {
4393    (
4394        $self:expr,
4395        $abs_pos:ident,
4396        $current_abs_end:ident,
4397        $profile:ident,
4398        $min_match_len:ident,
4399        $best_len_for_skip:ident,
4400        $out:ident,
4401        $cmf:path $(,)?
4402    ) => {{
4403        let idx = $abs_pos - $self.history_abs_start;
4404        let concat = &$self.history[$self.history_start..];
4405        if idx + 8 > concat.len() {
4406            return;
4407        }
4408        let tail_limit = $current_abs_end.saturating_sub($abs_pos);
4409        let hash =
4410            HcMatchGenerator::hash_position_at(concat, idx, $self.hash_log, $self.bt_hash_mls());
4411        let Some(relative_pos) = $self.relative_position($abs_pos) else {
4412            return;
4413        };
4414        let stored = relative_pos + 1;
4415        let bt_mask = $self.bt_mask();
4416        let bt_low = $abs_pos.saturating_sub(bt_mask);
4417        let window_low = $self.window_low_abs_for_target($abs_pos);
4418        let mut match_end_abs = $abs_pos.saturating_add(9);
4419        let mut compares_left = $profile.max_chain_depth.min($self.search_depth);
4420        let mut common_length_smaller = 0usize;
4421        let mut common_length_larger = 0usize;
4422        let pair_idx = $self.bt_pair_index_for_abs($abs_pos);
4423        let mut smaller_slot = pair_idx;
4424        let mut larger_slot = pair_idx + 1;
4425        let mut match_stored = $self.hash_table[hash];
4426        $self.hash_table[hash] = stored;
4427        // Donor semantics: `bestLength` starts at `lengthToBeat - 1`; rep/hash3
4428        // probing may raise it; BT then only reports strictly longer matches.
4429        let mut best_len = (*$best_len_for_skip).max($min_match_len.saturating_sub(1));
4430
4431        while compares_left > 0 {
4432            let Some(candidate_abs) = HcMatchGenerator::stored_abs_position_fast(
4433                match_stored,
4434                $self.position_base,
4435                $self.index_shift,
4436            ) else {
4437                break;
4438            };
4439            if candidate_abs < window_low || candidate_abs >= $abs_pos {
4440                break;
4441            }
4442            compares_left -= 1;
4443
4444            let next_pair_idx = $self.bt_pair_index_for_abs(candidate_abs);
4445            let next_smaller = $self.chain_table[next_pair_idx];
4446            let next_larger = $self.chain_table[next_pair_idx + 1];
4447            let seed_len = common_length_smaller.min(common_length_larger);
4448            let candidate_idx = candidate_abs - $self.history_abs_start;
4449            // SAFETY: BT walk invariant — `candidate_idx + tail_limit ≤
4450            // concat.len()`.
4451            let match_len = unsafe { $cmf(concat, idx, candidate_idx, tail_limit, seed_len) };
4452
4453            if match_len > best_len {
4454                let offset = $abs_pos - candidate_abs;
4455                let accepted = HcMatchGenerator::push_candidate_ladder(
4456                    $out,
4457                    $best_len_for_skip,
4458                    MatchCandidate {
4459                        start: $abs_pos,
4460                        offset,
4461                        match_len,
4462                    },
4463                    $min_match_len,
4464                );
4465                if accepted {
4466                    best_len = match_len;
4467                    let candidate_end = candidate_abs.saturating_add(match_len);
4468                    if candidate_end > match_end_abs {
4469                        match_end_abs = candidate_end;
4470                    }
4471                    if match_len >= tail_limit || match_len > HC_OPT_NUM {
4472                        break;
4473                    }
4474                }
4475            }
4476
4477            if match_len >= tail_limit {
4478                break;
4479            }
4480
4481            let candidate_next = candidate_idx + match_len;
4482            let current_next = idx + match_len;
4483            if concat[candidate_next] < concat[current_next] {
4484                $self.chain_table[smaller_slot] = match_stored;
4485                common_length_smaller = match_len;
4486                if candidate_abs <= bt_low {
4487                    smaller_slot = usize::MAX;
4488                    break;
4489                }
4490                smaller_slot = next_pair_idx + 1;
4491                match_stored = next_larger;
4492            } else {
4493                $self.chain_table[larger_slot] = match_stored;
4494                common_length_larger = match_len;
4495                if candidate_abs <= bt_low {
4496                    larger_slot = usize::MAX;
4497                    break;
4498                }
4499                larger_slot = next_pair_idx;
4500                match_stored = next_smaller;
4501            }
4502        }
4503
4504        if smaller_slot != usize::MAX {
4505            $self.chain_table[smaller_slot] = HC_EMPTY;
4506        }
4507        if larger_slot != usize::MAX {
4508            $self.chain_table[larger_slot] = HC_EMPTY;
4509        }
4510        $self.skip_insert_until_abs = match_end_abs.saturating_sub(8);
4511    }};
4512}
4513
4514impl HcMatchGenerator {
4515    fn donor_opt_start_cursor_and_litlen(&self, current_abs_start: usize) -> (usize, usize) {
4516        let start_cursor = usize::from(current_abs_start == self.history_abs_start);
4517        (start_cursor, start_cursor)
4518    }
4519
4520    fn sufficient_match_len_for_pass(&self, profile: HcOptimalCostProfile) -> usize {
4521        profile
4522            .sufficient_match_len
4523            .min(self.target_len)
4524            .clamp(HC_OPT_MIN_MATCH_LEN, HC_OPT_NUM - 1)
4525    }
4526
4527    fn should_run_btultra2_seed_pass(&self, current_len: usize) -> bool {
4528        self.parse_mode == HcParseMode::BtUltra2
4529            && self.opt_state.lit_length_sum == 0
4530            && self.opt_state.dictionary_seed.is_none()
4531            && !self.dictionary_primed_for_frame
4532            && self.ldm_sequences.is_empty()
4533            && self.window_size == current_len
4534            && self.history_abs_start == 0
4535            && self.window.len() == 1
4536            && current_len > HC_PREDEF_THRESHOLD
4537    }
4538
4539    fn mark_dictionary_primed(&mut self) {
4540        self.dictionary_primed_for_frame = true;
4541    }
4542
4543    fn set_dictionary_limit_from_primed_bytes(&mut self, primed_len: usize) {
4544        self.dictionary_limit_abs = if primed_len == 0 {
4545            None
4546        } else {
4547            Some(self.history_abs_start.saturating_add(primed_len))
4548        };
4549    }
4550
/// Encodes `actual_offset` against the repeat-offset history `reps` and
/// returns `(encoded offset value, updated history)`.
///
/// With literals (`lit_len > 0`) the offset is compared against
/// `reps[0]`, `reps[1]`, `reps[2]` in that order and yields codes 1, 2, 3.
/// Without literals it is compared against `reps[1]`, `reps[2]` and
/// `reps[0] - 1` (only when `reps[0] > 1`). Anything else is encoded as
/// `actual_offset + 3` (saturating).
///
/// History update:
/// * code 1 with literals leaves the history untouched;
/// * code 2 with literals, or code 1 without literals, moves the offset to
///   the front and keeps `reps[2]`;
/// * every other case pushes the offset to the front and drops `reps[2]`.
fn encode_offset_with_reps(
    actual_offset: u32,
    lit_len: usize,
    reps: [u32; 3],
) -> (u32, [u32; 3]) {
    let [rep0, rep1, rep2] = reps;
    let plain_code = actual_offset.saturating_add(3);
    let has_literals = lit_len > 0;

    let encoded = if has_literals {
        match actual_offset {
            off if off == rep0 => 1,
            off if off == rep1 => 2,
            off if off == rep2 => 3,
            _ => plain_code,
        }
    } else {
        match actual_offset {
            off if off == rep1 => 1,
            off if off == rep2 => 2,
            off if rep0 > 1 && off == rep0 - 1 => 3,
            _ => plain_code,
        }
    };

    let next_reps = match (has_literals, encoded) {
        // Repeat of the most recent offset: nothing moves.
        (true, 1) => reps,
        // Second-most-recent offset is promoted; the oldest entry survives.
        (true, 2) | (false, 1) => [actual_offset, rep0, rep2],
        // Everything else shifts the whole history down by one.
        _ => [actual_offset, rep0, rep1],
    };

    (encoded, next_reps)
}
4606
/// Encodes `actual_offset` against the repeat-offset history `reps`
/// without producing the updated history.
///
/// With literals (`lit_len > 0`) a hit on `reps[0]`, `reps[1]` or
/// `reps[2]` yields 1, 2 or 3. Without literals a hit on `reps[1]`,
/// `reps[2]` or `reps[0] - 1` (only when `reps[0] > 1`) yields 1, 2 or 3.
/// Otherwise the result is `actual_offset + 3` (saturating).
#[inline(always)]
fn encode_offset_base_with_reps(actual_offset: u32, lit_len: usize, reps: [u32; 3]) -> u32 {
    match (lit_len > 0, actual_offset) {
        (true, off) if off == reps[0] => 1,
        (true, off) if off == reps[1] => 2,
        (true, off) if off == reps[2] => 3,
        (false, off) if off == reps[1] => 1,
        (false, off) if off == reps[2] => 2,
        (false, off) if reps[0] > 1 && off == reps[0] - 1 => 3,
        (_, off) => off.saturating_add(3),
    }
}
4629
4630    fn new(max_window_size: usize) -> Self {
4631        Self {
4632            max_window_size,
4633            window: VecDeque::new(),
4634            window_size: 0,
4635            history: Vec::new(),
4636            history_start: 0,
4637            history_abs_start: 0,
4638            position_base: 0,
4639            index_shift: 0,
4640            offset_hist: [1, 4, 8],
4641            hash_table: Vec::new(),
4642            hash3_table: Vec::new(),
4643            chain_table: Vec::new(),
4644            lazy_depth: 2,
4645            hash_log: HC_HASH_LOG,
4646            chain_log: HC_CHAIN_LOG,
4647            search_depth: HC_SEARCH_DEPTH,
4648            target_len: HC_TARGET_LEN,
4649            parse_mode: HcParseMode::Lazy2,
4650            ldm_sequences: Vec::new(),
4651            next_to_update3: 0,
4652            hash3_log: HC3_HASH_LOG,
4653            skip_insert_until_abs: 0,
4654            dictionary_limit_abs: None,
4655            dictionary_primed_for_frame: false,
4656            allow_zero_relative_position: false,
4657            opt_state: HcOptState::new(),
4658            opt_nodes_scratch: Vec::new(),
4659            opt_candidates_scratch: Vec::new(),
4660            opt_store_scratch: Vec::new(),
4661            opt_segment_plan_scratch: Vec::new(),
4662            opt_seed_plan_scratch: Vec::new(),
4663            opt_ll_price_scratch: Vec::new(),
4664            opt_ll_price_generation: Vec::new(),
4665            opt_ll_price_stamp: 0,
4666            opt_lit_price_scratch: [0; HC_MAX_LIT + 1],
4667            opt_lit_price_generation: [0; HC_MAX_LIT + 1],
4668            opt_lit_price_stamp: 0,
4669            opt_ml_price_scratch: Vec::new(),
4670            opt_ml_price_generation: Vec::new(),
4671            opt_ml_price_stamp: 0,
4672        }
4673    }
4674
4675    fn configure(&mut self, config: HcConfig, window_log: u8) {
4676        let next_hash3_log = if config.parse_mode == HcParseMode::BtUltra2 {
4677            HC3_HASH_LOG.min(window_log as usize)
4678        } else {
4679            0
4680        };
4681        let resize = self.hash_log != config.hash_log
4682            || self.chain_log != config.chain_log
4683            || self.hash3_log != next_hash3_log;
4684        self.hash_log = config.hash_log;
4685        self.chain_log = config.chain_log;
4686        self.hash3_log = next_hash3_log;
4687        self.search_depth = if matches!(
4688            config.parse_mode,
4689            HcParseMode::BtOpt | HcParseMode::BtUltra | HcParseMode::BtUltra2
4690        ) {
4691            config.search_depth
4692        } else {
4693            config.search_depth.min(MAX_HC_SEARCH_DEPTH)
4694        };
4695        self.target_len = config.target_len;
4696        self.parse_mode = config.parse_mode;
4697        if resize && !self.hash_table.is_empty() {
4698            // Force reallocation on next ensure_tables() call.
4699            self.hash_table.clear();
4700            self.hash3_table.clear();
4701            self.chain_table.clear();
4702        }
4703    }
4704
4705    fn seed_dictionary_entropy(
4706        &mut self,
4707        huff: Option<&crate::huff0::huff0_encoder::HuffmanTable>,
4708        ll: Option<&crate::fse::fse_encoder::FSETable>,
4709        ml: Option<&crate::fse::fse_encoder::FSETable>,
4710        of: Option<&crate::fse::fse_encoder::FSETable>,
4711    ) {
4712        self.opt_state.seed_dictionary_entropy(huff, ll, ml, of);
4713    }
4714
4715    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
4716        self.window_size = 0;
4717        self.history.clear();
4718        self.history_start = 0;
4719        self.history_abs_start = 0;
4720        self.position_base = 0;
4721        self.index_shift = 0;
4722        self.offset_hist = [1, 4, 8];
4723        self.ldm_sequences.clear();
4724        self.next_to_update3 = 0;
4725        self.skip_insert_until_abs = 0;
4726        self.dictionary_limit_abs = None;
4727        self.dictionary_primed_for_frame = false;
4728        self.allow_zero_relative_position = false;
4729        self.opt_state.reset();
4730        self.opt_nodes_scratch.clear();
4731        self.opt_candidates_scratch.clear();
4732        self.opt_store_scratch.clear();
4733        self.opt_segment_plan_scratch.clear();
4734        self.opt_seed_plan_scratch.clear();
4735        self.opt_ll_price_scratch.clear();
4736        self.opt_ll_price_generation.clear();
4737        self.opt_ll_price_stamp = 0;
4738        self.opt_lit_price_scratch = [0; HC_MAX_LIT + 1];
4739        self.opt_lit_price_generation = [0; HC_MAX_LIT + 1];
4740        self.opt_lit_price_stamp = 0;
4741        self.opt_ml_price_scratch.clear();
4742        self.opt_ml_price_generation.clear();
4743        self.opt_ml_price_stamp = 0;
4744        if !self.hash_table.is_empty() {
4745            self.hash_table.fill(HC_EMPTY);
4746            self.hash3_table.fill(HC_EMPTY);
4747            self.chain_table.fill(HC_EMPTY);
4748        }
4749        for mut data in self.window.drain(..) {
4750            data.resize(data.capacity(), 0);
4751            reuse_space(data);
4752        }
4753    }
4754
4755    fn get_last_space(&self) -> &[u8] {
4756        self.window.back().unwrap().as_slice()
4757    }
4758
4759    // History duplicates window data for O(1) contiguous access during match
4760    // finding (common_prefix_len, extend_backwards). Same pattern as
4761    // DfastMatchGenerator. Peak: ~2x window size for data buffers + 6 MB tables.
4762    fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
4763        assert!(data.len() <= self.max_window_size);
4764        while self.window_size + data.len() > self.max_window_size {
4765            let removed = self.window.pop_front().unwrap();
4766            self.window_size -= removed.len();
4767            self.history_start += removed.len();
4768            self.history_abs_start += removed.len();
4769            reuse_space(removed);
4770        }
4771        self.compact_history();
4772        self.history.extend_from_slice(&data);
4773        self.next_to_update3 = self.next_to_update3.max(self.history_abs_start);
4774        self.window_size += data.len();
4775        self.window.push_back(data);
4776    }
4777
4778    fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
4779        while self.window_size > self.max_window_size {
4780            let removed = self.window.pop_front().unwrap();
4781            self.window_size -= removed.len();
4782            self.history_start += removed.len();
4783            self.history_abs_start += removed.len();
4784            reuse_space(removed);
4785        }
4786    }
4787
4788    /// Backfill positions from the tail of the previous slice that couldn't be
4789    /// hashed at the time (insert_position needs 4 bytes of lookahead).
4790    fn backfill_boundary_positions(&mut self, current_abs_start: usize, current_abs_end: usize) {
4791        let backfill_start = current_abs_start
4792            .saturating_sub(3)
4793            .max(self.history_abs_start);
4794        if backfill_start < current_abs_start {
4795            if self.uses_bt_matchfinder() {
4796                self.bt_update_tree_until(current_abs_start, current_abs_end);
4797            } else {
4798                self.insert_positions(backfill_start, current_abs_start);
4799            }
4800        }
4801    }
4802
4803    fn apply_limited_update_after_long_match(&mut self, current_abs_start: usize) {
4804        if !self.uses_bt_matchfinder() {
4805            return;
4806        }
4807        let gap = current_abs_start.saturating_sub(self.skip_insert_until_abs);
4808        if gap > 384 {
4809            self.skip_insert_until_abs = current_abs_start - (gap - 384).min(192);
4810        }
4811    }
4812
4813    fn skip_matching(&mut self, incompressible_hint: Option<bool>) {
4814        self.ensure_tables();
4815        let current_len = self.window.back().unwrap().len();
4816        let current_abs_start = self.history_abs_start + self.window_size - current_len;
4817        let current_abs_end = current_abs_start + current_len;
4818        self.backfill_boundary_positions(current_abs_start, current_abs_end);
4819        if self.uses_bt_matchfinder() {
4820            if incompressible_hint == Some(true) {
4821                self.bt_insert_sparse_incompressible_block(current_abs_start, current_abs_end);
4822                return;
4823            }
4824            self.bt_update_tree_until(current_abs_end, current_abs_end);
4825            return;
4826        }
4827        if incompressible_hint == Some(true) {
4828            self.insert_positions_with_step(
4829                current_abs_start,
4830                current_abs_end,
4831                INCOMPRESSIBLE_SKIP_STEP,
4832            );
4833            let dense_tail = HC_MIN_MATCH_LEN + INCOMPRESSIBLE_SKIP_STEP;
4834            let tail_start = current_abs_end
4835                .saturating_sub(dense_tail)
4836                .max(self.history_abs_start);
4837            let tail_start = tail_start.max(current_abs_start);
4838            for pos in tail_start..current_abs_end {
4839                if !(pos - current_abs_start).is_multiple_of(INCOMPRESSIBLE_SKIP_STEP) {
4840                    self.insert_position(pos);
4841                }
4842            }
4843        } else {
4844            self.insert_positions(current_abs_start, current_abs_end);
4845        }
4846    }
4847
4848    fn bt_insert_sparse_incompressible_block(
4849        &mut self,
4850        current_abs_start: usize,
4851        current_abs_end: usize,
4852    ) {
4853        let mut pos = current_abs_start;
4854        while pos < current_abs_end {
4855            self.maybe_rebase_positions(pos);
4856            let _ = self.bt_insert_step_no_rebase(pos, current_abs_end, current_abs_end);
4857            self.insert_hash3_only_no_rebase(pos);
4858            let next = pos.saturating_add(INCOMPRESSIBLE_SKIP_STEP);
4859            if next <= pos {
4860                break;
4861            }
4862            pos = next;
4863        }
4864
4865        let dense_tail = HC_MIN_MATCH_LEN + INCOMPRESSIBLE_SKIP_STEP;
4866        let tail_start = current_abs_end
4867            .saturating_sub(dense_tail)
4868            .max(self.history_abs_start)
4869            .max(current_abs_start);
4870        for pos in tail_start..current_abs_end {
4871            if (pos - current_abs_start).is_multiple_of(INCOMPRESSIBLE_SKIP_STEP) {
4872                continue;
4873            }
4874            self.maybe_rebase_positions(pos);
4875            let _ = self.bt_insert_step_no_rebase(pos, current_abs_end, current_abs_end);
4876            self.insert_hash3_only_no_rebase(pos);
4877        }
4878
4879        self.skip_insert_until_abs = self.skip_insert_until_abs.max(current_abs_end);
4880        self.next_to_update3 = self.next_to_update3.max(current_abs_end);
4881    }
4882
4883    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
4884        match self.parse_mode {
4885            HcParseMode::Lazy2 => self.start_matching_lazy(&mut handle_sequence),
4886            HcParseMode::BtOpt | HcParseMode::BtUltra | HcParseMode::BtUltra2 => {
4887                self.start_matching_optimal(&mut handle_sequence)
4888            }
4889        }
4890    }
4891
4892    fn start_matching_lazy(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
4893        self.ensure_tables();
4894
4895        let current_len = self.window.back().unwrap().len();
4896        if current_len == 0 {
4897            return;
4898        }
4899
4900        let current_abs_start = self.history_abs_start + self.window_size - current_len;
4901        let current_abs_end = current_abs_start + current_len;
4902        self.backfill_boundary_positions(current_abs_start, current_abs_end);
4903
4904        let mut pos = 0usize;
4905        let mut literals_start = 0usize;
4906        while pos + HC_MIN_MATCH_LEN <= current_len {
4907            let abs_pos = current_abs_start + pos;
4908            let lit_len = pos - literals_start;
4909
4910            let best = self.find_best_match(abs_pos, lit_len);
4911            if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
4912                self.insert_positions(abs_pos, candidate.start + candidate.match_len);
4913                let current = self.window.back().unwrap().as_slice();
4914                let start = candidate.start - current_abs_start;
4915                let literals = &current[literals_start..start];
4916                handle_sequence(Sequence::Triple {
4917                    literals,
4918                    offset: candidate.offset,
4919                    match_len: candidate.match_len,
4920                });
4921                let _ = encode_offset_with_history(
4922                    candidate.offset as u32,
4923                    literals.len() as u32,
4924                    &mut self.offset_hist,
4925                );
4926                pos = start + candidate.match_len;
4927                literals_start = pos;
4928            } else {
4929                self.insert_position(abs_pos);
4930                pos += 1;
4931            }
4932        }
4933
4934        // Insert remaining hashable positions in the tail (the matching loop
4935        // stops at HC_MIN_MATCH_LEN but insert_position only needs 4 bytes).
4936        while pos + 4 <= current_len {
4937            self.insert_position(current_abs_start + pos);
4938            pos += 1;
4939        }
4940
4941        if literals_start < current_len {
4942            let current = self.window.back().unwrap().as_slice();
4943            handle_sequence(Sequence::Literals {
4944                literals: &current[literals_start..],
4945            });
4946        }
4947    }
4948
4949    fn start_matching_optimal(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
4950        self.ensure_tables();
4951        let current_len = self.window.back().unwrap().len();
4952        if current_len == 0 {
4953            return;
4954        }
4955        let current_ptr = self.window.back().unwrap().as_ptr();
4956        // `start_matching_optimal()` mutates tables/state but never mutates or
4957        // reorders `self.window`, so this block slice remains valid for the
4958        // duration of the routine and avoids cloning the full block.
4959        let current = unsafe { core::slice::from_raw_parts(current_ptr, current_len) };
4960
4961        let current_abs_start = self.history_abs_start + self.window_size - current_len;
4962        let current_abs_end = current_abs_start + current_len;
4963        self.apply_limited_update_after_long_match(current_abs_start);
4964        let hash3_start_cursor = self.skip_insert_until_abs.max(self.history_abs_start);
4965        self.backfill_boundary_positions(current_abs_start, current_abs_end);
4966        self.next_to_update3 = hash3_start_cursor;
4967        self.prepare_ldm_candidates(current_abs_start, current_len);
4968
4969        if self.should_run_btultra2_seed_pass(current_len) {
4970            self.run_btultra2_seed_pass(current, current_abs_start, current_len);
4971        }
4972
4973        let profile = if self.parse_mode == HcParseMode::BtUltra2 {
4974            // Donor btultra2 runs one accurate opt pass after initStats_ultra.
4975            HcOptimalCostProfile::for_mode(self.parse_mode, true)
4976        } else {
4977            HcOptimalCostProfile::for_mode(self.parse_mode, false)
4978        };
4979        let mut opt_state = core::mem::replace(&mut self.opt_state, HcOptState::new());
4980        opt_state.rescale_freqs(current, profile);
4981        let mut best_plan = core::mem::take(&mut self.opt_segment_plan_scratch);
4982        best_plan.clear();
4983        let mut plan_reps = self.offset_hist;
4984        let (mut cursor, mut plan_litlen) =
4985            self.donor_opt_start_cursor_and_litlen(current_abs_start);
4986        let mut plan_literals_cursor = 0usize;
4987        let match_loop_limit = current_len.saturating_sub(8);
4988        while cursor < match_loop_limit {
4989            let remaining_len = current_len - cursor;
4990            let segment_abs_start = current_abs_start + cursor;
4991            let segment_start = best_plan.len();
4992            let (_, end_reps, end_litlen, consumed_len) = self.build_optimal_plan(
4993                &current[cursor..],
4994                segment_abs_start,
4995                remaining_len,
4996                HcOptimalPlanState {
4997                    reps: plan_reps,
4998                    litlen: plan_litlen,
4999                    profile,
5000                },
5001                &opt_state,
5002                &mut best_plan,
5003            );
5004            Self::update_plan_stats_segment(
5005                current,
5006                current_len,
5007                &best_plan[segment_start..],
5008                &mut plan_literals_cursor,
5009                &mut plan_reps,
5010                &mut opt_state,
5011                profile.accurate,
5012            );
5013            plan_reps = end_reps;
5014            plan_litlen = end_litlen;
5015            cursor += consumed_len;
5016        }
5017
5018        self.emit_optimal_plan(current_len, &best_plan, &mut handle_sequence);
5019        best_plan.clear();
5020        self.opt_segment_plan_scratch = best_plan;
5021        self.opt_state = opt_state;
5022    }
5023
5024    fn run_btultra2_seed_pass(
5025        &mut self,
5026        current: &[u8],
5027        current_abs_start: usize,
5028        current_len: usize,
5029    ) {
5030        let seed_profile = HcOptimalCostProfile::for_mode(HcParseMode::BtUltra2, true);
5031        let mut opt_state = core::mem::replace(&mut self.opt_state, HcOptState::new());
5032        opt_state.rescale_freqs(current, seed_profile);
5033        let mut seed_reps = self.offset_hist;
5034        let (mut cursor, mut seed_litlen) =
5035            self.donor_opt_start_cursor_and_litlen(current_abs_start);
5036        let mut seed_literals_cursor = 0usize;
5037        let mut seed_plan = core::mem::take(&mut self.opt_seed_plan_scratch);
5038        seed_plan.clear();
5039        let match_loop_limit = current_len.saturating_sub(8);
5040        while cursor < match_loop_limit {
5041            let remaining_len = current_len - cursor;
5042            let segment_abs_start = current_abs_start + cursor;
5043            let segment_start = seed_plan.len();
5044            let (_, end_reps, end_litlen, consumed_len) = self.build_optimal_plan(
5045                &current[cursor..],
5046                segment_abs_start,
5047                remaining_len,
5048                HcOptimalPlanState {
5049                    reps: seed_reps,
5050                    litlen: seed_litlen,
5051                    profile: seed_profile,
5052                },
5053                &opt_state,
5054                &mut seed_plan,
5055            );
5056            Self::update_plan_stats_segment(
5057                current,
5058                current_len,
5059                &seed_plan[segment_start..],
5060                &mut seed_literals_cursor,
5061                &mut seed_reps,
5062                &mut opt_state,
5063                seed_profile.accurate,
5064            );
5065            seed_plan.truncate(segment_start);
5066            seed_reps = end_reps;
5067            seed_litlen = end_litlen;
5068            cursor += consumed_len;
5069        }
5070        seed_plan.clear();
5071        self.opt_seed_plan_scratch = seed_plan;
5072        self.opt_state = opt_state;
5073
5074        // Donor initStats_ultra keeps the collected entropy statistics but
5075        // invalidates the first-pass matchfinder history before the real pass.
5076        self.position_base = self.history_abs_start;
5077        self.index_shift = current_len;
5078        self.next_to_update3 = current_abs_start;
5079        self.skip_insert_until_abs = current_abs_start;
5080        // Donor `ZSTD_initStats_ultra()` invalidates the first scan by moving
5081        // `window.base` back by `srcSize`, making the real pass start at
5082        // `curr == srcSize` instead of 0. Position 0 is therefore a valid
5083        // table entry in the second pass even though raw C tables reserve
5084        // value 0 as empty during an unshifted first pass.
5085        self.allow_zero_relative_position = true;
5086    }
5087
5088    fn update_plan_stats_segment(
5089        current: &[u8],
5090        current_len: usize,
5091        plan: &[HcOptimalSequence],
5092        literals_start: &mut usize,
5093        reps: &mut [u32; 3],
5094        opt_state: &mut HcOptState,
5095        accurate: bool,
5096    ) {
5097        if plan.is_empty() {
5098            return;
5099        }
5100        for item in plan {
5101            let lit_len = item.lit_len as usize;
5102            let match_len = item.match_len as usize;
5103            let start = literals_start.saturating_add(lit_len);
5104            if start < *literals_start || start + match_len > current_len {
5105                continue;
5106            }
5107            let literals = &current[*literals_start..start];
5108            let (off_base, next_reps) =
5109                Self::encode_offset_with_reps(item.offset, literals.len(), *reps);
5110            opt_state.update_stats(literals.len(), literals, off_base, match_len);
5111            *reps = next_reps;
5112            *literals_start = start + match_len;
5113        }
5114        opt_state.set_base_prices(accurate);
5115    }
5116
5117    fn build_optimal_plan(
5118        &mut self,
5119        current: &[u8],
5120        current_abs_start: usize,
5121        current_len: usize,
5122        initial_state: HcOptimalPlanState,
5123        stats: &HcOptState,
5124        out: &mut Vec<HcOptimalSequence>,
5125    ) -> (u32, [u32; 3], usize, usize) {
5126        let profile = initial_state.profile;
5127        match (profile.accurate, profile.favor_small_offsets) {
5128            (true, false) => self.build_optimal_plan_impl::<true, false>(
5129                current,
5130                current_abs_start,
5131                current_len,
5132                initial_state,
5133                stats,
5134                out,
5135            ),
5136            (true, true) => self.build_optimal_plan_impl::<true, true>(
5137                current,
5138                current_abs_start,
5139                current_len,
5140                initial_state,
5141                stats,
5142                out,
5143            ),
5144            (false, false) => self.build_optimal_plan_impl::<false, false>(
5145                current,
5146                current_abs_start,
5147                current_len,
5148                initial_state,
5149                stats,
5150                out,
5151            ),
5152            (false, true) => self.build_optimal_plan_impl::<false, true>(
5153                current,
5154                current_abs_start,
5155                current_len,
5156                initial_state,
5157                stats,
5158                out,
5159            ),
5160        }
5161    }
5162
5163    /// Cross-platform DP entry. Picks the kernel-specific variant so the
5164    /// entire optimal-parser DP body (per-position match gathering, price
5165    /// updates, traceback) runs inside a single `target_feature` umbrella
5166    /// alongside the per-position `collect_optimal_candidates_initialized_
5167    /// <kernel>`. This eliminates the final ABI barrier on the hot per-
5168    /// position match-collection call — the level22 critical path is now
5169    /// one straight-line inline chain from DP body down through BT walk
5170    /// and match-length probes.
    #[inline(always)]
    fn build_optimal_plan_impl<const ACCURATE_PRICE: bool, const FAVOR_SMALL_OFFSETS: bool>(
        &mut self,
        current: &[u8],
        current_abs_start: usize,
        current_len: usize,
        initial_state: HcOptimalPlanState,
        stats: &HcOptState,
        out: &mut Vec<HcOptimalSequence>,
    ) -> (u32, [u32; 3], usize, usize) {
        // The three cfg predicates below are mutually exclusive and together
        // cover every target, so exactly one block is compiled in and acts as
        // the function's tail expression. Every arm forwards the arguments
        // unchanged; only the kernel-specific body differs.

        // AArch64 little-endian: the NEON body is used unconditionally.
        // SAFETY: the callee is `#[target_feature(enable = "neon")]`.
        // NOTE(review): soundness relies on NEON being available on every
        // target matching this cfg (`bt_insert_step_no_rebase` states that
        // NEON is baseline on aarch64) — confirm for custom targets.
        #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
        unsafe {
            self.build_optimal_plan_impl_neon::<ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
                current,
                current_abs_start,
                current_len,
                initial_state,
                stats,
                out,
            )
        }
        // x86 / x86_64: dispatch on the kernel reported by `select_kernel()`.
        // SAFETY (both `unsafe` arms): each callee enables exactly the CPU
        // features named by its `FastpathKernel` variant.
        // NOTE(review): assumes `select_kernel()` only reports a SIMD kernel
        // when the running CPU supports it — verify in `fastpath`.
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            use crate::encoding::fastpath::{FastpathKernel, select_kernel};
            match select_kernel() {
                FastpathKernel::Avx2Bmi2 => unsafe {
                    self.build_optimal_plan_impl_avx2_bmi2::<ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
                        current,
                        current_abs_start,
                        current_len,
                        initial_state,
                        stats,
                        out,
                    )
                },
                FastpathKernel::Sse42 => unsafe {
                    self.build_optimal_plan_impl_sse42::<ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
                        current,
                        current_abs_start,
                        current_len,
                        initial_state,
                        stats,
                        out,
                    )
                },
                // Safe fallback: the scalar body has no `target_feature`
                // requirement.
                FastpathKernel::Scalar => self
                    .build_optimal_plan_impl_scalar::<ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
                        current,
                        current_abs_start,
                        current_len,
                        initial_state,
                        stats,
                        out,
                    ),
            }
        }
        // Every other target (including big-endian aarch64): scalar body only.
        #[cfg(not(any(
            all(target_arch = "aarch64", target_endian = "little"),
            target_arch = "x86",
            target_arch = "x86_64"
        )))]
        {
            self.build_optimal_plan_impl_scalar::<ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
                current,
                current_abs_start,
                current_len,
                initial_state,
                stats,
                out,
            )
        }
    }
5243
    /// NEON-umbrella DP body. Inlines
    /// `collect_optimal_candidates_initialized_neon` (and its entire
    /// per-position pipeline) directly into the DP loop.
    ///
    /// The body is expanded from `build_optimal_plan_impl_body!`; the only
    /// kernel-specific input is the candidate collector named as the macro's
    /// final argument.
    ///
    /// # Safety
    ///
    /// Declared with `#[target_feature(enable = "neon")]`: the caller must
    /// ensure the executing CPU supports NEON.
    #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
    #[target_feature(enable = "neon")]
    unsafe fn build_optimal_plan_impl_neon<
        const ACCURATE_PRICE: bool,
        const FAVOR_SMALL_OFFSETS: bool,
    >(
        &mut self,
        current: &[u8],
        current_abs_start: usize,
        current_len: usize,
        initial_state: HcOptimalPlanState,
        stats: &HcOptState,
        out: &mut Vec<HcOptimalSequence>,
    ) -> (u32, [u32; 3], usize, usize) {
        build_optimal_plan_impl_body!(
            self,
            current,
            current_abs_start,
            current_len,
            initial_state,
            stats,
            out,
            collect_optimal_candidates_initialized_neon,
        )
    }
5272
    /// SSE4.2-umbrella DP body. Expands `build_optimal_plan_impl_body!` with
    /// `collect_optimal_candidates_initialized_sse42` as the per-position
    /// candidate collector, mirroring the NEON and AVX2+BMI2 variants.
    ///
    /// # Safety
    ///
    /// Declared with `#[target_feature(enable = "sse4.2")]`: the caller must
    /// ensure the executing CPU supports SSE4.2.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[target_feature(enable = "sse4.2")]
    unsafe fn build_optimal_plan_impl_sse42<
        const ACCURATE_PRICE: bool,
        const FAVOR_SMALL_OFFSETS: bool,
    >(
        &mut self,
        current: &[u8],
        current_abs_start: usize,
        current_len: usize,
        initial_state: HcOptimalPlanState,
        stats: &HcOptState,
        out: &mut Vec<HcOptimalSequence>,
    ) -> (u32, [u32; 3], usize, usize) {
        build_optimal_plan_impl_body!(
            self,
            current,
            current_abs_start,
            current_len,
            initial_state,
            stats,
            out,
            collect_optimal_candidates_initialized_sse42,
        )
    }
5298
    /// AVX2+BMI2-umbrella DP body. Expands `build_optimal_plan_impl_body!`
    /// with `collect_optimal_candidates_initialized_avx2_bmi2` as the
    /// per-position candidate collector, mirroring the NEON and SSE4.2
    /// variants.
    ///
    /// # Safety
    ///
    /// Declared with `#[target_feature(enable = "avx2,bmi2")]`: the caller
    /// must ensure the executing CPU supports both AVX2 and BMI2.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[target_feature(enable = "avx2,bmi2")]
    unsafe fn build_optimal_plan_impl_avx2_bmi2<
        const ACCURATE_PRICE: bool,
        const FAVOR_SMALL_OFFSETS: bool,
    >(
        &mut self,
        current: &[u8],
        current_abs_start: usize,
        current_len: usize,
        initial_state: HcOptimalPlanState,
        stats: &HcOptState,
        out: &mut Vec<HcOptimalSequence>,
    ) -> (u32, [u32; 3], usize, usize) {
        build_optimal_plan_impl_body!(
            self,
            current,
            current_abs_start,
            current_len,
            initial_state,
            stats,
            out,
            collect_optimal_candidates_initialized_avx2_bmi2,
        )
    }
5324
    /// Scalar DP body. Expands `build_optimal_plan_impl_body!` with
    /// `collect_optimal_candidates_initialized_scalar` as the per-position
    /// candidate collector. It is a safe fn with no `target_feature`
    /// requirement, compiled on every target except little-endian aarch64
    /// (where the dispatcher always takes the NEON body).
    #[cfg(not(all(target_arch = "aarch64", target_endian = "little")))]
    // Body macros wrap callees in `unsafe { }` for the NEON/AVX/SSE
    // variants where callees are `unsafe fn`. The scalar wrappers route
    // through safe fns, so those blocks are redundant on this path.
    #[allow(unused_unsafe)]
    fn build_optimal_plan_impl_scalar<
        const ACCURATE_PRICE: bool,
        const FAVOR_SMALL_OFFSETS: bool,
    >(
        &mut self,
        current: &[u8],
        current_abs_start: usize,
        current_len: usize,
        initial_state: HcOptimalPlanState,
        stats: &HcOptState,
        out: &mut Vec<HcOptimalSequence>,
    ) -> (u32, [u32; 3], usize, usize) {
        build_optimal_plan_impl_body!(
            self,
            current,
            current_abs_start,
            current_len,
            initial_state,
            stats,
            out,
            collect_optimal_candidates_initialized_scalar,
        )
    }
5353
5354    fn finish_optimal_plan(
5355        &mut self,
5356        buffers: HcOptimalPlanBuffers,
5357        result: (u32, [u32; 3], usize, usize),
5358    ) -> (u32, [u32; 3], usize, usize) {
5359        let HcOptimalPlanBuffers {
5360            nodes,
5361            mut candidates,
5362            store,
5363            ll_prices,
5364            ll_price_generations,
5365            ml_prices,
5366            ml_price_generations,
5367        } = buffers;
5368        candidates.clear();
5369        self.opt_nodes_scratch = nodes;
5370        self.opt_candidates_scratch = candidates;
5371        self.opt_store_scratch = store;
5372        self.opt_ll_price_scratch = ll_prices;
5373        self.opt_ll_price_generation = ll_price_generations;
5374        self.opt_ml_price_scratch = ml_prices;
5375        self.opt_ml_price_generation = ml_price_generations;
5376        result
5377    }
5378
5379    #[inline(always)]
5380    fn reset_opt_nodes(nodes: &mut [HcOptimalNode], start: usize, end: usize) {
5381        for node in &mut nodes[start..=end] {
5382            Self::reset_opt_node(node);
5383        }
5384    }
5385
5386    #[inline(always)]
5387    fn reset_opt_node(node: &mut HcOptimalNode) {
5388        node.price = u32::MAX;
5389        // Donor only marks the slot as unreachable and not end-of-match here;
5390        // stale mlen is ignored while price is MAX and litlen is non-zero.
5391        node.litlen = u32::MAX;
5392    }
5393
5394    #[inline(always)]
5395    fn add_price_delta(price: u32, add: u32, delta: i32) -> u32 {
5396        #[cfg(debug_assertions)]
5397        {
5398            let sum = price as i64 + add as i64 + delta as i64;
5399            debug_assert!((0..=u32::MAX as i64).contains(&sum));
5400        }
5401        price.wrapping_add(add).wrapping_add_signed(delta)
5402    }
5403
5404    #[inline(always)]
5405    fn add_prices(lhs: u32, rhs: u32) -> u32 {
5406        let sum = lhs + rhs;
5407        debug_assert!(sum >= lhs);
5408        sum
5409    }
5410
5411    #[inline(always)]
5412    fn cached_literal_price(
5413        profile: HcOptimalCostProfile,
5414        stats: &HcOptState,
5415        byte: u8,
5416        prices: &mut [u32; HC_MAX_LIT + 1],
5417        generations: &mut [u32; HC_MAX_LIT + 1],
5418        stamp: u32,
5419    ) -> u32 {
5420        // SAFETY: `byte as usize` is `0..256` and the fixed-size arrays are
5421        // `[u32; HC_MAX_LIT + 1 = 257]`, so the index is statically in bounds.
5422        // Each cached_*_price call sits inside the optimal parser per-byte
5423        // hot loop where these bounds checks are pure overhead.
5424        let idx = byte as usize;
5425        unsafe {
5426            if *generations.get_unchecked(idx) == stamp {
5427                return *prices.get_unchecked(idx);
5428            }
5429            let price = profile.literal_price(stats, byte);
5430            *prices.get_unchecked_mut(idx) = price;
5431            *generations.get_unchecked_mut(idx) = stamp;
5432            price
5433        }
5434    }
5435
5436    #[inline(always)]
5437    fn cached_lit_length_price(
5438        profile: HcOptimalCostProfile,
5439        stats: &HcOptState,
5440        lit_len: usize,
5441        prices: &mut [u32],
5442        generations: &mut [u32],
5443        stamp: u32,
5444    ) -> u32 {
5445        if lit_len >= prices.len() {
5446            return profile.lit_length_price(stats, lit_len);
5447        }
5448        // SAFETY: the early-return above proves `lit_len < prices.len()`. The
5449        // matching `generations` slice is sized identically by the caller in
5450        // `build_optimal_plan_impl` (`opt_ll_price_scratch` /
5451        // `opt_ll_price_generation` are `resize`d together), so the same
5452        // index is in bounds for both.
5453        unsafe {
5454            if *generations.get_unchecked(lit_len) == stamp {
5455                return *prices.get_unchecked(lit_len);
5456            }
5457            let price = profile.lit_length_price(stats, lit_len);
5458            *prices.get_unchecked_mut(lit_len) = price;
5459            *generations.get_unchecked_mut(lit_len) = stamp;
5460            price
5461        }
5462    }
5463
5464    #[inline(always)]
5465    fn cached_lit_length_delta_price(
5466        profile: HcOptimalCostProfile,
5467        stats: &HcOptState,
5468        lit_len: usize,
5469        prices: &mut [u32],
5470        generations: &mut [u32],
5471        stamp: u32,
5472    ) -> i32 {
5473        if lit_len == 0 {
5474            return profile.lit_length_price(stats, lit_len) as i32
5475                - profile.lit_length_price(stats, lit_len.saturating_sub(1)) as i32;
5476        }
5477        let price =
5478            Self::cached_lit_length_price(profile, stats, lit_len, prices, generations, stamp);
5479        let previous =
5480            Self::cached_lit_length_price(profile, stats, lit_len - 1, prices, generations, stamp);
5481        price as i32 - previous as i32
5482    }
5483
5484    #[inline(always)]
5485    fn cached_match_length_price(
5486        profile: HcOptimalCostProfile,
5487        stats: &HcOptState,
5488        match_len: usize,
5489        prices: &mut [u32],
5490        generations: &mut [u32],
5491        stamp: u32,
5492    ) -> u32 {
5493        if match_len >= prices.len() {
5494            return profile.match_length_price(stats, match_len);
5495        }
5496        // SAFETY: see `cached_lit_length_price` — the caller co-sizes
5497        // `opt_ml_price_scratch` and `opt_ml_price_generation`, and the
5498        // early return proves `match_len < prices.len()`.
5499        unsafe {
5500            if *generations.get_unchecked(match_len) == stamp {
5501                return *prices.get_unchecked(match_len);
5502            }
5503            let price = profile.match_length_price(stats, match_len);
5504            *prices.get_unchecked_mut(match_len) = price;
5505            *generations.get_unchecked_mut(match_len) = stamp;
5506            price
5507        }
5508    }
5509
5510    #[cfg(test)]
5511    fn collect_optimal_candidates(
5512        &mut self,
5513        abs_pos: usize,
5514        current_abs_end: usize,
5515        profile: HcOptimalCostProfile,
5516        query: HcCandidateQuery,
5517        out: &mut Vec<MatchCandidate>,
5518    ) {
5519        self.ensure_tables();
5520        if self.uses_bt_matchfinder() {
5521            self.collect_optimal_candidates_initialized::<true>(
5522                abs_pos,
5523                current_abs_end,
5524                profile,
5525                query,
5526                out,
5527            );
5528        } else {
5529            self.collect_optimal_candidates_initialized::<false>(
5530                abs_pos,
5531                current_abs_end,
5532                profile,
5533                query,
5534                out,
5535            );
5536        }
5537    }
5538
    /// Cross-platform entry. Picks the kernel-specific variant so the per-
    /// position pipeline (BT-tree fill, rep probing, hash3 probing, BT
    /// collect / HC chain walk) runs inside a single `target_feature`
    /// umbrella — all inner SIMD probes inline without ABI barriers.
    ///
    /// The on-encode hot path bypasses this dispatcher: `build_optimal_plan_impl_<kernel>`
    /// calls the matching `_<kernel>` variant directly. This entry is kept
    /// for the cfg(test)-only `collect_optimal_candidates` shim and any
    /// future caller that isn't already inside a kernel umbrella.
    ///
    /// All arguments, including the `USE_BT_MATCHFINDER` const parameter, are
    /// forwarded unchanged to whichever variant is selected.
    // Outside test builds this entry may have no caller, hence the allow.
    #[allow(dead_code)]
    #[inline(always)]
    fn collect_optimal_candidates_initialized<const USE_BT_MATCHFINDER: bool>(
        &mut self,
        abs_pos: usize,
        current_abs_end: usize,
        profile: HcOptimalCostProfile,
        query: HcCandidateQuery,
        out: &mut Vec<MatchCandidate>,
    ) {
        // The three cfg predicates are mutually exclusive and exhaustive, so
        // exactly one of the blocks below is compiled in.

        // AArch64 little-endian: always the NEON variant.
        // SAFETY: the callee is `#[target_feature(enable = "neon")]`.
        // NOTE(review): relies on NEON being available on every target
        // matching this cfg (`bt_insert_step_no_rebase` states that NEON is
        // baseline on aarch64) — confirm for custom targets.
        #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
        unsafe {
            self.collect_optimal_candidates_initialized_neon::<USE_BT_MATCHFINDER>(
                abs_pos,
                current_abs_end,
                profile,
                query,
                out,
            )
        }
        // x86 / x86_64: dispatch on the kernel reported by `select_kernel()`.
        // SAFETY (both `unsafe` arms): each callee enables exactly the CPU
        // features named by its `FastpathKernel` variant.
        // NOTE(review): assumes `select_kernel()` only reports a SIMD kernel
        // when the running CPU supports it — verify in `fastpath`.
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            use crate::encoding::fastpath::{FastpathKernel, select_kernel};
            match select_kernel() {
                FastpathKernel::Avx2Bmi2 => unsafe {
                    self.collect_optimal_candidates_initialized_avx2_bmi2::<USE_BT_MATCHFINDER>(
                        abs_pos,
                        current_abs_end,
                        profile,
                        query,
                        out,
                    )
                },
                FastpathKernel::Sse42 => unsafe {
                    self.collect_optimal_candidates_initialized_sse42::<USE_BT_MATCHFINDER>(
                        abs_pos,
                        current_abs_end,
                        profile,
                        query,
                        out,
                    )
                },
                // Safe fallback with no `target_feature` requirement.
                FastpathKernel::Scalar => self
                    .collect_optimal_candidates_initialized_scalar::<USE_BT_MATCHFINDER>(
                        abs_pos,
                        current_abs_end,
                        profile,
                        query,
                        out,
                    ),
            }
        }
        // Every other target (including big-endian aarch64): scalar only.
        #[cfg(not(any(
            all(target_arch = "aarch64", target_endian = "little"),
            target_arch = "x86",
            target_arch = "x86_64"
        )))]
        {
            self.collect_optimal_candidates_initialized_scalar::<USE_BT_MATCHFINDER>(
                abs_pos,
                current_abs_end,
                profile,
                query,
                out,
            )
        }
    }
5615
    /// NEON-umbrella variant. Every inner helper (`bt_update_tree_until_neon`,
    /// `for_each_repcode_candidate_with_reps_neon`, `hash3_candidate_neon`,
    /// `bt_insert_and_collect_matches_neon`, `fastpath::neon::
    /// common_prefix_len_ptr`) shares the NEON umbrella so the per-position
    /// pipeline executes as a single straight-line inline sequence.
    ///
    /// The body is expanded from `collect_optimal_candidates_initialized_body!`;
    /// the trailing macro arguments name the NEON helpers listed above.
    ///
    /// # Safety
    ///
    /// Declared with `#[target_feature(enable = "neon")]`: the caller must
    /// ensure the executing CPU supports NEON.
    #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
    #[target_feature(enable = "neon")]
    unsafe fn collect_optimal_candidates_initialized_neon<const USE_BT_MATCHFINDER: bool>(
        &mut self,
        abs_pos: usize,
        current_abs_end: usize,
        profile: HcOptimalCostProfile,
        query: HcCandidateQuery,
        out: &mut Vec<MatchCandidate>,
    ) {
        collect_optimal_candidates_initialized_body!(
            self,
            abs_pos,
            current_abs_end,
            profile,
            query,
            out,
            USE_BT_MATCHFINDER,
            bt_update_tree_until_neon,
            bt_insert_and_collect_matches_neon,
            for_each_repcode_candidate_with_reps_neon,
            hash3_candidate_neon,
            crate::encoding::fastpath::neon::common_prefix_len_ptr,
        )
    }
5646
    /// SSE4.2-umbrella variant. Expands
    /// `collect_optimal_candidates_initialized_body!` with the `_sse42`
    /// helpers (`bt_update_tree_until_sse42`,
    /// `bt_insert_and_collect_matches_sse42`,
    /// `for_each_repcode_candidate_with_reps_sse42`, `hash3_candidate_sse42`,
    /// `fastpath::sse42::common_prefix_len_ptr`), mirroring the NEON variant.
    ///
    /// # Safety
    ///
    /// Declared with `#[target_feature(enable = "sse4.2")]`: the caller must
    /// ensure the executing CPU supports SSE4.2.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[target_feature(enable = "sse4.2")]
    unsafe fn collect_optimal_candidates_initialized_sse42<const USE_BT_MATCHFINDER: bool>(
        &mut self,
        abs_pos: usize,
        current_abs_end: usize,
        profile: HcOptimalCostProfile,
        query: HcCandidateQuery,
        out: &mut Vec<MatchCandidate>,
    ) {
        collect_optimal_candidates_initialized_body!(
            self,
            abs_pos,
            current_abs_end,
            profile,
            query,
            out,
            USE_BT_MATCHFINDER,
            bt_update_tree_until_sse42,
            bt_insert_and_collect_matches_sse42,
            for_each_repcode_candidate_with_reps_sse42,
            hash3_candidate_sse42,
            crate::encoding::fastpath::sse42::common_prefix_len_ptr,
        )
    }
5672
    /// AVX2+BMI2-umbrella variant. Expands
    /// `collect_optimal_candidates_initialized_body!` with the `_avx2_bmi2`
    /// helpers (`bt_update_tree_until_avx2_bmi2`,
    /// `bt_insert_and_collect_matches_avx2_bmi2`,
    /// `for_each_repcode_candidate_with_reps_avx2_bmi2`,
    /// `hash3_candidate_avx2_bmi2`,
    /// `fastpath::avx2_bmi2::common_prefix_len_ptr`), mirroring the NEON
    /// variant.
    ///
    /// # Safety
    ///
    /// Declared with `#[target_feature(enable = "avx2,bmi2")]`: the caller
    /// must ensure the executing CPU supports both AVX2 and BMI2.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[target_feature(enable = "avx2,bmi2")]
    unsafe fn collect_optimal_candidates_initialized_avx2_bmi2<const USE_BT_MATCHFINDER: bool>(
        &mut self,
        abs_pos: usize,
        current_abs_end: usize,
        profile: HcOptimalCostProfile,
        query: HcCandidateQuery,
        out: &mut Vec<MatchCandidate>,
    ) {
        collect_optimal_candidates_initialized_body!(
            self,
            abs_pos,
            current_abs_end,
            profile,
            query,
            out,
            USE_BT_MATCHFINDER,
            bt_update_tree_until_avx2_bmi2,
            bt_insert_and_collect_matches_avx2_bmi2,
            for_each_repcode_candidate_with_reps_avx2_bmi2,
            hash3_candidate_avx2_bmi2,
            crate::encoding::fastpath::avx2_bmi2::common_prefix_len_ptr,
        )
    }
5698
    /// Scalar variant. Expands `collect_optimal_candidates_initialized_body!`
    /// with the `_scalar` helpers and
    /// `fastpath::scalar::common_prefix_len_ptr`. It is a safe fn with no
    /// `target_feature` requirement, compiled on every target except
    /// little-endian aarch64.
    #[cfg(not(all(target_arch = "aarch64", target_endian = "little")))]
    // Macro emits `unsafe { }` wrappers for NEON/AVX/SSE variants; scalar
    // callees are safe so the blocks are redundant here only.
    #[allow(unused_unsafe)]
    fn collect_optimal_candidates_initialized_scalar<const USE_BT_MATCHFINDER: bool>(
        &mut self,
        abs_pos: usize,
        current_abs_end: usize,
        profile: HcOptimalCostProfile,
        query: HcCandidateQuery,
        out: &mut Vec<MatchCandidate>,
    ) {
        collect_optimal_candidates_initialized_body!(
            self,
            abs_pos,
            current_abs_end,
            profile,
            query,
            out,
            USE_BT_MATCHFINDER,
            bt_update_tree_until_scalar,
            bt_insert_and_collect_matches_scalar,
            for_each_repcode_candidate_with_reps_scalar,
            hash3_candidate_scalar,
            crate::encoding::fastpath::scalar::common_prefix_len_ptr,
        )
    }
5726
5727    fn push_candidate_ladder(
5728        out: &mut Vec<MatchCandidate>,
5729        best_len_for_skip: &mut usize,
5730        candidate: MatchCandidate,
5731        min_match_len: usize,
5732    ) -> bool {
5733        if candidate.match_len < min_match_len {
5734            return false;
5735        }
5736        if candidate.match_len > *best_len_for_skip {
5737            out.push(candidate);
5738            *best_len_for_skip = candidate.match_len;
5739            return true;
5740        }
5741        false
5742    }
5743
5744    fn ldm_skip_raw_seq_store_bytes(&self, seq_store: &mut HcRawSeqStore, nb_bytes: usize) {
5745        let mut curr_pos = seq_store.pos_in_sequence.saturating_add(nb_bytes);
5746        while curr_pos > 0 && seq_store.pos < seq_store.size {
5747            let curr_seq = self.ldm_sequences[seq_store.pos];
5748            let seq_len = curr_seq.lit_length.saturating_add(curr_seq.match_length);
5749            if curr_pos >= seq_len {
5750                curr_pos -= seq_len;
5751                seq_store.pos += 1;
5752            } else {
5753                seq_store.pos_in_sequence = curr_pos;
5754                break;
5755            }
5756        }
5757        if curr_pos == 0 || seq_store.pos == seq_store.size {
5758            seq_store.pos_in_sequence = 0;
5759        }
5760    }
5761
5762    fn ldm_get_next_match_and_update_seq_store(
5763        &self,
5764        opt_ldm: &mut HcOptLdmState,
5765        curr_pos_in_block: usize,
5766        block_bytes_remaining: usize,
5767    ) {
5768        if opt_ldm.seq_store.size == 0 || opt_ldm.seq_store.pos >= opt_ldm.seq_store.size {
5769            opt_ldm.start_pos_in_block = usize::MAX;
5770            opt_ldm.end_pos_in_block = usize::MAX;
5771            return;
5772        }
5773        let curr_seq = self.ldm_sequences[opt_ldm.seq_store.pos];
5774        let curr_block_end_pos = curr_pos_in_block.saturating_add(block_bytes_remaining);
5775        let literals_bytes_remaining = curr_seq
5776            .lit_length
5777            .saturating_sub(opt_ldm.seq_store.pos_in_sequence);
5778        let match_bytes_remaining = if literals_bytes_remaining == 0 {
5779            curr_seq.match_length.saturating_sub(
5780                opt_ldm
5781                    .seq_store
5782                    .pos_in_sequence
5783                    .saturating_sub(curr_seq.lit_length),
5784            )
5785        } else {
5786            curr_seq.match_length
5787        };
5788        if literals_bytes_remaining >= block_bytes_remaining {
5789            opt_ldm.start_pos_in_block = usize::MAX;
5790            opt_ldm.end_pos_in_block = usize::MAX;
5791            self.ldm_skip_raw_seq_store_bytes(&mut opt_ldm.seq_store, block_bytes_remaining);
5792            return;
5793        }
5794        opt_ldm.start_pos_in_block = curr_pos_in_block.saturating_add(literals_bytes_remaining);
5795        opt_ldm.end_pos_in_block = opt_ldm
5796            .start_pos_in_block
5797            .saturating_add(match_bytes_remaining);
5798        opt_ldm.offset = curr_seq.offset;
5799        if opt_ldm.end_pos_in_block > curr_block_end_pos {
5800            opt_ldm.end_pos_in_block = curr_block_end_pos;
5801            self.ldm_skip_raw_seq_store_bytes(
5802                &mut opt_ldm.seq_store,
5803                curr_block_end_pos.saturating_sub(curr_pos_in_block),
5804            );
5805        } else {
5806            self.ldm_skip_raw_seq_store_bytes(
5807                &mut opt_ldm.seq_store,
5808                literals_bytes_remaining.saturating_add(match_bytes_remaining),
5809            );
5810        }
5811    }
5812
5813    fn ldm_maybe_add_match(
5814        &self,
5815        opt_ldm: &HcOptLdmState,
5816        curr_pos_in_block: usize,
5817        min_match: usize,
5818    ) -> Option<MatchCandidate> {
5819        let pos_diff = curr_pos_in_block.saturating_sub(opt_ldm.start_pos_in_block);
5820        let candidate_match_length = opt_ldm
5821            .end_pos_in_block
5822            .saturating_sub(opt_ldm.start_pos_in_block)
5823            .saturating_sub(pos_diff);
5824        if curr_pos_in_block < opt_ldm.start_pos_in_block
5825            || curr_pos_in_block >= opt_ldm.end_pos_in_block
5826            || candidate_match_length < min_match
5827        {
5828            return None;
5829        }
5830        Some(MatchCandidate {
5831            start: curr_pos_in_block,
5832            offset: opt_ldm.offset,
5833            match_len: candidate_match_length,
5834        })
5835    }
5836
5837    fn ldm_process_match_candidate(
5838        &self,
5839        opt_ldm: &mut HcOptLdmState,
5840        curr_pos_in_block: usize,
5841        remaining_bytes: usize,
5842        min_match: usize,
5843    ) -> Option<MatchCandidate> {
5844        if opt_ldm.seq_store.size == 0 || opt_ldm.seq_store.pos >= opt_ldm.seq_store.size {
5845            return None;
5846        }
5847        if curr_pos_in_block >= opt_ldm.end_pos_in_block {
5848            if curr_pos_in_block > opt_ldm.end_pos_in_block {
5849                let pos_overshoot = curr_pos_in_block.saturating_sub(opt_ldm.end_pos_in_block);
5850                self.ldm_skip_raw_seq_store_bytes(&mut opt_ldm.seq_store, pos_overshoot);
5851            }
5852            self.ldm_get_next_match_and_update_seq_store(
5853                opt_ldm,
5854                curr_pos_in_block,
5855                remaining_bytes,
5856            );
5857        }
5858        self.ldm_maybe_add_match(opt_ldm, curr_pos_in_block, min_match)
5859    }
5860
5861    fn prepare_ldm_candidates(&mut self, current_abs_start: usize, current_len: usize) {
5862        self.ldm_sequences.clear();
5863        let _ = (current_abs_start, current_len);
5864        // Donor parity: btopt/btultra/btultra2 only merge LDM candidates when
5865        // a real ldmSeqStore exists (`enableLdm == ZSTD_ps_enable`).
5866        // This Rust encoder does not expose a donor-equivalent LDM producer or
5867        // runtime switch yet, so the ordinary level-22 path must remain LDM-free.
5868    }
5869
5870    fn emit_optimal_plan(
5871        &mut self,
5872        current_len: usize,
5873        plan: &[HcOptimalSequence],
5874        handle_sequence: &mut impl for<'a> FnMut(Sequence<'a>),
5875    ) {
5876        let current = self.window.back().unwrap().as_slice();
5877        if plan.is_empty() {
5878            handle_sequence(Sequence::Literals { literals: current });
5879            return;
5880        }
5881
5882        let mut literals_start = 0usize;
5883        for item in plan {
5884            let lit_len = item.lit_len as usize;
5885            let match_len = item.match_len as usize;
5886            let start = literals_start.saturating_add(lit_len);
5887            if start < literals_start || start + match_len > current_len {
5888                continue;
5889            }
5890            let literals = &current[literals_start..start];
5891            handle_sequence(Sequence::Triple {
5892                literals,
5893                offset: item.offset as usize,
5894                match_len,
5895            });
5896            encode_offset_with_history(item.offset, literals.len() as u32, &mut self.offset_hist);
5897            literals_start = start + match_len;
5898        }
5899
5900        if literals_start < current_len {
5901            handle_sequence(Sequence::Literals {
5902                literals: &current[literals_start..],
5903            });
5904        }
5905    }
5906
5907    fn ensure_tables(&mut self) {
5908        if self.hash_table.is_empty() {
5909            self.hash_table = alloc::vec![HC_EMPTY; 1 << self.hash_log];
5910            let hash3_size = if self.hash3_log == 0 {
5911                0
5912            } else {
5913                1 << self.hash3_log
5914            };
5915            self.hash3_table = alloc::vec![HC_EMPTY; hash3_size];
5916            self.chain_table = alloc::vec![HC_EMPTY; 1 << self.chain_log];
5917        }
5918    }
5919
5920    fn compact_history(&mut self) {
5921        if self.history_start == 0 {
5922            return;
5923        }
5924        if self.history_start >= self.max_window_size
5925            || self.history_start * 2 >= self.history.len()
5926        {
5927            self.history.drain(..self.history_start);
5928            self.history_start = 0;
5929        }
5930    }
5931
5932    fn live_history(&self) -> &[u8] {
5933        &self.history[self.history_start..]
5934    }
5935
5936    fn history_abs_end(&self) -> usize {
5937        self.history_abs_start + self.live_history().len()
5938    }
5939
5940    fn uses_bt_matchfinder(&self) -> bool {
5941        matches!(
5942            self.parse_mode,
5943            HcParseMode::BtOpt | HcParseMode::BtUltra | HcParseMode::BtUltra2
5944        )
5945    }
5946
5947    fn bt_log(&self) -> usize {
5948        self.chain_log.saturating_sub(1)
5949    }
5950
5951    fn bt_mask(&self) -> usize {
5952        (1usize << self.bt_log()) - 1
5953    }
5954
5955    fn bt_pair_index_for_abs(&self, abs_pos: usize) -> usize {
5956        2 * (abs_pos.saturating_add(self.index_shift) & self.bt_mask())
5957    }
5958
5959    #[inline(always)]
5960    fn stored_abs_position_fast(
5961        stored: u32,
5962        position_base: usize,
5963        index_shift: usize,
5964    ) -> Option<usize> {
5965        if stored == HC_EMPTY {
5966            return None;
5967        }
5968        let shifted = position_base + (stored as usize - 1);
5969        if shifted < index_shift {
5970            return None;
5971        }
5972        Some(shifted - index_shift)
5973    }
5974
5975    fn window_low_abs_for_target(&self, target_abs: usize) -> usize {
5976        let history_low = self.history_abs_start;
5977        let window_low = target_abs.saturating_sub(self.max_window_size);
5978        history_low.max(window_low)
5979    }
5980
5981    fn bt_hash_mls(&self) -> usize {
5982        // Donor parity: even when `minMatch == 3` (btultra2), the main BT/HC
5983        // hash still goes through `ZSTD_hashPtr(..., mls)` which falls back to
5984        // the default `case 4` in `zstd_compress_internal.h`. The 3-byte path
5985        // is a separate HC3 side table only.
5986        4
5987    }
5988
    /// Cross-platform entry. Picks the kernel-specific variant so the BT walk
    /// body executes inside one `target_feature` umbrella and inlines the
    /// vectorized `count_match_from_indices` directly.
    ///
    /// All three arguments are forwarded unchanged and the selected
    /// variant's `usize` result is returned as-is.
    #[inline(always)]
    fn bt_insert_step_no_rebase(
        &mut self,
        abs_pos: usize,
        current_abs_end: usize,
        target_abs: usize,
    ) -> usize {
        // The three cfg predicates below are mutually exclusive and
        // exhaustive, so exactly one block is compiled in and acts as the
        // function's tail expression.
        //
        // SAFETY: each branch verifies the target_feature requirement of the
        // callee — aarch64 NEON is baseline; x86 AVX2/BMI2 and SSE4.2 are
        // selected only when the runtime detector reports them present.
        #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
        unsafe {
            self.bt_insert_step_no_rebase_neon(abs_pos, current_abs_end, target_abs)
        }
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            use crate::encoding::fastpath::{FastpathKernel, select_kernel};
            match select_kernel() {
                FastpathKernel::Avx2Bmi2 => unsafe {
                    self.bt_insert_step_no_rebase_avx2_bmi2(abs_pos, current_abs_end, target_abs)
                },
                FastpathKernel::Sse42 => unsafe {
                    self.bt_insert_step_no_rebase_sse42(abs_pos, current_abs_end, target_abs)
                },
                // Safe fallback with no `target_feature` requirement.
                FastpathKernel::Scalar => {
                    self.bt_insert_step_no_rebase_scalar(abs_pos, current_abs_end, target_abs)
                }
            }
        }
        // Every other target (including big-endian aarch64): scalar only.
        #[cfg(not(any(
            all(target_arch = "aarch64", target_endian = "little"),
            target_arch = "x86",
            target_arch = "x86_64"
        )))]
        {
            self.bt_insert_step_no_rebase_scalar(abs_pos, current_abs_end, target_abs)
        }
    }
6030
    /// NEON-umbrella variant: body inlines `fastpath::neon::count_match_from_indices`.
    /// AArch64 only. The x86 variants (`_sse42`, `_avx2_bmi2`) follow the
    /// same pattern with their respective umbrellas.
    ///
    /// The body is expanded from `bt_insert_step_no_rebase_body!`; the
    /// match-counting function is the only kernel-specific input.
    ///
    /// # Safety
    ///
    /// Declared with `#[target_feature(enable = "neon")]`: the caller must
    /// ensure the executing CPU supports NEON.
    #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
    #[target_feature(enable = "neon")]
    unsafe fn bt_insert_step_no_rebase_neon(
        &mut self,
        abs_pos: usize,
        current_abs_end: usize,
        target_abs: usize,
    ) -> usize {
        bt_insert_step_no_rebase_body!(
            self,
            abs_pos,
            current_abs_end,
            target_abs,
            crate::encoding::fastpath::neon::count_match_from_indices
        )
    }
6050
    /// SSE4.2-umbrella variant of `bt_insert_step_no_rebase` (x86 / x86_64).
    ///
    /// Expands the shared `bt_insert_step_no_rebase_body!` macro with
    /// `fastpath::sse42::count_match_from_indices` as the match-length kernel,
    /// forwarding `abs_pos`, `current_abs_end` and `target_abs` unchanged.
    ///
    /// # Safety
    ///
    /// The executing CPU must support SSE4.2, as required by
    /// `#[target_feature(enable = "sse4.2")]`.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[target_feature(enable = "sse4.2")]
    unsafe fn bt_insert_step_no_rebase_sse42(
        &mut self,
        abs_pos: usize,
        current_abs_end: usize,
        target_abs: usize,
    ) -> usize {
        bt_insert_step_no_rebase_body!(
            self,
            abs_pos,
            current_abs_end,
            target_abs,
            crate::encoding::fastpath::sse42::count_match_from_indices
        )
    }
6068
    /// AVX2+BMI2-umbrella variant of `bt_insert_step_no_rebase` (x86 / x86_64).
    ///
    /// Expands the shared `bt_insert_step_no_rebase_body!` macro with
    /// `fastpath::avx2_bmi2::count_match_from_indices` as the match-length
    /// kernel, forwarding `abs_pos`, `current_abs_end` and `target_abs`
    /// unchanged.
    ///
    /// # Safety
    ///
    /// The executing CPU must support both AVX2 and BMI2, as required by
    /// `#[target_feature(enable = "avx2,bmi2")]`.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[target_feature(enable = "avx2,bmi2")]
    unsafe fn bt_insert_step_no_rebase_avx2_bmi2(
        &mut self,
        abs_pos: usize,
        current_abs_end: usize,
        target_abs: usize,
    ) -> usize {
        bt_insert_step_no_rebase_body!(
            self,
            abs_pos,
            current_abs_end,
            target_abs,
            crate::encoding::fastpath::avx2_bmi2::count_match_from_indices
        )
    }
6087
    /// Portable scalar variant of `bt_insert_step_no_rebase`.
    ///
    /// Compiled on every target except little-endian AArch64. It serves as
    /// the x86 fallback when no SIMD kernel is selected and as the only
    /// implementation on targets without a SIMD variant. Routes through
    /// `fastpath::scalar::count_match_from_indices` via the shared
    /// `bt_insert_step_no_rebase_body!` macro so the call site keeps the same
    /// shape as the SIMD variants.
    #[cfg(not(all(target_arch = "aarch64", target_endian = "little")))]
    fn bt_insert_step_no_rebase_scalar(
        &mut self,
        abs_pos: usize,
        current_abs_end: usize,
        target_abs: usize,
    ) -> usize {
        bt_insert_step_no_rebase_body!(
            self,
            abs_pos,
            current_abs_end,
            target_abs,
            crate::encoding::fastpath::scalar::count_match_from_indices
        )
    }
6106
6107    /// Cross-platform entry. Picks the kernel-specific variant so the BT-tree
6108    /// update loop runs inside the same `target_feature` umbrella as the per-
6109    /// position `bt_insert_step_no_rebase` it calls — eliminating one ABI
6110    /// barrier per fill iteration.
6111    #[inline(always)]
6112    fn bt_update_tree_until(&mut self, abs_pos: usize, current_abs_end: usize) {
6113        // SAFETY: each branch verifies the target_feature requirement of the
6114        // callee (see `bt_insert_step_no_rebase` dispatcher).
6115        #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
6116        unsafe {
6117            self.bt_update_tree_until_neon(abs_pos, current_abs_end)
6118        }
6119        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
6120        {
6121            use crate::encoding::fastpath::{FastpathKernel, select_kernel};
6122            match select_kernel() {
6123                FastpathKernel::Avx2Bmi2 => unsafe {
6124                    self.bt_update_tree_until_avx2_bmi2(abs_pos, current_abs_end)
6125                },
6126                FastpathKernel::Sse42 => unsafe {
6127                    self.bt_update_tree_until_sse42(abs_pos, current_abs_end)
6128                },
6129                FastpathKernel::Scalar => {
6130                    self.bt_update_tree_until_scalar(abs_pos, current_abs_end)
6131                }
6132            }
6133        }
6134        #[cfg(not(any(
6135            all(target_arch = "aarch64", target_endian = "little"),
6136            target_arch = "x86",
6137            target_arch = "x86_64"
6138        )))]
6139        {
6140            self.bt_update_tree_until_scalar(abs_pos, current_abs_end)
6141        }
6142    }
6143
6144    /// NEON-umbrella variant: per-iteration `bt_insert_step_no_rebase_neon`
6145    /// inlines into the body because both share the `target_feature = "neon"`
6146    /// umbrella.
6147    #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
6148    #[target_feature(enable = "neon")]
6149    unsafe fn bt_update_tree_until_neon(&mut self, abs_pos: usize, current_abs_end: usize) {
6150        if self.skip_insert_until_abs < self.history_abs_start {
6151            self.skip_insert_until_abs = self.history_abs_start;
6152        }
6153        let mut update_abs = self.skip_insert_until_abs;
6154        while update_abs < abs_pos {
6155            if !self.can_skip_rebase_check_at(update_abs, abs_pos) {
6156                self.maybe_rebase_positions(update_abs);
6157            }
6158            // SAFETY: same NEON umbrella; direct call inlines the BT-walk body.
6159            let forward =
6160                unsafe { self.bt_insert_step_no_rebase_neon(update_abs, current_abs_end, abs_pos) };
6161            update_abs = update_abs.saturating_add(forward.max(1));
6162        }
6163        self.skip_insert_until_abs = abs_pos;
6164    }
6165
6166    /// SSE4.2 umbrella variant.
6167    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
6168    #[target_feature(enable = "sse4.2")]
6169    unsafe fn bt_update_tree_until_sse42(&mut self, abs_pos: usize, current_abs_end: usize) {
6170        if self.skip_insert_until_abs < self.history_abs_start {
6171            self.skip_insert_until_abs = self.history_abs_start;
6172        }
6173        let mut update_abs = self.skip_insert_until_abs;
6174        while update_abs < abs_pos {
6175            if !self.can_skip_rebase_check_at(update_abs, abs_pos) {
6176                self.maybe_rebase_positions(update_abs);
6177            }
6178            let forward = unsafe {
6179                self.bt_insert_step_no_rebase_sse42(update_abs, current_abs_end, abs_pos)
6180            };
6181            update_abs = update_abs.saturating_add(forward.max(1));
6182        }
6183        self.skip_insert_until_abs = abs_pos;
6184    }
6185
6186    /// AVX2+BMI2 umbrella variant.
6187    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
6188    #[target_feature(enable = "avx2,bmi2")]
6189    unsafe fn bt_update_tree_until_avx2_bmi2(&mut self, abs_pos: usize, current_abs_end: usize) {
6190        if self.skip_insert_until_abs < self.history_abs_start {
6191            self.skip_insert_until_abs = self.history_abs_start;
6192        }
6193        let mut update_abs = self.skip_insert_until_abs;
6194        while update_abs < abs_pos {
6195            if !self.can_skip_rebase_check_at(update_abs, abs_pos) {
6196                self.maybe_rebase_positions(update_abs);
6197            }
6198            let forward = unsafe {
6199                self.bt_insert_step_no_rebase_avx2_bmi2(update_abs, current_abs_end, abs_pos)
6200            };
6201            update_abs = update_abs.saturating_add(forward.max(1));
6202        }
6203        self.skip_insert_until_abs = abs_pos;
6204    }
6205
6206    /// Scalar fallback used on non-AArch64 targets.
6207    #[cfg(not(all(target_arch = "aarch64", target_endian = "little")))]
6208    fn bt_update_tree_until_scalar(&mut self, abs_pos: usize, current_abs_end: usize) {
6209        if self.skip_insert_until_abs < self.history_abs_start {
6210            self.skip_insert_until_abs = self.history_abs_start;
6211        }
6212        let mut update_abs = self.skip_insert_until_abs;
6213        while update_abs < abs_pos {
6214            if !self.can_skip_rebase_check_at(update_abs, abs_pos) {
6215                self.maybe_rebase_positions(update_abs);
6216            }
6217            let forward =
6218                self.bt_insert_step_no_rebase_scalar(update_abs, current_abs_end, abs_pos);
6219            update_abs = update_abs.saturating_add(forward.max(1));
6220        }
6221        self.skip_insert_until_abs = abs_pos;
6222    }
6223
6224    /// Cross-platform entry. Picks the kernel-specific variant so the BT walk
6225    /// body executes inside one `target_feature` umbrella and inlines the
6226    /// vectorized `count_match_from_indices` directly. See
6227    /// `bt_insert_step_no_rebase` for the same dispatcher pattern.
6228    ///
6229    /// The on-encode hot path bypasses this dispatcher: when invoked from
6230    /// `collect_optimal_candidates_initialized_<kernel>` the per-kernel
6231    /// variant is called directly so the BT match collection inlines under
6232    /// the surrounding umbrella. This entry is kept for external / future
6233    /// callers that aren't yet under an umbrella.
6234    #[allow(dead_code)]
6235    #[inline(always)]
6236    fn bt_insert_and_collect_matches(
6237        &mut self,
6238        abs_pos: usize,
6239        current_abs_end: usize,
6240        profile: HcOptimalCostProfile,
6241        min_match_len: usize,
6242        best_len_for_skip: &mut usize,
6243        out: &mut Vec<MatchCandidate>,
6244    ) {
6245        // SAFETY: each branch verifies the target_feature requirement of the
6246        // callee (see `bt_insert_step_no_rebase` dispatcher).
6247        #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
6248        unsafe {
6249            self.bt_insert_and_collect_matches_neon(
6250                abs_pos,
6251                current_abs_end,
6252                profile,
6253                min_match_len,
6254                best_len_for_skip,
6255                out,
6256            )
6257        }
6258        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
6259        {
6260            use crate::encoding::fastpath::{FastpathKernel, select_kernel};
6261            match select_kernel() {
6262                FastpathKernel::Avx2Bmi2 => unsafe {
6263                    self.bt_insert_and_collect_matches_avx2_bmi2(
6264                        abs_pos,
6265                        current_abs_end,
6266                        profile,
6267                        min_match_len,
6268                        best_len_for_skip,
6269                        out,
6270                    )
6271                },
6272                FastpathKernel::Sse42 => unsafe {
6273                    self.bt_insert_and_collect_matches_sse42(
6274                        abs_pos,
6275                        current_abs_end,
6276                        profile,
6277                        min_match_len,
6278                        best_len_for_skip,
6279                        out,
6280                    )
6281                },
6282                FastpathKernel::Scalar => self.bt_insert_and_collect_matches_scalar(
6283                    abs_pos,
6284                    current_abs_end,
6285                    profile,
6286                    min_match_len,
6287                    best_len_for_skip,
6288                    out,
6289                ),
6290            }
6291        }
6292        #[cfg(not(any(
6293            all(target_arch = "aarch64", target_endian = "little"),
6294            target_arch = "x86",
6295            target_arch = "x86_64"
6296        )))]
6297        {
6298            self.bt_insert_and_collect_matches_scalar(
6299                abs_pos,
6300                current_abs_end,
6301                profile,
6302                min_match_len,
6303                best_len_for_skip,
6304                out,
6305            )
6306        }
6307    }
6308
    /// NEON-umbrella variant of `bt_insert_and_collect_matches` (little-endian
    /// AArch64 only).
    ///
    /// Expands the shared `bt_insert_and_collect_matches_body!` macro with
    /// `fastpath::neon::count_match_from_indices` as the match-length kernel;
    /// every argument is forwarded to the macro unchanged.
    ///
    /// # Safety
    ///
    /// The executing CPU must support NEON, as required by
    /// `#[target_feature(enable = "neon")]`.
    #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
    #[target_feature(enable = "neon")]
    unsafe fn bt_insert_and_collect_matches_neon(
        &mut self,
        abs_pos: usize,
        current_abs_end: usize,
        profile: HcOptimalCostProfile,
        min_match_len: usize,
        best_len_for_skip: &mut usize,
        out: &mut Vec<MatchCandidate>,
    ) {
        bt_insert_and_collect_matches_body!(
            self,
            abs_pos,
            current_abs_end,
            profile,
            min_match_len,
            best_len_for_skip,
            out,
            crate::encoding::fastpath::neon::count_match_from_indices,
        )
    }
6333
    /// SSE4.2-umbrella variant of `bt_insert_and_collect_matches`
    /// (x86 / x86_64).
    ///
    /// Expands the shared `bt_insert_and_collect_matches_body!` macro with
    /// `fastpath::sse42::count_match_from_indices` as the match-length kernel;
    /// every argument is forwarded to the macro unchanged.
    ///
    /// # Safety
    ///
    /// The executing CPU must support SSE4.2, as required by
    /// `#[target_feature(enable = "sse4.2")]`.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[target_feature(enable = "sse4.2")]
    unsafe fn bt_insert_and_collect_matches_sse42(
        &mut self,
        abs_pos: usize,
        current_abs_end: usize,
        profile: HcOptimalCostProfile,
        min_match_len: usize,
        best_len_for_skip: &mut usize,
        out: &mut Vec<MatchCandidate>,
    ) {
        bt_insert_and_collect_matches_body!(
            self,
            abs_pos,
            current_abs_end,
            profile,
            min_match_len,
            best_len_for_skip,
            out,
            crate::encoding::fastpath::sse42::count_match_from_indices,
        )
    }
6357
    /// AVX2+BMI2-umbrella variant of `bt_insert_and_collect_matches`
    /// (x86 / x86_64).
    ///
    /// Expands the shared `bt_insert_and_collect_matches_body!` macro with
    /// `fastpath::avx2_bmi2::count_match_from_indices` as the match-length
    /// kernel; every argument is forwarded to the macro unchanged.
    ///
    /// # Safety
    ///
    /// The executing CPU must support both AVX2 and BMI2, as required by
    /// `#[target_feature(enable = "avx2,bmi2")]`.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[target_feature(enable = "avx2,bmi2")]
    unsafe fn bt_insert_and_collect_matches_avx2_bmi2(
        &mut self,
        abs_pos: usize,
        current_abs_end: usize,
        profile: HcOptimalCostProfile,
        min_match_len: usize,
        best_len_for_skip: &mut usize,
        out: &mut Vec<MatchCandidate>,
    ) {
        bt_insert_and_collect_matches_body!(
            self,
            abs_pos,
            current_abs_end,
            profile,
            min_match_len,
            best_len_for_skip,
            out,
            crate::encoding::fastpath::avx2_bmi2::count_match_from_indices,
        )
    }
6381
    /// Portable scalar variant of `bt_insert_and_collect_matches`.
    ///
    /// Compiled on every target except little-endian AArch64. Expands the
    /// shared `bt_insert_and_collect_matches_body!` macro with
    /// `fastpath::scalar::count_match_from_indices` as the match-length
    /// kernel; every argument is forwarded to the macro unchanged.
    #[cfg(not(all(target_arch = "aarch64", target_endian = "little")))]
    fn bt_insert_and_collect_matches_scalar(
        &mut self,
        abs_pos: usize,
        current_abs_end: usize,
        profile: HcOptimalCostProfile,
        min_match_len: usize,
        best_len_for_skip: &mut usize,
        out: &mut Vec<MatchCandidate>,
    ) {
        bt_insert_and_collect_matches_body!(
            self,
            abs_pos,
            current_abs_end,
            profile,
            min_match_len,
            best_len_for_skip,
            out,
            crate::encoding::fastpath::scalar::count_match_from_indices,
        )
    }
6404
6405    fn hash_position_with_mls(data: &[u8], hash_log: usize, mls: usize) -> usize {
6406        let value = Self::read_le_u32(data);
6407        Self::hash_value_with_mls(value, hash_log, mls)
6408    }
6409
6410    #[inline(always)]
6411    fn hash_position_at(data: &[u8], idx: usize, hash_log: usize, mls: usize) -> usize {
6412        debug_assert!(idx + 4 <= data.len());
6413        let value = unsafe { Self::read_le_u32_ptr(data.as_ptr().add(idx)) };
6414        Self::hash_value_with_mls(value, hash_log, mls)
6415    }
6416
6417    #[inline(always)]
6418    fn hash_value_with_mls(value: u32, hash_log: usize, mls: usize) -> usize {
6419        match mls {
6420            3 => (((value << 8).wrapping_mul(HC_PRIME3BYTES)) >> (32 - hash_log)) as usize,
6421            _ => ((value.wrapping_mul(HC_PRIME4BYTES)) >> (32 - hash_log)) as usize,
6422        }
6423    }
6424
6425    fn hash_position(&self, data: &[u8]) -> usize {
6426        Self::hash_position_with_mls(data, self.hash_log, 4)
6427    }
6428
6429    #[cfg(test)]
6430    fn hash3_position(data: &[u8], hash_log: usize) -> usize {
6431        let value = Self::read_le_u32(data);
6432        (((value << 8).wrapping_mul(HC_PRIME3BYTES)) >> (32 - hash_log)) as usize
6433    }
6434
6435    #[inline(always)]
6436    fn read_le_u32(data: &[u8]) -> u32 {
6437        debug_assert!(data.len() >= 4);
6438        unsafe { Self::read_le_u32_ptr(data.as_ptr()) }
6439    }
6440
6441    #[inline(always)]
6442    unsafe fn read_le_u32_ptr(ptr: *const u8) -> u32 {
6443        unsafe { u32::from_le(core::ptr::read_unaligned(ptr as *const u32)) }
6444    }
6445
6446    /// Cross-platform entry. Dispatches to the kernel-specific variant.
6447    /// Retained so external callers / future code still have a stable shim;
6448    /// the on-encode hot path bypasses this dispatcher via the kernel-specific
6449    /// `_neon`/`_sse42`/`_avx2_bmi2`/`_scalar` variants invoked from inside
6450    /// `collect_optimal_candidates_initialized_<kernel>`.
6451    #[allow(dead_code)]
6452    #[inline(always)]
6453    fn hash3_candidate(
6454        &self,
6455        abs_pos: usize,
6456        current_abs_end: usize,
6457        min_match_len: usize,
6458    ) -> Option<MatchCandidate> {
6459        #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
6460        unsafe {
6461            self.hash3_candidate_neon(abs_pos, current_abs_end, min_match_len)
6462        }
6463        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
6464        {
6465            use crate::encoding::fastpath::{FastpathKernel, select_kernel};
6466            match select_kernel() {
6467                FastpathKernel::Avx2Bmi2 => unsafe {
6468                    self.hash3_candidate_avx2_bmi2(abs_pos, current_abs_end, min_match_len)
6469                },
6470                FastpathKernel::Sse42 => unsafe {
6471                    self.hash3_candidate_sse42(abs_pos, current_abs_end, min_match_len)
6472                },
6473                FastpathKernel::Scalar => {
6474                    self.hash3_candidate_scalar(abs_pos, current_abs_end, min_match_len)
6475                }
6476            }
6477        }
6478        #[cfg(not(any(
6479            all(target_arch = "aarch64", target_endian = "little"),
6480            target_arch = "x86",
6481            target_arch = "x86_64"
6482        )))]
6483        {
6484            self.hash3_candidate_scalar(abs_pos, current_abs_end, min_match_len)
6485        }
6486    }
6487
    /// NEON-umbrella variant of `hash3_candidate` (little-endian AArch64
    /// only).
    ///
    /// Expands the shared `hash3_candidate_body!` macro with
    /// `fastpath::neon::common_prefix_len_ptr` as the prefix-length kernel;
    /// every argument is forwarded to the macro unchanged.
    ///
    /// # Safety
    ///
    /// The executing CPU must support NEON, as required by
    /// `#[target_feature(enable = "neon")]`.
    #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
    #[target_feature(enable = "neon")]
    unsafe fn hash3_candidate_neon(
        &self,
        abs_pos: usize,
        current_abs_end: usize,
        min_match_len: usize,
    ) -> Option<MatchCandidate> {
        hash3_candidate_body!(
            self,
            abs_pos,
            current_abs_end,
            min_match_len,
            crate::encoding::fastpath::neon::common_prefix_len_ptr,
        )
    }
6504
    /// SSE4.2-umbrella variant of `hash3_candidate` (x86 / x86_64).
    ///
    /// Expands the shared `hash3_candidate_body!` macro with
    /// `fastpath::sse42::common_prefix_len_ptr` as the prefix-length kernel;
    /// every argument is forwarded to the macro unchanged.
    ///
    /// # Safety
    ///
    /// The executing CPU must support SSE4.2, as required by
    /// `#[target_feature(enable = "sse4.2")]`.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[target_feature(enable = "sse4.2")]
    unsafe fn hash3_candidate_sse42(
        &self,
        abs_pos: usize,
        current_abs_end: usize,
        min_match_len: usize,
    ) -> Option<MatchCandidate> {
        hash3_candidate_body!(
            self,
            abs_pos,
            current_abs_end,
            min_match_len,
            crate::encoding::fastpath::sse42::common_prefix_len_ptr,
        )
    }
6521
    /// AVX2+BMI2-umbrella variant of `hash3_candidate` (x86 / x86_64).
    ///
    /// Expands the shared `hash3_candidate_body!` macro with
    /// `fastpath::avx2_bmi2::common_prefix_len_ptr` as the prefix-length
    /// kernel; every argument is forwarded to the macro unchanged.
    ///
    /// # Safety
    ///
    /// The executing CPU must support both AVX2 and BMI2, as required by
    /// `#[target_feature(enable = "avx2,bmi2")]`.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[target_feature(enable = "avx2,bmi2")]
    unsafe fn hash3_candidate_avx2_bmi2(
        &self,
        abs_pos: usize,
        current_abs_end: usize,
        min_match_len: usize,
    ) -> Option<MatchCandidate> {
        hash3_candidate_body!(
            self,
            abs_pos,
            current_abs_end,
            min_match_len,
            crate::encoding::fastpath::avx2_bmi2::common_prefix_len_ptr,
        )
    }
6538
    /// Portable scalar variant of `hash3_candidate`.
    ///
    /// Compiled on every target except little-endian AArch64. Expands the
    /// shared `hash3_candidate_body!` macro with
    /// `fastpath::scalar::common_prefix_len_ptr` as the prefix-length kernel;
    /// every argument is forwarded to the macro unchanged.
    #[cfg(not(all(target_arch = "aarch64", target_endian = "little")))]
    fn hash3_candidate_scalar(
        &self,
        abs_pos: usize,
        current_abs_end: usize,
        min_match_len: usize,
    ) -> Option<MatchCandidate> {
        hash3_candidate_body!(
            self,
            abs_pos,
            current_abs_end,
            min_match_len,
            crate::encoding::fastpath::scalar::common_prefix_len_ptr,
        )
    }
6554
6555    fn insert_hash3_only_no_rebase(&mut self, abs_pos: usize) {
6556        if self.hash3_log == 0 {
6557            return;
6558        }
6559        let idx = abs_pos - self.history_abs_start;
6560        let concat = &self.history[self.history_start..];
6561        if idx + 4 > concat.len() {
6562            return;
6563        }
6564        let Some(relative_pos) = self.relative_position(abs_pos) else {
6565            return;
6566        };
6567        let hash3 = Self::hash_position_at(concat, idx, self.hash3_log, 3);
6568        self.hash3_table[hash3] = relative_pos + 1;
6569    }
6570
6571    fn update_hash3_until(&mut self, abs_pos: usize) {
6572        if self.next_to_update3 < self.history_abs_start {
6573            self.next_to_update3 = self.history_abs_start;
6574        }
6575        if self.next_to_update3 >= abs_pos {
6576            return;
6577        }
6578        while self.next_to_update3 < abs_pos {
6579            if !self.can_skip_rebase_check_at(self.next_to_update3, abs_pos) {
6580                self.maybe_rebase_positions(self.next_to_update3);
6581            }
6582            self.insert_hash3_only_no_rebase(self.next_to_update3);
6583            self.next_to_update3 = self.next_to_update3.saturating_add(1);
6584        }
6585    }
6586
6587    fn relative_position(&self, abs_pos: usize) -> Option<u32> {
6588        let shifted_abs = abs_pos.checked_add(self.index_shift)?;
6589        let rel = shifted_abs.checked_sub(self.position_base)?;
6590        let rel_u32 = u32::try_from(rel).ok()?;
6591        // Donor parity: raw BT/HC tables use 0 as the empty sentinel, so the
6592        // very first absolute position in the first block (curr == 0) is not a
6593        // representable candidate index.
6594        if !self.allow_zero_relative_position && self.position_base == 0 && rel_u32 == 0 {
6595            return None;
6596        }
6597        // Positions are stored as (relative_pos + 1), with 0 reserved as the
6598        // empty sentinel. So the raw relative position itself must stay
6599        // strictly below u32::MAX.
6600        (rel_u32 < u32::MAX).then_some(rel_u32)
6601    }
6602
6603    #[inline(always)]
6604    fn can_skip_rebase_check_at(&self, abs_pos: usize, max_abs_pos: usize) -> bool {
6605        let max_rel_no_rebase = (u32::MAX as usize).saturating_sub(2);
6606        self.position_base == 0
6607            && self.index_shift == 0
6608            && max_abs_pos <= max_rel_no_rebase
6609            && (self.allow_zero_relative_position
6610                || abs_pos > self.history_abs_start
6611                || (self.parse_mode == HcParseMode::BtUltra2 && abs_pos == self.history_abs_start))
6612    }
6613
6614    /// Hot wrapper: every `insert_position*` call passes through here. The
6615    /// fast path is "no rebase needed" — `BtUltra2` first-position skip and a
6616    /// single `relative_position()` range probe. The actual table rebuild
6617    /// fires once per ~`u32::MAX` positions on rolling streams and never for
6618    /// typical single-shot inputs, so it lives in a separate `#[cold]`
6619    /// function. Inlining the hot wrapper lets the compiler fold the early
6620    /// return into the caller and keep the cold path off the i-cache.
6621    #[inline]
6622    fn maybe_rebase_positions(&mut self, abs_pos: usize) {
6623        if self.parse_mode == HcParseMode::BtUltra2
6624            && !self.allow_zero_relative_position
6625            && self.position_base == 0
6626            && abs_pos == 0
6627        {
6628            return;
6629        }
6630        let needs_rebase = self
6631            .relative_position(abs_pos)
6632            .is_none_or(|relative| relative >= u32::MAX - 1);
6633        if !needs_rebase {
6634            return;
6635        }
6636        self.rebase_positions_cold(abs_pos);
6637    }
6638
6639    #[cold]
6640    #[inline(never)]
6641    fn rebase_positions_cold(&mut self, abs_pos: usize) {
6642        // Keep all live history addressable after rebase.
6643        self.position_base = self.history_abs_start;
6644        self.index_shift = 0;
6645        self.allow_zero_relative_position = true;
6646        self.hash_table.fill(HC_EMPTY);
6647        self.hash3_table.fill(HC_EMPTY);
6648        self.chain_table.fill(HC_EMPTY);
6649
6650        let history_start = self.history_abs_start;
6651        // Rebuild only the already-inserted prefix. The caller inserts abs_pos
6652        // immediately after this, and later positions are added in-order.
6653        if self.uses_bt_matchfinder() {
6654            let rebuild_end = self.history_abs_end();
6655            let mut pos = history_start;
6656            while pos < abs_pos {
6657                let forward = self.bt_insert_step_no_rebase(pos, rebuild_end, abs_pos);
6658                pos = pos.saturating_add(forward.max(1));
6659            }
6660        } else {
6661            for pos in history_start..abs_pos {
6662                self.insert_position_no_rebase(pos);
6663            }
6664        }
6665        self.next_to_update3 = self.next_to_update3.max(abs_pos);
6666    }
6667
    /// Inserts `abs_pos` into the hash/chain tables, rebasing first if needed.
    ///
    /// Runs `maybe_rebase_positions(abs_pos)`, which rebuilds the tables when
    /// the position's relative index is no longer representable, and then
    /// delegates the actual table update to `insert_position_no_rebase`.
    #[inline]
    fn insert_position(&mut self, abs_pos: usize) {
        self.maybe_rebase_positions(abs_pos);
        self.insert_position_no_rebase(abs_pos);
    }
6673
6674    #[inline]
6675    fn insert_position_no_rebase(&mut self, abs_pos: usize) {
6676        let idx = abs_pos.wrapping_sub(self.history_abs_start);
6677        let concat = &self.history[self.history_start..];
6678        if idx + 4 > concat.len() {
6679            return;
6680        }
6681        let hash = Self::hash_position_at(concat, idx, self.hash_log, 4);
6682        let Some(relative_pos) = self.relative_position(abs_pos) else {
6683            return;
6684        };
6685        let stored = relative_pos + 1;
6686        let chain_mask = (1usize << self.chain_log) - 1;
6687        let chain_idx = relative_pos as usize & chain_mask;
6688        // SAFETY: `hash` is produced by `hash_value_with_mls` which masks the
6689        // result down to `hash_log` bits, and `hash_table.len() == 1 <<
6690        // hash_log` (`ensure_tables`). `chain_idx` is `& chain_mask` so
6691        // `< chain_table.len() == 1 << chain_log`. Both indices are provably
6692        // in bounds, so the elided bounds checks save ~4 instructions per
6693        // call on this per-byte-of-input hot path.
6694        debug_assert!(hash < self.hash_table.len());
6695        debug_assert!(chain_idx < self.chain_table.len());
6696        unsafe {
6697            let prev = *self.hash_table.get_unchecked(hash);
6698            *self.chain_table.get_unchecked_mut(chain_idx) = prev;
6699            *self.hash_table.get_unchecked_mut(hash) = stored;
6700        }
6701    }
6702
6703    fn insert_positions(&mut self, start: usize, end: usize) {
6704        for pos in start..end {
6705            self.insert_position(pos);
6706        }
6707        self.next_to_update3 = self.next_to_update3.max(end);
6708    }
6709
6710    fn insert_positions_with_step(&mut self, start: usize, end: usize, step: usize) {
6711        if step == 0 {
6712            return;
6713        }
6714        let mut pos = start;
6715        while pos < end {
6716            self.insert_position(pos);
6717            let next = pos.saturating_add(step);
6718            if next <= pos {
6719                break;
6720            }
6721            pos = next;
6722        }
6723    }
6724
6725    // Fixed-size stack array is intentional: it avoids heap allocation on
6726    // the hot path and the sentinel loop exits at self.search_depth.
6727    fn chain_candidates(&self, abs_pos: usize) -> [usize; MAX_HC_SEARCH_DEPTH] {
6728        let mut buf = [usize::MAX; MAX_HC_SEARCH_DEPTH];
6729        let idx = abs_pos - self.history_abs_start;
6730        let concat = self.live_history();
6731        if idx + 4 > concat.len() {
6732            return buf;
6733        }
6734        let hash = self.hash_position(&concat[idx..]);
6735        let chain_mask = (1 << self.chain_log) - 1;
6736
6737        let mut cur = self.hash_table[hash];
6738        let mut filled = 0;
6739        // Follow chain up to search_depth valid candidates, skipping stale
6740        // entries (evicted from window) instead of stopping at them.
6741        // Stored values are (relative_pos + 1); decode with wrapping_sub(1)
6742        // and recover absolute position via position_base + relative.
6743        // Break on self-loops (masked chain_idx collision at periodicity).
6744        let mut steps = 0;
6745        let max_chain_steps = self.search_depth;
6746        while filled < self.search_depth && steps < max_chain_steps {
6747            if cur == HC_EMPTY {
6748                break;
6749            }
6750            let candidate_rel = cur.wrapping_sub(1) as usize;
6751            let candidate_abs = self.position_base + candidate_rel;
6752            let next = self.chain_table[candidate_rel & chain_mask];
6753            steps += 1;
6754            if next == cur {
6755                // Self-loop: two positions share chain_idx, stop to avoid
6756                // spinning on the same candidate forever.
6757                if candidate_abs >= self.history_abs_start && candidate_abs < abs_pos {
6758                    buf[filled] = candidate_abs;
6759                }
6760                break;
6761            }
6762            cur = next;
6763            if candidate_abs < self.history_abs_start || candidate_abs >= abs_pos {
6764                continue;
6765            }
6766            buf[filled] = candidate_abs;
6767            filled += 1;
6768        }
6769        buf
6770    }
6771
6772    fn find_best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
6773        let rep = self.repcode_candidate(abs_pos, lit_len);
6774        let hash = self.hash_chain_candidate(abs_pos, lit_len);
6775        Self::better_candidate(rep, hash)
6776    }
6777
6778    fn hash_chain_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
6779        let concat = self.live_history();
6780        let current_idx = abs_pos - self.history_abs_start;
6781        if current_idx + HC_MIN_MATCH_LEN > concat.len() {
6782            return None;
6783        }
6784
6785        let mut best: Option<MatchCandidate> = None;
6786        for candidate_abs in self.chain_candidates(abs_pos) {
6787            if candidate_abs == usize::MAX {
6788                break;
6789            }
6790            let candidate_idx = candidate_abs - self.history_abs_start;
6791            let match_len =
6792                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
6793            if match_len >= HC_MIN_MATCH_LEN {
6794                let candidate = self.extend_backwards(candidate_abs, abs_pos, match_len, lit_len);
6795                best = Self::better_candidate(best, Some(candidate));
6796                if best.is_some_and(|b| b.match_len >= self.target_len) {
6797                    return best;
6798                }
6799            }
6800        }
6801        best
6802    }
6803
6804    fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
6805        let reps = if lit_len == 0 {
6806            [
6807                Some(self.offset_hist[1] as usize),
6808                Some(self.offset_hist[2] as usize),
6809                (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
6810            ]
6811        } else {
6812            [
6813                Some(self.offset_hist[0] as usize),
6814                Some(self.offset_hist[1] as usize),
6815                Some(self.offset_hist[2] as usize),
6816            ]
6817        };
6818
6819        let concat = self.live_history();
6820        let current_idx = abs_pos - self.history_abs_start;
6821        if current_idx + HC_MIN_MATCH_LEN > concat.len() {
6822            return None;
6823        }
6824
6825        let mut best = None;
6826        for rep in reps.into_iter().flatten() {
6827            if rep == 0 || rep > abs_pos {
6828                continue;
6829            }
6830            let candidate_pos = abs_pos - rep;
6831            if candidate_pos < self.history_abs_start {
6832                continue;
6833            }
6834            let candidate_idx = candidate_pos - self.history_abs_start;
6835            let match_len =
6836                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
6837            if match_len >= HC_MIN_MATCH_LEN {
6838                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
6839                best = Self::better_candidate(best, Some(candidate));
6840            }
6841        }
6842        best
6843    }
6844
    /// Cross-platform entry. Dispatches to the kernel-specific variant so the
    /// per-rep prefix probe inlines without an ABI barrier per call. The
    /// on-encode hot path bypasses this dispatcher via the kernel-specific
    /// variants invoked from inside `collect_optimal_candidates_initialized_
    /// <kernel>`; this entry is kept for test / external callers only.
    ///
    /// All arguments are forwarded unchanged to the selected variant, and `f`
    /// is handed to that variant as its candidate callback.
    ///
    /// Exactly one of the three `cfg` branches below is compiled for a given
    /// target: little-endian AArch64, x86/x86_64, or everything else.
    #[allow(dead_code)]
    #[inline(always)]
    fn for_each_repcode_candidate_with_reps(
        &self,
        abs_pos: usize,
        lit_len: usize,
        reps: [u32; 3],
        current_abs_end: usize,
        min_match_len: usize,
        f: impl FnMut(MatchCandidate),
    ) {
        // SAFETY: every `unsafe` call below targets a `#[target_feature]`
        // variant (same shape as the BT walk dispatchers). On x86/x86_64 the
        // variant is picked from the value returned by `select_kernel()`.
        // NOTE(review): this presumes `select_kernel()` only reports kernels
        // the running CPU supports, and that NEON is available on every
        // little-endian AArch64 target this crate builds for — confirm.
        #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
        unsafe {
            self.for_each_repcode_candidate_with_reps_neon(
                abs_pos,
                lit_len,
                reps,
                current_abs_end,
                min_match_len,
                f,
            )
        }
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            use crate::encoding::fastpath::{FastpathKernel, select_kernel};
            match select_kernel() {
                FastpathKernel::Avx2Bmi2 => unsafe {
                    self.for_each_repcode_candidate_with_reps_avx2_bmi2(
                        abs_pos,
                        lit_len,
                        reps,
                        current_abs_end,
                        min_match_len,
                        f,
                    )
                },
                FastpathKernel::Sse42 => unsafe {
                    self.for_each_repcode_candidate_with_reps_sse42(
                        abs_pos,
                        lit_len,
                        reps,
                        current_abs_end,
                        min_match_len,
                        f,
                    )
                },
                FastpathKernel::Scalar => self.for_each_repcode_candidate_with_reps_scalar(
                    abs_pos,
                    lit_len,
                    reps,
                    current_abs_end,
                    min_match_len,
                    f,
                ),
            }
        }
        // Every remaining target falls back to the scalar variant.
        #[cfg(not(any(
            all(target_arch = "aarch64", target_endian = "little"),
            target_arch = "x86",
            target_arch = "x86_64"
        )))]
        {
            self.for_each_repcode_candidate_with_reps_scalar(
                abs_pos,
                lit_len,
                reps,
                current_abs_end,
                min_match_len,
                f,
            )
        }
    }
6924
    /// NEON-umbrella variant: per-rep `common_prefix_len_ptr` call inlines via
    /// the shared body macro.
    ///
    /// Compiled only for little-endian AArch64. The whole body is the
    /// `for_each_repcode_candidate_body!` expansion, parameterised with the
    /// NEON `common_prefix_len_ptr`.
    ///
    /// # Safety
    ///
    /// The function is compiled with `#[target_feature(enable = "neon")]`, so
    /// the caller must ensure the executing CPU supports NEON.
    /// NOTE(review): any further preconditions come from the body macro, which
    /// is defined elsewhere — confirm there.
    #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
    #[target_feature(enable = "neon")]
    unsafe fn for_each_repcode_candidate_with_reps_neon(
        &self,
        abs_pos: usize,
        lit_len: usize,
        reps: [u32; 3],
        current_abs_end: usize,
        min_match_len: usize,
        mut f: impl FnMut(MatchCandidate),
    ) {
        for_each_repcode_candidate_body!(
            self,
            abs_pos,
            lit_len,
            reps,
            current_abs_end,
            min_match_len,
            f,
            crate::encoding::fastpath::neon::common_prefix_len_ptr,
        )
    }
6949
    /// SSE4.2 umbrella variant.
    ///
    /// Compiled only for x86/x86_64. The whole body is the
    /// `for_each_repcode_candidate_body!` expansion, parameterised with the
    /// SSE4.2 `common_prefix_len_ptr`.
    ///
    /// # Safety
    ///
    /// The function is compiled with `#[target_feature(enable = "sse4.2")]`,
    /// so the caller must ensure the executing CPU supports SSE4.2.
    /// NOTE(review): any further preconditions come from the body macro, which
    /// is defined elsewhere — confirm there.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[target_feature(enable = "sse4.2")]
    unsafe fn for_each_repcode_candidate_with_reps_sse42(
        &self,
        abs_pos: usize,
        lit_len: usize,
        reps: [u32; 3],
        current_abs_end: usize,
        min_match_len: usize,
        mut f: impl FnMut(MatchCandidate),
    ) {
        for_each_repcode_candidate_body!(
            self,
            abs_pos,
            lit_len,
            reps,
            current_abs_end,
            min_match_len,
            f,
            crate::encoding::fastpath::sse42::common_prefix_len_ptr,
        )
    }
6973
    /// AVX2+BMI2 umbrella variant.
    ///
    /// Compiled only for x86/x86_64. The whole body is the
    /// `for_each_repcode_candidate_body!` expansion, parameterised with the
    /// AVX2+BMI2 `common_prefix_len_ptr`.
    ///
    /// # Safety
    ///
    /// The function is compiled with
    /// `#[target_feature(enable = "avx2,bmi2")]`, so the caller must ensure the
    /// executing CPU supports both AVX2 and BMI2.
    /// NOTE(review): any further preconditions come from the body macro, which
    /// is defined elsewhere — confirm there.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[target_feature(enable = "avx2,bmi2")]
    unsafe fn for_each_repcode_candidate_with_reps_avx2_bmi2(
        &self,
        abs_pos: usize,
        lit_len: usize,
        reps: [u32; 3],
        current_abs_end: usize,
        min_match_len: usize,
        mut f: impl FnMut(MatchCandidate),
    ) {
        for_each_repcode_candidate_body!(
            self,
            abs_pos,
            lit_len,
            reps,
            current_abs_end,
            min_match_len,
            f,
            crate::encoding::fastpath::avx2_bmi2::common_prefix_len_ptr,
        )
    }
6997
    /// Scalar fallback, compiled on every target except little-endian AArch64
    /// (which uses the NEON variant instead).
    ///
    /// Safe to call: unlike the SIMD variants it carries no
    /// `#[target_feature]` requirement. The whole body is the
    /// `for_each_repcode_candidate_body!` expansion, parameterised with the
    /// scalar `common_prefix_len_ptr`.
    #[cfg(not(all(target_arch = "aarch64", target_endian = "little")))]
    fn for_each_repcode_candidate_with_reps_scalar(
        &self,
        abs_pos: usize,
        lit_len: usize,
        reps: [u32; 3],
        current_abs_end: usize,
        min_match_len: usize,
        mut f: impl FnMut(MatchCandidate),
    ) {
        for_each_repcode_candidate_body!(
            self,
            abs_pos,
            lit_len,
            reps,
            current_abs_end,
            min_match_len,
            f,
            crate::encoding::fastpath::scalar::common_prefix_len_ptr,
        )
    }
7020
7021    fn extend_backwards(
7022        &self,
7023        mut candidate_pos: usize,
7024        mut abs_pos: usize,
7025        mut match_len: usize,
7026        lit_len: usize,
7027    ) -> MatchCandidate {
7028        let concat = self.live_history();
7029        let min_abs_pos = abs_pos - lit_len;
7030        while abs_pos > min_abs_pos
7031            && candidate_pos > self.history_abs_start
7032            && concat[candidate_pos - self.history_abs_start - 1]
7033                == concat[abs_pos - self.history_abs_start - 1]
7034        {
7035            candidate_pos -= 1;
7036            abs_pos -= 1;
7037            match_len += 1;
7038        }
7039        MatchCandidate {
7040            start: abs_pos,
7041            offset: abs_pos - candidate_pos,
7042            match_len,
7043        }
7044    }
7045
7046    fn better_candidate(
7047        lhs: Option<MatchCandidate>,
7048        rhs: Option<MatchCandidate>,
7049    ) -> Option<MatchCandidate> {
7050        match (lhs, rhs) {
7051            (None, other) | (other, None) => other,
7052            (Some(lhs), Some(rhs)) => {
7053                let lhs_gain = Self::match_gain(lhs.match_len, lhs.offset);
7054                let rhs_gain = Self::match_gain(rhs.match_len, rhs.offset);
7055                if rhs_gain > lhs_gain {
7056                    Some(rhs)
7057                } else {
7058                    Some(lhs)
7059                }
7060            }
7061        }
7062    }
7063
7064    fn match_gain(match_len: usize, offset: usize) -> i32 {
7065        debug_assert!(
7066            offset > 0,
7067            "zstd offsets are 1-indexed, offset=0 is invalid"
7068        );
7069        let offset_bits = 32 - (offset as u32).leading_zeros() as i32;
7070        (match_len as i32) * 4 - offset_bits
7071    }
7072
7073    // Lazy lookahead queries pos+1/pos+2 before they are inserted into hash
7074    // tables — matching C zstd behavior. Seeding before comparing would let a
7075    // position match against itself, changing semantics.
7076    fn pick_lazy_match(
7077        &self,
7078        abs_pos: usize,
7079        lit_len: usize,
7080        best: Option<MatchCandidate>,
7081    ) -> Option<MatchCandidate> {
7082        let best = best?;
7083        if best.match_len >= self.target_len
7084            || abs_pos + 1 + HC_MIN_MATCH_LEN > self.history_abs_end()
7085        {
7086            return Some(best);
7087        }
7088
7089        let current_gain = Self::match_gain(best.match_len, best.offset) + 4;
7090
7091        // Lazy check: evaluate pos+1
7092        let next = self.find_best_match(abs_pos + 1, lit_len + 1);
7093        if let Some(next) = next {
7094            let next_gain = Self::match_gain(next.match_len, next.offset);
7095            if next_gain > current_gain {
7096                return None;
7097            }
7098        }
7099
7100        // Lazy2 check: also evaluate pos+2
7101        if self.lazy_depth >= 2 && abs_pos + 2 + HC_MIN_MATCH_LEN <= self.history_abs_end() {
7102            let next2 = self.find_best_match(abs_pos + 2, lit_len + 2);
7103            if let Some(next2) = next2 {
7104                let next2_gain = Self::match_gain(next2.match_len, next2.offset);
7105                // Must beat current gain + extra literal cost
7106                if next2_gain > current_gain + 4 {
7107                    return None;
7108                }
7109            }
7110        }
7111
7112        Some(best)
7113    }
7114}
7115
/// Feeds a series of small blocks through `MatchGenerator`, replays every
/// emitted sequence into `reconstructed`, and finally checks that the replayed
/// bytes equal the concatenation of all blocks fed in.
///
/// After each block the test also asserts that one further `next_sequence`
/// call returns `false`.
#[test]
fn matches() {
    let mut matcher = MatchGenerator::new(1000);
    let mut original_data = Vec::new();
    let mut reconstructed = Vec::new();

    // Applies one emitted sequence to `reconstructed` the way a decoder would,
    // asserting basic validity of the sequence on the way.
    let replay_sequence = |seq: Sequence<'_>, reconstructed: &mut Vec<u8>| match seq {
        Sequence::Literals { literals } => {
            assert!(!literals.is_empty());
            reconstructed.extend_from_slice(literals);
        }
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            assert!(offset > 0);
            assert!(match_len >= MIN_MATCH_LEN);
            reconstructed.extend_from_slice(literals);
            assert!(offset <= reconstructed.len());
            let start = reconstructed.len() - offset;
            // Copy byte by byte so a match that overlaps its own output
            // (offset < match_len) reads bytes pushed earlier in this loop.
            for i in 0..match_len {
                let byte = reconstructed[start + i];
                reconstructed.push(byte);
            }
        }
    };

    // Block 1: ten zero bytes.
    matcher.add_data(
        alloc::vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));

    assert!(!matcher.next_sequence(|_| {}));

    // Block 2: "1..6" three times, followed by five zeros.
    matcher.add_data(
        alloc::vec![
            1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
        ],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[
        1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
    ]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Block 3: "1..11" followed by five zeros.
    matcher.add_data(
        alloc::vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Block 4: five zeros.
    matcher.add_data(
        alloc::vec![0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Block 5: "7..11", bytes that already appeared in block 3.
    matcher.add_data(
        alloc::vec![7, 8, 9, 10, 11],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[7, 8, 9, 10, 11]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Block 6: added and then skipped via `skip_matching`. No sequence is
    // replayed for it, so its bytes are appended to `reconstructed` by hand.
    matcher.add_data(
        alloc::vec![1, 3, 5, 7, 9],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    matcher.skip_matching();
    original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
    reconstructed.extend_from_slice(&[1, 3, 5, 7, 9]);
    assert!(!matcher.next_sequence(|_| {}));

    // Block 7: the same bytes as the skipped block, this time matched normally.
    matcher.add_data(
        alloc::vec![1, 3, 5, 7, 9],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[1, 3, 5, 7, 9]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Block 8: a five-byte run repeated inside the block, plus a short tail.
    matcher.add_data(
        alloc::vec![0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    assert_eq!(reconstructed, original_data);
}
7235
/// Feeds two identical 128 KiB periodic blocks through the dfast matcher and
/// checks that replaying the emitted sequences reproduces each block.
#[test]
fn dfast_matches_roundtrip_multi_block_pattern() {
    const BLOCK_LEN: usize = 128 * 1024;
    let pattern = [9, 21, 44, 184, 19, 96, 171, 109, 141, 251];
    let build_block = || -> Vec<u8> { pattern.iter().copied().cycle().take(BLOCK_LEN).collect() };
    let first_block = build_block();
    let second_block = build_block();

    let mut matcher = DfastMatchGenerator::new(1 << 22);

    // Applies one emitted sequence to `decoded` the way a decoder would.
    let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| {
        let (literals, copy) = match seq {
            Sequence::Literals { literals } => (literals, None),
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => (literals, Some((offset, match_len))),
        };
        decoded.extend_from_slice(literals);
        if let Some((offset, match_len)) = copy {
            let start = decoded.len() - offset;
            // Byte-wise copy: the source range may overlap the bytes being
            // appended.
            for src in start..start + match_len {
                let byte = decoded[src];
                decoded.push(byte);
            }
        }
    };

    let mut replayed = Vec::new();

    matcher.add_data(first_block.clone(), |_| {});
    matcher.start_matching(|seq| replay_sequence(&mut replayed, seq));
    assert_eq!(replayed, first_block);

    matcher.add_data(second_block.clone(), |_| {});
    let second_start = replayed.len();
    matcher.start_matching(|seq| replay_sequence(&mut replayed, seq));

    assert_eq!(&replayed[second_start..], second_block.as_slice());
}
7270
/// Resets the driver to the default level, checks that the dfast backend and
/// a 4 MiB window are selected, round-trips a block that repeats a skipped
/// block, then resets to `Fastest` and checks the window shrinks to 128 KiB.
#[test]
fn driver_switches_backends_and_initializes_dfast_via_reset() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    driver.reset(CompressionLevel::Default);
    assert_eq!(driver.active_backend, MatcherBackend::Dfast);
    assert_eq!(driver.window_size(), (1u64 << 22));

    // First block: committed, then skipped instead of matched.
    let mut skipped_block = driver.get_next_space();
    skipped_block[..12].copy_from_slice(b"abcabcabcabc");
    skipped_block.truncate(12);
    driver.commit_space(skipped_block);
    assert_eq!(driver.get_last_space(), b"abcabcabcabc");
    driver.skip_matching_with_hint(None);

    // Second block: same bytes, matched normally.
    let mut matched_block = driver.get_next_space();
    matched_block[..12].copy_from_slice(b"abcabcabcabc");
    matched_block.truncate(12);
    driver.commit_space(matched_block);

    // The skipped block produced no sequences, so the replay buffer starts
    // with its bytes already in place.
    let mut replayed = b"abcabcabcabc".to_vec();
    driver.start_matching(|seq| {
        let (literals, copy) = match seq {
            Sequence::Literals { literals } => (literals, None),
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => (literals, Some((offset, match_len))),
        };
        replayed.extend_from_slice(literals);
        if let Some((offset, match_len)) = copy {
            let start = replayed.len() - offset;
            for src in start..start + match_len {
                let byte = replayed[src];
                replayed.push(byte);
            }
        }
    });
    assert_eq!(replayed, b"abcabcabcabcabcabcabcabc");

    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.window_size(), (1u64 << 17));
}
7312
/// Resetting the driver to level 4 must activate the row backend.
#[test]
fn driver_level4_selects_row_backend() {
    let mut level4_driver = MatchGeneratorDriver::new(32, 2);
    level4_driver.reset(CompressionLevel::Level(4));
    let selected = level4_driver.active_backend;
    assert_eq!(selected, MatcherBackend::Row);
}
7319
/// Levels 16 and 17 must resolve to the hash-chain backend in `BtOpt` mode.
#[test]
fn level_16_17_use_btopt_parse_mode() {
    for level in [16, 17] {
        let resolved = resolve_level_params(CompressionLevel::Level(level), None);
        assert_eq!(resolved.backend, MatcherBackend::HashChain);
        assert_eq!(resolved.hc.parse_mode, HcParseMode::BtOpt);
    }
}
7329
/// Levels 18 and 19 must resolve to the hash-chain backend in `BtUltra` mode.
#[test]
fn level_18_19_use_btultra_parse_mode() {
    for level in [18, 19] {
        let resolved = resolve_level_params(CompressionLevel::Level(level), None);
        assert_eq!(resolved.backend, MatcherBackend::HashChain);
        assert_eq!(resolved.hc.parse_mode, HcParseMode::BtUltra);
    }
}
7339
/// Levels 20 through 22 must resolve to the hash-chain backend in `BtUltra2`
/// mode.
#[test]
fn level_20_22_use_btultra2_parse_mode() {
    (20..=22)
        .map(|level| resolve_level_params(CompressionLevel::Level(level), None))
        .for_each(|resolved| {
            assert_eq!(resolved.backend, MatcherBackend::HashChain);
            assert_eq!(resolved.hc.parse_mode, HcParseMode::BtUltra2);
        });
}
7348
/// Pins the level-22 parameters resolved when no source-size hint is given.
#[test]
fn level22_uses_donor_target_length_and_large_input_tables() {
    let unhinted = resolve_level_params(CompressionLevel::Level(22), None);
    let hc = &unhinted.hc;

    assert_eq!(unhinted.window_log, 27);
    assert_eq!(hc.hash_log, 25);
    assert_eq!(hc.chain_log, 27);
    assert_eq!(hc.search_depth, 1 << 9);
    assert_eq!(hc.target_len, 999);
}
7358
/// Pins the level-22 parameters resolved for 16 KiB, 128 KiB and 256 KiB
/// source-size hints.
#[test]
fn level22_source_size_hint_uses_donor_btultra2_tiers() {
    // (source size hint, window_log, hash_log, chain_log, search_depth)
    let tiers = [
        (16 * 1024, 14, 15, 15, 1 << 10),
        (128 * 1024, 17, 17, 18, 1 << 11),
        (256 * 1024, 18, 19, 19, 1 << 13),
    ];

    for (hint, window_log, hash_log, chain_log, search_depth) in tiers {
        let resolved = resolve_level_params(CompressionLevel::Level(22), Some(hint));
        assert_eq!(resolved.window_log, window_log);
        assert_eq!(resolved.hc.hash_log, hash_log);
        assert_eq!(resolved.hc.chain_log, chain_log);
        assert_eq!(resolved.hc.search_depth, search_depth);
        // The target length is the same in every tier.
        assert_eq!(resolved.hc.target_len, 999);
    }
}
7382
/// Compares the parameters resolved for level 22 with a 15,027-byte hint
/// against what the reference library's `ZSTD_getCParams` reports for the same
/// level and source size.
#[test]
fn level22_small_source_size_hint_matches_donor_cparams() {
    use zstd::zstd_safe::zstd_sys;

    let source_size = 15_027u64;
    // SAFETY: the call receives only plain integer arguments and its result is
    // used by value. NOTE(review): presumed to have no further preconditions —
    // confirm against the zstd C API documentation.
    let reference = unsafe { zstd_sys::ZSTD_getCParams(22, source_size, 0) };
    let ours = resolve_level_params(CompressionLevel::Level(22), Some(source_size));

    assert_eq!(ours.window_log as u32, reference.windowLog);
    assert_eq!(ours.hc.chain_log as u32, reference.chainLog);
    assert_eq!(ours.hc.hash_log as u32, reference.hashLog);
    // The reference reports a log2 value; ours stores the depth itself.
    assert_eq!(ours.hc.search_depth as u32, 1u32 << reference.searchLog);
    assert_eq!(HC_OPT_MIN_MATCH_LEN as u32, reference.minMatch);
    assert_eq!(ours.hc.target_len as u32, reference.targetLength);
}
7398
/// `hash3_log` must be 14 after configuring with the 16 KiB level-22 config
/// and window log 14, and `HC3_HASH_LOG` after configuring with the full
/// level-22 config and window log 27.
#[test]
fn level22_small_source_uses_window_bounded_hash3_log() {
    let mut generator = HcMatchGenerator::new(1 << 14);

    generator.configure(BTULTRA2_HC_CONFIG_L22_16K, 14);
    let small_window_log = generator.hash3_log;
    assert_eq!(small_window_log, 14);

    generator.configure(BTULTRA2_HC_CONFIG_L22, 27);
    let large_window_log = generator.hash3_log;
    assert_eq!(large_window_log, HC3_HASH_LOG);
}
7408
/// After matching a first 32 KiB block under the btultra2 config, the
/// optimal-parser state must hold non-zero literal-length and offset-code
/// sums.
#[test]
fn btultra2_seed_pass_initializes_opt_state() {
    let mut generator = HcMatchGenerator::new(1 << 20);
    generator.configure(BTULTRA2_HC_CONFIG, 26);

    // 32 KiB of the byte values 0..=250 repeating.
    let block: Vec<u8> = (0u8..251).cycle().take(32 * 1024).collect();
    generator.add_data(block, |_| {});
    generator.start_matching(|_| {});

    let stats = &generator.opt_state;
    assert!(
        stats.lit_length_sum > 0,
        "btultra2 first block should seed non-zero sequence statistics"
    );
    assert!(
        stats.off_code_sum > 0,
        "btultra2 first block should seed offset-code statistics"
    );
}
7425
/// Neither btultra2 cost profile (second argument `false` or `true`) may set
/// `favor_small_offsets`.
#[test]
fn btultra2_profile_disables_small_offset_handicap() {
    let primary = HcOptimalCostProfile::for_mode(HcParseMode::BtUltra2, false);
    assert!(
        !primary.favor_small_offsets,
        "btultra2 primary profile should match donor opt2 offset pricing"
    );

    let secondary = HcOptimalCostProfile::for_mode(HcParseMode::BtUltra2, true);
    assert!(
        !secondary.favor_small_offsets,
        "btultra2 secondary profile should match donor opt2 offset pricing"
    );
}
7439
/// Both btultra2 cost profiles must agree on every compared field, and the
/// first one must use accurate pricing.
#[test]
fn btultra2_profile_is_single_pass_opt2() {
    let primary = HcOptimalCostProfile::for_mode(HcParseMode::BtUltra2, false);
    let secondary = HcOptimalCostProfile::for_mode(HcParseMode::BtUltra2, true);

    // Field-by-field equality between the two profiles.
    assert_eq!(primary.max_chain_depth, secondary.max_chain_depth);
    assert_eq!(
        primary.sufficient_match_len,
        secondary.sufficient_match_len
    );
    assert_eq!(primary.accurate, secondary.accurate);
    assert_eq!(primary.favor_small_offsets, secondary.favor_small_offsets);

    assert!(
        primary.accurate,
        "btultra2 should use donor opt2 accurate pricing in the main pass"
    );
}
7453
/// The btultra cost profile must keep a chain depth of 32.
#[test]
fn btultra_profile_keeps_donor_search_depth_budget() {
    let btultra = HcOptimalCostProfile::for_mode(HcParseMode::BtUltra, false);
    let depth = btultra.max_chain_depth;
    assert_eq!(
        depth, 32,
        "btultra should not cap chain depth below donor opt2 search budget"
    );
}
7462
/// The btopt cost profile must keep a chain depth of 32.
#[test]
fn btopt_profile_keeps_donor_search_depth_budget() {
    let btopt = HcOptimalCostProfile::for_mode(HcParseMode::BtOpt, false);
    let depth = btopt.max_chain_depth;
    assert_eq!(
        depth, 32,
        "btopt should not cap chain depth below donor btopt search budget"
    );
}
7471
/// With `target_len` forced to 13, the sufficient match length for the
/// btultra2 profile must be 13.
#[test]
fn sufficient_match_len_is_clamped_by_target_len() {
    let mut generator = HcMatchGenerator::new(1 << 20);
    generator.configure(BTULTRA2_HC_CONFIG, 26);
    // Override whatever `configure` chose.
    generator.target_len = 13;

    let second_pass = HcOptimalCostProfile::for_mode(HcParseMode::BtUltra2, true);
    let sufficient = generator.sufficient_match_len_for_pass(second_pass);
    assert_eq!(sufficient, 13);
}
7480
/// For every optimal parse mode / pass combination, the sufficient match
/// length must equal a `target_len` of 57.
#[test]
fn opt_modes_use_target_len_as_sufficient_len() {
    let mut generator = HcMatchGenerator::new(1 << 20);
    generator.target_len = 57;

    let combinations = [
        (HcParseMode::BtOpt, false),
        (HcParseMode::BtUltra, false),
        (HcParseMode::BtUltra2, false),
        (HcParseMode::BtUltra2, true),
    ];
    combinations
        .into_iter()
        .map(|(mode, pass2)| HcOptimalCostProfile::for_mode(mode, pass2))
        .for_each(|profile| {
            assert_eq!(generator.sufficient_match_len_for_pass(profile), 57);
        });
}
7495
/// With an enormous `target_len`, the sufficient match length must be
/// `HC_OPT_NUM - 1`.
#[test]
fn sufficient_match_len_is_capped_by_opt_num() {
    let mut generator = HcMatchGenerator::new(1 << 20);
    generator.target_len = usize::MAX / 2;

    let second_pass = HcOptimalCostProfile::for_mode(HcParseMode::BtUltra2, true);
    let sufficient = generator.sufficient_match_len_for_pass(second_pass);
    assert_eq!(sufficient, HC_OPT_NUM - 1);
}
7503
/// Seeds the optimal-parser state from a Huffman table plus the default
/// LL/ML/OF FSE tables, rescales on a tiny input, and checks that none of the
/// frequency tables hold the values named below as bootstrap fallbacks.
#[test]
fn dictionary_entropy_seed_initializes_opt_state_from_tables() {
    let mut generator = HcMatchGenerator::new(1 << 20);
    generator.configure(BTULTRA2_HC_CONFIG, 26);

    let huffman = crate::huff0::huff0_encoder::HuffmanTable::build_from_data(
        b"aaabbbbccccddddeeeeefffffgggg",
    );
    let ll_table = crate::fse::fse_encoder::default_ll_table();
    let ml_table = crate::fse::fse_encoder::default_ml_table();
    let of_table = crate::fse::fse_encoder::default_of_table();
    generator.seed_dictionary_entropy(
        Some(&huffman),
        Some(&ll_table),
        Some(&ml_table),
        Some(&of_table),
    );

    let profile = HcOptimalCostProfile::for_mode(HcParseMode::BtUltra2, false);
    generator.opt_state.rescale_freqs(b"abcd", profile);

    // Fallback LL table: 4, 2, then all ones.
    let mut fallback_ll = [1u32; HC_MAX_LL + 1];
    fallback_ll[0] = 4;
    fallback_ll[1] = 2;

    let stats = &generator.opt_state;
    assert_ne!(
        stats.lit_length_freq, fallback_ll,
        "dictionary entropy should override fallback LL bootstrap frequencies"
    );
    assert!(
        stats.match_length_freq.iter().any(|&v| v != 1),
        "dictionary entropy should seed non-uniform ML frequencies"
    );
    assert_ne!(
        stats.off_code_freq[0], 6,
        "dictionary entropy should override fallback OF bootstrap frequencies"
    );
}
7540
/// Same checks as the full dictionary-seed test, but with no Huffman table:
/// the FSE tables alone must still move the frequencies off the values named
/// below as bootstrap fallbacks.
#[test]
fn dictionary_fse_seed_applies_without_huffman_seed() {
    let mut generator = HcMatchGenerator::new(1 << 20);
    generator.configure(BTULTRA2_HC_CONFIG, 26);

    let ll_table = crate::fse::fse_encoder::default_ll_table();
    let ml_table = crate::fse::fse_encoder::default_ml_table();
    let of_table = crate::fse::fse_encoder::default_of_table();
    generator.seed_dictionary_entropy(None, Some(&ll_table), Some(&ml_table), Some(&of_table));

    let profile = HcOptimalCostProfile::for_mode(HcParseMode::BtUltra2, false);
    generator.opt_state.rescale_freqs(b"abcd", profile);

    // Fallback LL table: 4, 2, then all ones.
    let mut fallback_ll = [1u32; HC_MAX_LL + 1];
    fallback_ll[0] = 4;
    fallback_ll[1] = 2;

    let stats = &generator.opt_state;
    assert_ne!(
        stats.lit_length_freq, fallback_ll,
        "FSE seed should still override LL bootstrap frequencies without huffman seed"
    );
    assert!(
        stats.match_length_freq.iter().any(|&v| v != 1),
        "FSE seed should still seed non-uniform ML frequencies"
    );
    assert_ne!(
        stats.off_code_freq[0], 6,
        "FSE seed should still override OF bootstrap frequencies without huffman seed"
    );
}
7572
/// After seeding from the default FSE tables and rescaling on a three-byte
/// input, the price type must be `Dynamic`.
#[test]
fn dictionary_seed_overrides_predef_price_mode_on_tiny_input() {
    let mut generator = HcMatchGenerator::new(1 << 20);
    generator.configure(BTULTRA2_HC_CONFIG, 26);

    let ll_table = crate::fse::fse_encoder::default_ll_table();
    let ml_table = crate::fse::fse_encoder::default_ml_table();
    let of_table = crate::fse::fse_encoder::default_of_table();
    generator.seed_dictionary_entropy(None, Some(&ll_table), Some(&ml_table), Some(&of_table));

    let profile = HcOptimalCostProfile::for_mode(HcParseMode::BtUltra2, false);
    generator.opt_state.rescale_freqs(b"abc", profile);

    let stays_dynamic = matches!(generator.opt_state.price_type, HcOptPriceType::Dynamic);
    assert!(
        stays_dynamic,
        "dictionary-seeded first block should stay in dynamic mode even for tiny src"
    );
}
7591
/// In both predefined and dynamic pricing, the literal-length price at
/// `HC_BLOCKSIZE_MAX` must exceed the price one below it by exactly
/// `HC_BITCOST_MULTIPLIER`.
#[test]
fn lit_length_price_blocksize_max_costs_one_extra_bit() {
    let just_below_max = HC_BLOCKSIZE_MAX.saturating_sub(1);

    // Predefined pricing on a fresh state.
    {
        let profile = HcOptimalCostProfile::for_mode(HcParseMode::BtUltra2, false);
        let mut stats = HcOptState::new();
        stats.price_type = HcOptPriceType::Predefined;

        let at_max = profile.lit_length_price(&stats, HC_BLOCKSIZE_MAX);
        let below_max = profile.lit_length_price(&stats, just_below_max);
        assert_eq!(
            at_max,
            below_max + HC_BITCOST_MULTIPLIER,
            "predefined litLength pricing at BLOCKSIZE_MAX must add exactly one bit"
        );
    }

    // Dynamic pricing with every frequency set to one.
    {
        let profile = HcOptimalCostProfile::for_mode(HcParseMode::BtUltra2, true);
        let mut stats = HcOptState::new();
        stats.price_type = HcOptPriceType::Dynamic;

        stats.lit_length_freq.fill(1);
        stats.lit_length_sum = (HC_MAX_LL + 1) as u32;
        stats.match_length_freq.fill(1);
        stats.match_length_sum = (HC_MAX_ML + 1) as u32;
        stats.off_code_freq.fill(1);
        stats.off_code_sum = (HC_MAX_OFF + 1) as u32;
        stats.lit_freq.fill(1);
        stats.lit_sum = (HC_MAX_LIT + 1) as u32;
        stats.set_base_prices(true);

        let at_max = profile.lit_length_price(&stats, HC_BLOCKSIZE_MAX);
        let below_max = profile.lit_length_price(&stats, just_below_max);
        assert_eq!(
            at_max,
            below_max + HC_BITCOST_MULTIPLIER,
            "dynamic litLength pricing at BLOCKSIZE_MAX must add exactly one bit"
        );
    }
}
7626
/// Once dictionary entropy has been seeded, the btultra2 seed pass must be
/// declined even for a block just above `HC_PREDEF_THRESHOLD`.
#[test]
fn btultra2_seed_pass_disabled_when_dictionary_entropy_seed_present() {
    let mut generator = HcMatchGenerator::new(1 << 20);
    generator.configure(BTULTRA2_HC_CONFIG, 26);

    let ll_table = crate::fse::fse_encoder::default_ll_table();
    let ml_table = crate::fse::fse_encoder::default_ml_table();
    let of_table = crate::fse::fse_encoder::default_of_table();
    generator.seed_dictionary_entropy(None, Some(&ll_table), Some(&ml_table), Some(&of_table));

    let runs_seed_pass = generator.should_run_btultra2_seed_pass(HC_PREDEF_THRESHOLD + 1);
    assert!(
        !runs_seed_pass,
        "dictionary-seeded first block should skip btultra2 warmup pass"
    );
}
7640
/// With a non-zero `history_abs_start` and one chunk already in the window,
/// the btultra2 seed pass must be reported as not runnable.
#[test]
fn btultra2_seed_pass_disabled_when_prefix_history_exists() {
    let mut generator = HcMatchGenerator::new(1 << 20);
    generator.configure(BTULTRA2_HC_CONFIG, 26);

    // Fake prior history: one window chunk plus an advanced absolute start.
    generator.window.push_back(b"abcdefghijklmnop".to_vec());
    generator.history_abs_start = 17;

    let runs_seed_pass = generator.should_run_btultra2_seed_pass(HC_PREDEF_THRESHOLD + 9);
    assert!(
        !runs_seed_pass,
        "btultra2 warmup must be first-block only (no prefix history)"
    );
}
7652
/// A block of exactly `HC_PREDEF_THRESHOLD` bytes must not trigger the
/// btultra2 seed pass on a freshly configured generator.
#[test]
fn btultra2_seed_pass_disabled_for_tiny_block() {
    let mut generator = HcMatchGenerator::new(1 << 20);
    generator.configure(BTULTRA2_HC_CONFIG, 26);

    let runs_seed_pass = generator.should_run_btultra2_seed_pass(HC_PREDEF_THRESHOLD);
    assert!(
        !runs_seed_pass,
        "btultra2 warmup should not run at or below predefined threshold"
    );
}
7662
/// Once `opt_state.lit_length_sum` is non-zero, the btultra2 seed pass must
/// be reported as not runnable.
#[test]
fn btultra2_seed_pass_disabled_after_stats_initialized() {
    let mut generator = HcMatchGenerator::new(1 << 20);
    generator.configure(BTULTRA2_HC_CONFIG, 26);

    // A non-zero literal-length sum stands in for already-collected statistics.
    generator.opt_state.lit_length_sum = 1;

    let runs_seed_pass = generator.should_run_btultra2_seed_pass(HC_PREDEF_THRESHOLD + 32);
    assert!(
        !runs_seed_pass,
        "btultra2 warmup should run only for first block before stats are initialized"
    );
}
7673
/// When `window_size` is larger than the single chunk held in the window,
/// the btultra2 seed pass must be reported as not runnable.
#[test]
fn btultra2_seed_pass_disabled_when_not_at_frame_start() {
    let block_len = HC_PREDEF_THRESHOLD + 32;

    let mut generator = HcMatchGenerator::new(1 << 20);
    generator.configure(BTULTRA2_HC_CONFIG, 26);

    // Non-first-block state: the deque holds only the current block, while
    // the recorded window size already accounts for earlier output.
    generator
        .window
        .push_back(alloc::vec![b'A'; HC_PREDEF_THRESHOLD + 32]);
    generator.window_size = HC_PREDEF_THRESHOLD + 64;

    let runs_seed_pass = generator.should_run_btultra2_seed_pass(block_len);
    assert!(
        !runs_seed_pass,
        "btultra2 warmup must not run after frame start"
    );
}
7688
/// With one raw LDM sequence already queued on the generator, the btultra2
/// seed pass must be reported as not runnable.
#[test]
fn btultra2_seed_pass_disabled_when_ldm_sequences_exist() {
    let mut generator = HcMatchGenerator::new(1 << 20);
    generator.configure(BTULTRA2_HC_CONFIG, 26);

    // Window bookkeeping and window content describe the same single chunk.
    generator
        .window
        .push_back(alloc::vec![b'A'; HC_PREDEF_THRESHOLD + 64]);
    generator.window_size = HC_PREDEF_THRESHOLD + 64;

    // Queue a single LDM sequence.
    let queued = HcRawSeq {
        lit_length: 8,
        offset: 16,
        match_length: 32,
    };
    generator.ldm_sequences.push(queued);

    let runs_seed_pass = generator.should_run_btultra2_seed_pass(HC_PREDEF_THRESHOLD + 32);
    assert!(
        !runs_seed_pass,
        "btultra2 warmup must not run when LDM already produced sequences"
    );
}
7706
/// With literal compression switched off, a literal must be priced at
/// `8 * HC_BITCOST_MULTIPLIER` even though the price type is `Predefined`.
#[test]
fn literal_price_uses_eight_bits_when_literals_uncompressed() {
    let mut state = HcOptState::new();
    state.set_literals_compressed_for_tests(false);
    state.price_type = HcOptPriceType::Predefined;

    let cost_profile = HcOptimalCostProfile::for_mode(HcParseMode::BtUltra2, false);
    let price_of_a = cost_profile.literal_price(&state, b'a');
    assert_eq!(
        price_of_a,
        8 * HC_BITCOST_MULTIPLIER,
        "uncompressed literals should cost 8 bits regardless of price mode"
    );
}
7719
/// With literal compression switched off, `update_stats` must leave the
/// literal histogram and sum at zero while still bumping the literal-length,
/// match-length and offset-code sums by one.
#[test]
fn update_stats_skips_literal_frequencies_when_uncompressed() {
    let mut state = HcOptState::new();
    state.set_literals_compressed_for_tests(false);

    state.update_stats(3, b"abc", 4, 8);

    // Literal statistics stay untouched.
    assert_eq!(
        state.lit_sum, 0,
        "literal sum must remain unchanged when literal compression is disabled"
    );
    let literal_histogram_total: u32 = state.lit_freq.iter().sum();
    assert_eq!(
        literal_histogram_total, 0,
        "literal frequencies must not be updated when literal compression is disabled"
    );

    // Sequence statistics still record the one sequence.
    assert_eq!(
        state.lit_length_sum, 1,
        "literal-length stats still update for sequence modeling"
    );
    assert_eq!(
        state.match_length_sum, 1,
        "match-length stats still update for sequence modeling"
    );
    assert_eq!(
        state.off_code_sum, 1,
        "offset-code stats still update for sequence modeling"
    );
}
7747
/// With literal compression switched off, seeding from a dictionary Huffman
/// table and then rescaling must leave the literal histogram and sum at zero.
#[test]
fn dictionary_huffman_seed_ignored_when_literals_uncompressed() {
    let mut state = HcOptState::new();
    state.set_literals_compressed_for_tests(false);

    // Entropy tables used as the dictionary seed.
    let huffman_seed = crate::huff0::huff0_encoder::HuffmanTable::build_from_data(
        b"aaaaabbbbcccddeeff00112233445566778899",
    );
    let ll_table = crate::fse::fse_encoder::default_ll_table();
    let ml_table = crate::fse::fse_encoder::default_ml_table();
    let of_table = crate::fse::fse_encoder::default_of_table();
    state.seed_dictionary_entropy(
        Some(&huffman_seed),
        Some(&ll_table),
        Some(&ml_table),
        Some(&of_table),
    );

    let cost_profile = HcOptimalCostProfile::for_mode(HcParseMode::BtUltra2, false);
    state.rescale_freqs(b"abcd", cost_profile);

    assert_eq!(
        state.lit_sum, 0,
        "literal sum must stay zero when literals are uncompressed"
    );
    let literal_histogram_total: u32 = state.lit_freq.iter().sum();
    assert_eq!(
        literal_histogram_total, 0,
        "literal frequencies must ignore dictionary huffman seed when uncompressed"
    );
}
7773
/// Enumerates repcode candidates at the second "ABCDEF" with reps
/// `[6, 3, 9]`: offset 6 must be reported for a literal length of 1 and must
/// not be reported for a literal length of 0.
#[test]
fn hc_repcode_candidates_respect_litlen_dependent_rep_order() {
    let mut generator = HcMatchGenerator::new(64);
    generator.history_abs_start = 0;
    generator.history_start = 0;
    generator.history = b"xxxxxxABCDEFABCDEF".to_vec();

    let reps: [u32; 3] = [6, 3, 9];
    // Position 12 is the start of the second "ABCDEF".
    let abs_pos: usize = 12;
    let current_abs_end = generator.history.len();

    // Literal length 1: rep0 is probed directly.
    let mut offsets_with_literals = Vec::new();
    generator.for_each_repcode_candidate_with_reps(
        abs_pos,
        1,
        reps,
        current_abs_end,
        HC_OPT_MIN_MATCH_LEN,
        |candidate| offsets_with_literals.push(candidate.offset),
    );
    assert!(
        offsets_with_literals.contains(&6),
        "when lit_len>0, rep0 should be considered and match"
    );

    // Literal length 0: offset 6 must not be reported.
    let mut offsets_without_literals = Vec::new();
    generator.for_each_repcode_candidate_with_reps(
        abs_pos,
        0,
        reps,
        current_abs_end,
        HC_OPT_MIN_MATCH_LEN,
        |candidate| offsets_without_literals.push(candidate.offset),
    );
    assert!(
        !offsets_without_literals.contains(&6),
        "when lit_len==0, rep0 is not directly eligible (ll0 semantics)"
    );
}
7817
/// With search depth and chain depth both zero, collecting candidates over
/// "xyzxyzxyzxyz" must still yield a non-empty set containing offset 3.
#[test]
fn hc_collect_optimal_candidates_keeps_reps_when_chain_depth_zero() {
    let mut generator = HcMatchGenerator::new(64);
    generator.history = b"xyzxyzxyzxyz".to_vec();
    generator.history_abs_start = 0;
    generator.history_start = 0;
    generator.search_depth = 0;

    let cost_profile = HcOptimalCostProfile {
        accurate: false,
        favor_small_offsets: false,
        max_chain_depth: 0,
        sufficient_match_len: usize::MAX / 2,
    };
    let query = HcCandidateQuery {
        reps: [3, 6, 9],
        lit_len: 1,
        ldm_candidate: None,
    };

    let abs_pos: usize = 6;
    let current_abs_end = generator.history.len();
    let mut found = Vec::new();
    generator.collect_optimal_candidates(abs_pos, current_abs_end, cost_profile, query, &mut found);

    assert!(
        !found.is_empty(),
        "rep candidates should remain available even when chain depth is zero"
    );
    let has_rep0 = found.iter().any(|candidate| candidate.offset == 3);
    assert!(has_rep0, "rep0 candidate should be retained");
}
7855
/// On a run of ten `a` bytes with reps `[1, 4, 8]`, every collected
/// candidate must carry offset 1 or 4.
#[test]
fn hc_collect_optimal_candidates_rep_tail_match_skips_chain_probe() {
    let abs_pos: usize = 6;

    let mut generator = HcMatchGenerator::new(64);
    generator.history = b"aaaaaaaaaa".to_vec();
    generator.history_abs_start = 0;
    generator.history_start = 0;
    generator.search_depth = 32;
    generator.position_base = 0;
    // Index every position before `abs_pos`.
    generator.ensure_tables();
    generator.insert_positions(0, abs_pos);

    let cost_profile = HcOptimalCostProfile {
        accurate: true,
        favor_small_offsets: false,
        max_chain_depth: 32,
        sufficient_match_len: usize::MAX / 2,
    };
    let query = HcCandidateQuery {
        reps: [1, 4, 8],
        lit_len: 1,
        ldm_candidate: None,
    };

    let current_abs_end = generator.history.len();
    let mut found = Vec::new();
    generator.collect_optimal_candidates(abs_pos, current_abs_end, cost_profile, query, &mut found);

    let only_rep_offsets = found
        .iter()
        .all(|candidate| candidate.offset == 1 || candidate.offset == 4);
    assert!(
        only_rep_offsets,
        "terminal rep match should return before chain probing adds non-rep offsets"
    );
}
7893
/// After collecting candidates at position 9 of a repeating "abc" buffer,
/// `skip_insert_until_abs` must have moved past that position.
#[test]
fn hc_collect_optimal_candidates_long_chain_match_advances_skip_window() {
    let abs_pos: usize = 9;

    let mut generator = HcMatchGenerator::new(128);
    generator.history = b"abcabcabcabcabcabcabcabc".to_vec();
    generator.history_abs_start = 0;
    generator.history_start = 0;
    generator.search_depth = 32;
    generator.position_base = 0;
    generator.ensure_tables();
    generator.insert_positions(0, abs_pos);
    // Start from a cleared skip window so any advance comes from this call.
    generator.skip_insert_until_abs = 0;

    let cost_profile = HcOptimalCostProfile {
        accurate: true,
        favor_small_offsets: false,
        max_chain_depth: 32,
        sufficient_match_len: usize::MAX / 2,
    };
    let query = HcCandidateQuery {
        reps: [1, 4, 8],
        lit_len: 1,
        ldm_candidate: None,
    };

    let current_abs_end = generator.history.len();
    let mut found = Vec::new();
    generator.collect_optimal_candidates(abs_pos, current_abs_end, cost_profile, query, &mut found);

    assert!(
        generator.skip_insert_until_abs > abs_pos,
        "long chain match should advance skip window to avoid redundant immediate insertions"
    );
}
7931
/// With `sufficient_match_len` set to 10, the skip window must advance past
/// the current position but stay at or below the furthest candidate end
/// minus 8.
#[test]
fn hc_collect_optimal_candidates_chain_fast_skip_uses_match_end_minus_8() {
    let abs_pos: usize = 9;

    let mut generator = HcMatchGenerator::new(128);
    generator.history = b"abcabcabcabcabcabcabcabc".to_vec();
    generator.history_abs_start = 0;
    generator.history_start = 0;
    generator.search_depth = 32;
    generator.position_base = 0;
    generator.ensure_tables();
    generator.insert_positions(0, abs_pos);
    generator.skip_insert_until_abs = 0;

    let cost_profile = HcOptimalCostProfile {
        accurate: true,
        favor_small_offsets: false,
        max_chain_depth: 32,
        sufficient_match_len: 10,
    };
    let query = HcCandidateQuery {
        reps: [1, 4, 8],
        lit_len: 1,
        ldm_candidate: None,
    };

    let current_abs_end = generator.history.len();
    let mut found = Vec::new();
    generator.collect_optimal_candidates(abs_pos, current_abs_end, cost_profile, query, &mut found);

    // Furthest end position reached by any reported candidate.
    let best_match_end = found
        .iter()
        .map(|candidate| candidate.start.saturating_add(candidate.match_len))
        .max()
        .expect("expected at least one candidate");
    let upper_bound = best_match_end.saturating_sub(8);

    assert!(
        generator.skip_insert_until_abs > abs_pos,
        "chain fast-skip must advance past current position"
    );
    assert!(
        generator.skip_insert_until_abs <= upper_bound,
        "chain fast-skip must not exceed donor-style matchEndIdx - 8 bound"
    );
}
7978
/// On a buffer of sixteen distinct letters with chain search disabled, the
/// skip window must end up at exactly `abs_pos + 1`.
#[test]
fn hc_collect_optimal_candidates_advances_skip_window_on_plain_bt_path() {
    let mut generator = HcMatchGenerator::new(256);
    generator.history = b"abcdefghijklmnop".to_vec();
    generator.history_abs_start = 0;
    generator.history_start = 0;
    generator.search_depth = 0;
    generator.position_base = 0;
    generator.ensure_tables();

    let abs_pos: usize = 8;
    generator.skip_insert_until_abs = 0;

    let cost_profile = HcOptimalCostProfile {
        accurate: true,
        favor_small_offsets: false,
        max_chain_depth: 0,
        sufficient_match_len: usize::MAX / 2,
    };
    let query = HcCandidateQuery {
        reps: [1, 4, 8],
        lit_len: 1,
        ldm_candidate: None,
    };

    let current_abs_end = generator.history.len();
    let mut found = Vec::new();
    generator.collect_optimal_candidates(abs_pos, current_abs_end, cost_profile, query, &mut found);

    let expected_skip = abs_pos.saturating_add(1);
    assert_eq!(
        generator.skip_insert_until_abs, expected_skip,
        "plain BT path should advance skip window by 1 via donor matchEndIdx baseline"
    );
}
8017
/// With chain search disabled and the hash3 cursor rewound to 0, the second
/// "abcde" must still be reported at offset 9 with at least
/// `HC_MIN_MATCH_LEN` bytes.
#[test]
fn hc_collect_optimal_candidates_uses_hash3_when_chain_depth_zero() {
    // Position 9 is the start of the second "abcde".
    let abs_pos: usize = 9;

    let mut generator = HcMatchGenerator::new(256);
    generator.history = b"abcde1234abcdeZZZZ".to_vec();
    generator.history_abs_start = 0;
    generator.history_start = 0;
    generator.search_depth = 0;
    generator.position_base = 0;
    generator.ensure_tables();
    generator.insert_positions(0, abs_pos);
    // The hash3 cursor is rewound independently of the main-table insertion
    // above, so the hash3 side starts from position 0.
    generator.next_to_update3 = 0;

    let cost_profile = HcOptimalCostProfile {
        accurate: true,
        favor_small_offsets: false,
        max_chain_depth: 0,
        sufficient_match_len: usize::MAX / 2,
    };
    let query = HcCandidateQuery {
        reps: [1, 2, 3],
        lit_len: 1,
        ldm_candidate: None,
    };

    let current_abs_end = generator.history.len();
    let mut found = Vec::new();
    generator.collect_optimal_candidates(abs_pos, current_abs_end, cost_profile, query, &mut found);

    let has_prefix_match = found
        .iter()
        .any(|candidate| candidate.offset == 9 && candidate.match_len >= HC_MIN_MATCH_LEN);
    assert!(
        has_prefix_match,
        "hash3 candidate should supply at least one valid match when chain search is disabled"
    );
}
8058
/// Without any explicit `insert_positions` call, the second "abcde" must
/// still be reported at offset 6 with at least `HC_MIN_MATCH_LEN` bytes.
#[test]
fn hc_collect_optimal_candidates_hash3_updates_skipped_prefix_positions() {
    let mut generator = HcMatchGenerator::new(256);
    generator.history = b"abcdeZabcdeYY".to_vec();
    generator.history_abs_start = 0;
    generator.history_start = 0;
    generator.search_depth = 0;
    generator.position_base = 0;
    generator.ensure_tables();
    // No prefix position was inserted into the hash/chain tables; the hash3
    // cursor starts at 0 so those positions have to be filled on demand.
    generator.next_to_update3 = 0;

    // Position 6 is the start of the second "abcde".
    let abs_pos: usize = 6;
    let cost_profile = HcOptimalCostProfile {
        accurate: true,
        favor_small_offsets: false,
        max_chain_depth: 0,
        sufficient_match_len: usize::MAX / 2,
    };
    let query = HcCandidateQuery {
        reps: [1, 2, 3],
        lit_len: 1,
        ldm_candidate: None,
    };

    let current_abs_end = generator.history.len();
    let mut found = Vec::new();
    generator.collect_optimal_candidates(abs_pos, current_abs_end, cost_profile, query, &mut found);

    let has_prefix_match = found
        .iter()
        .any(|candidate| candidate.offset == 6 && candidate.match_len >= HC_MIN_MATCH_LEN);
    assert!(
        has_prefix_match,
        "hash3 incremental update should surface prefix match even without explicit insert_positions"
    );
}
8098
/// When the second "abcde" runs to the end of the buffer, the hash3 cursor
/// must reach at least `abs_pos` and the skip window must move past it.
#[test]
fn hc_hash3_tail_match_advances_update_cursor_on_early_return() {
    let mut generator = HcMatchGenerator::new(256);
    generator.history = b"abcdeZabcde".to_vec();
    generator.history_abs_start = 0;
    generator.history_start = 0;
    generator.search_depth = 0;
    generator.position_base = 0;
    generator.ensure_tables();
    generator.next_to_update3 = 0;

    // Position 6 starts the second "abcde", which ends where the buffer ends.
    let abs_pos: usize = 6;
    let cost_profile = HcOptimalCostProfile {
        accurate: true,
        favor_small_offsets: false,
        max_chain_depth: 0,
        sufficient_match_len: usize::MAX / 2,
    };
    let query = HcCandidateQuery {
        reps: [1, 2, 3],
        lit_len: 1,
        ldm_candidate: None,
    };

    let current_abs_end = generator.history.len();
    let mut found = Vec::new();
    generator.collect_optimal_candidates(abs_pos, current_abs_end, cost_profile, query, &mut found);

    assert!(
        generator.next_to_update3 >= abs_pos,
        "tail-reaching hash3 lookup should fill the update cursor to current position"
    );
    assert!(
        generator.skip_insert_until_abs > abs_pos,
        "tail-reaching hash3 early return should request skipping current-position hash insertion"
    );
}
8139
/// An LDM candidate supplied through the query must appear in the collected
/// set with its offset and match length unchanged.
#[test]
fn hc_ldm_candidates_are_merged_into_optimal_candidates() {
    let abs_pos: usize = 128;
    let current_abs_end: usize = 256;

    let mut generator = HcMatchGenerator::new(512);
    generator.history_abs_start = 0;
    generator.history_start = 0;
    generator.history = (0..256).map(|idx| (idx % 251) as u8).collect();

    let ldm = MatchCandidate {
        start: abs_pos,
        offset: 96,
        match_len: 40,
    };
    let cost_profile = HcOptimalCostProfile {
        accurate: true,
        favor_small_offsets: false,
        max_chain_depth: 0,
        sufficient_match_len: usize::MAX / 2,
    };
    let query = HcCandidateQuery {
        reps: [1, 4, 8],
        lit_len: 1,
        ldm_candidate: Some(ldm),
    };

    let mut found = Vec::new();
    generator.collect_optimal_candidates(abs_pos, current_abs_end, cost_profile, query, &mut found);

    let ldm_present = found
        .iter()
        .any(|candidate| candidate.offset == ldm.offset && candidate.match_len == ldm.match_len);
    assert!(
        ldm_present,
        "LDM candidate should be present in optimal candidate set"
    );
}
8180
/// Runs candidate collection twice over the same buffer, first in `BtUltra2`
/// and then in `BtUltra` parse mode, with `dictionary_limit_abs` set to 64.
/// Each run must report at least one candidate with an offset of 32 or more.
///
/// The output vector is cleared between the two runs so the second assertion
/// can only be satisfied by candidates produced by the `BtUltra` call itself.
#[test]
fn btultra_and_btultra2_both_keep_dictionary_candidates() {
    let abs_pos = 96usize;

    let mut hc = HcMatchGenerator::new(256);
    // 160 bytes: [0, 64) holds letters in 'a'..='g', [64, 160) letters in 'k'..='o'.
    hc.history = (0..160usize)
        .map(|i| {
            if i < 64 {
                b'a' + (i % 7) as u8
            } else {
                b'k' + (i % 5) as u8
            }
        })
        .collect();
    // Plant a 24-byte copy of [16, 40) at `abs_pos` (source and target do not overlap).
    hc.history.copy_within(16..40, abs_pos);
    hc.history_start = 0;
    hc.history_abs_start = 0;
    hc.position_base = 0;
    hc.search_depth = 32;
    hc.ensure_tables();
    hc.insert_positions(0, abs_pos);

    let profile = HcOptimalCostProfile {
        max_chain_depth: 32,
        sufficient_match_len: usize::MAX / 2,
        accurate: true,
        favor_small_offsets: false,
    };
    // Both runs use the same query; build a fresh value for each call.
    let make_query = || HcCandidateQuery {
        reps: [1, 4, 8],
        lit_len: 1,
        ldm_candidate: None,
    };
    let mut out = Vec::new();

    hc.parse_mode = HcParseMode::BtUltra2;
    hc.dictionary_limit_abs = Some(64);
    hc.collect_optimal_candidates(abs_pos, 160, profile, make_query(), &mut out);
    assert!(
        out.iter().any(|candidate| candidate.offset >= 32),
        "btultra2 should retain dictionary candidates on donor-parity path"
    );

    hc.parse_mode = HcParseMode::BtUltra;
    hc.skip_insert_until_abs = 0;
    // Drop the BtUltra2 results so stale candidates cannot satisfy the check below.
    out.clear();
    hc.collect_optimal_candidates(abs_pos, 160, profile, make_query(), &mut out);
    assert!(
        out.iter().any(|candidate| candidate.offset >= 32),
        "btultra should retain dictionary candidates"
    );
}
8246
/// Compares the dfast short-hash table length at `Level(2)` without a hint
/// against the length after a 1024-byte source size hint: the hinted table
/// must be `1 << MIN_HINTED_WINDOW_LOG` entries and smaller than the default.
#[test]
fn driver_small_source_hint_shrinks_dfast_hash_tables() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Baseline run without a size hint.
    let baseline_payload = b"abcabcabcabc";
    driver.reset(CompressionLevel::Level(2));
    let mut block = driver.get_next_space();
    block[..baseline_payload.len()].copy_from_slice(baseline_payload);
    block.truncate(baseline_payload.len());
    driver.commit_space(block);
    driver.skip_matching_with_hint(None);
    let full_tables = driver.dfast_matcher().short_hash.len();
    assert_eq!(full_tables, 1 << DFAST_HASH_BITS);

    // Same level again, this time with a 1 KiB source size hint.
    let hinted_payload = b"xyzxyzxyzxyz";
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Level(2));
    let mut block = driver.get_next_space();
    block[..hinted_payload.len()].copy_from_slice(hinted_payload);
    block.truncate(hinted_payload.len());
    driver.commit_space(block);
    driver.skip_matching_with_hint(None);
    let hinted_tables = driver.dfast_matcher().short_hash.len();

    assert_eq!(driver.window_size(), 1 << MIN_HINTED_WINDOW_LOG);
    assert_eq!(hinted_tables, 1 << MIN_HINTED_WINDOW_LOG);
    assert!(
        hinted_tables < full_tables,
        "tiny source hint should reduce dfast table footprint"
    );
}
8276
/// Compares the row matcher's `row_heads` length at `Level(4)` without a
/// hint against the length after a 1024-byte source size hint: the hinted
/// count must be `1 << (MIN_HINTED_WINDOW_LOG - ROW_LOG)` and smaller than the
/// default.
#[test]
fn driver_small_source_hint_shrinks_row_hash_tables() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Baseline run without a size hint.
    let baseline_payload = b"abcabcabcabc";
    driver.reset(CompressionLevel::Level(4));
    let mut block = driver.get_next_space();
    block[..baseline_payload.len()].copy_from_slice(baseline_payload);
    block.truncate(baseline_payload.len());
    driver.commit_space(block);
    driver.skip_matching_with_hint(None);
    let full_rows = driver.row_matcher().row_heads.len();
    assert_eq!(full_rows, 1 << (ROW_HASH_BITS - ROW_LOG));

    // Same level again, this time with a 1 KiB source size hint.
    let hinted_payload = b"xyzxyzxyzxyz";
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Level(4));
    let mut block = driver.get_next_space();
    block[..hinted_payload.len()].copy_from_slice(hinted_payload);
    block.truncate(hinted_payload.len());
    driver.commit_space(block);
    driver.skip_matching_with_hint(None);
    let hinted_rows = driver.row_matcher().row_heads.len();

    assert_eq!(driver.window_size(), 1 << MIN_HINTED_WINDOW_LOG);
    assert_eq!(
        hinted_rows,
        1 << ((MIN_HINTED_WINDOW_LOG as usize) - ROW_LOG)
    );
    assert!(
        hinted_rows < full_rows,
        "tiny source hint should reduce row hash table footprint"
    );
}
8309
/// Feeds two identical 128 KiB periodic blocks and one 256-byte block of
/// distinct values through the row matcher, replaying the emitted sequences
/// and checking that each block is reconstructed byte for byte.
#[test]
fn row_matches_roundtrip_multi_block_pattern() {
    /// Applies one emitted sequence to the decoded output.
    fn replay(decoded: &mut Vec<u8>, seq: Sequence<'_>) {
        match seq {
            Sequence::Literals { literals } => decoded.extend_from_slice(literals),
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => {
                decoded.extend_from_slice(literals);
                // Copy byte by byte: the match source may overlap the bytes
                // being appended.
                let start = decoded.len() - offset;
                for src in start..start + match_len {
                    let byte = decoded[src];
                    decoded.push(byte);
                }
            }
        }
    }

    let pattern = [7, 13, 44, 184, 19, 96, 171, 109, 141, 251];
    let periodic_block = || -> Vec<u8> { pattern.iter().copied().cycle().take(128 * 1024).collect() };
    let first_block = periodic_block();
    let second_block = periodic_block();

    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.ensure_tables();

    // Block 1: the whole output must equal the input.
    let mut history = Vec::new();
    matcher.add_data(first_block.clone(), |_| {});
    matcher.start_matching(|seq| replay(&mut history, seq));
    assert_eq!(history, first_block);

    // Block 2: only the newly appended tail is compared.
    matcher.add_data(second_block.clone(), |_| {});
    let second_start = history.len();
    matcher.start_matching(|seq| replay(&mut history, seq));
    assert_eq!(&history[second_start..], second_block.as_slice());

    // Block 3: 256 distinct byte values, intended to force a literals-only
    // pass so the `Sequence::Literals` arm is exercised.
    let third_block: Vec<u8> = (0u8..=255).collect();
    matcher.add_data(third_block.clone(), |_| {});
    let third_start = history.len();
    matcher.start_matching(|seq| replay(&mut history, seq));
    assert_eq!(&history[third_start..], third_block.as_slice());
}
8353
/// A 5-byte block must come back as literals only; a following block of
/// repeated data must then produce at least one `Sequence::Triple`.
#[test]
fn row_short_block_emits_literals_only() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);

    // Phase 1: short block, literals expected.
    matcher.add_data(b"abcde".to_vec(), |_| {});
    let mut reconstructed = Vec::new();
    let mut saw_triple = false;
    matcher.start_matching(|seq| {
        if let Sequence::Literals { literals } = seq {
            reconstructed.extend_from_slice(literals);
        } else {
            saw_triple = true;
        }
    });
    assert!(
        !saw_triple,
        "row backend must not emit triples for short blocks"
    );
    assert_eq!(reconstructed, b"abcde");

    // Phase 2: repeated data, so the triple path must be reachable.
    saw_triple = false;
    matcher.add_data(b"abcdeabcde".to_vec(), |_| {});
    matcher.start_matching(|seq| {
        saw_triple |= matches!(seq, Sequence::Triple { .. });
    });
    assert!(
        saw_triple,
        "row backend should emit triples on repeated data"
    );
}
8387
/// `pick_lazy_match` on a 6-byte buffer must hand back the supplied best
/// candidate with start, offset and match length unchanged.
#[test]
fn row_pick_lazy_returns_best_when_lookahead_is_out_of_bounds() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.add_data(b"abcabc".to_vec(), |_| {});

    let best = MatchCandidate {
        match_len: ROW_MIN_MATCH_LEN,
        offset: 1,
        start: 0,
    };
    let chosen = matcher.pick_lazy_match(0, 0, Some(best));
    let picked = chosen.expect("best candidate must survive");

    assert_eq!(picked.start, best.start);
    assert_eq!(picked.offset, best.offset);
    assert_eq!(picked.match_len, best.match_len);
}
8407
/// The first block ends in "XYZ" and the second starts with "XYZXYZ". The
/// second pass must emit a triple with no literals, offset 3 and at least
/// `ROW_MIN_MATCH_LEN` bytes, and the replayed output must equal the input.
#[test]
fn row_backfills_previous_block_tail_for_cross_boundary_match() {
    /// Applies one emitted sequence to the decoded output.
    fn replay(decoded: &mut Vec<u8>, seq: Sequence<'_>) {
        match seq {
            Sequence::Literals { literals } => decoded.extend_from_slice(literals),
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => {
                decoded.extend_from_slice(literals);
                // Copy byte by byte: the match source may overlap the bytes
                // being appended.
                let start = decoded.len() - offset;
                for src in start..start + match_len {
                    let byte = decoded[src];
                    decoded.push(byte);
                }
            }
        }
    }

    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);

    let mut first_block = alloc::vec![0xA5; 64];
    first_block.extend_from_slice(b"XYZ");
    let second_block = b"XYZXYZtail".to_vec();

    // First block: plain round trip.
    let mut reconstructed = Vec::new();
    matcher.add_data(first_block.clone(), |_| {});
    matcher.start_matching(|seq| replay(&mut reconstructed, seq));
    assert_eq!(reconstructed, first_block);

    // Second block: watch for the match that reaches back into block one.
    matcher.add_data(second_block.clone(), |_| {});
    let prefix_len = reconstructed.len();
    let mut saw_cross_boundary = false;
    matcher.start_matching(|seq| {
        let reuses_tail = matches!(
            seq,
            Sequence::Triple { literals, offset: 3, match_len }
                if literals.is_empty() && match_len >= ROW_MIN_MATCH_LEN
        );
        if reuses_tail {
            saw_cross_boundary = true;
        }
        replay(&mut reconstructed, seq);
    });

    assert!(
        saw_cross_boundary,
        "row matcher should reuse the 3-byte previous-block tail"
    );
    assert_eq!(&reconstructed[prefix_len..], second_block.as_slice());
}
8462
/// Skips matching over the same 4096 high-entropy bytes with the hint set to
/// `Some(false)` and `Some(true)`; the `Some(true)` run must leave fewer
/// non-empty row slots.
#[test]
fn row_skip_matching_with_incompressible_hint_uses_sparse_prefix() {
    // Number of row slots that hold a position.
    let occupied_slots = |matcher: &RowMatchGenerator| {
        matcher
            .row_positions
            .iter()
            .filter(|&&pos| pos != ROW_EMPTY_SLOT)
            .count()
    };

    let data = deterministic_high_entropy_bytes(0xA713_9C5D_44E2_10B1, 4096);

    // Hint `Some(false)`.
    let mut dense = RowMatchGenerator::new(1 << 22);
    dense.configure(ROW_CONFIG);
    dense.add_data(data.clone(), |_| {});
    dense.skip_matching_with_hint(Some(false));
    let dense_slots = occupied_slots(&dense);

    // Hint `Some(true)`.
    let mut sparse = RowMatchGenerator::new(1 << 22);
    sparse.configure(ROW_CONFIG);
    sparse.add_data(data, |_| {});
    sparse.skip_matching_with_hint(Some(true));
    let sparse_slots = occupied_slots(&sparse);

    assert!(
        sparse_slots < dense_slots,
        "incompressible hint should seed fewer row slots (sparse={sparse_slots}, dense={dense_slots})"
    );
}
8492
/// Without a source size hint, `Level(2)` must leave the dfast short-hash
/// table at `1 << DFAST_HASH_BITS` entries.
#[test]
fn driver_unhinted_level2_keeps_default_dfast_hash_table_size() {
    let payload = b"abcabcabcabc";

    let mut driver = MatchGeneratorDriver::new(32, 2);
    driver.reset(CompressionLevel::Level(2));

    let mut block = driver.get_next_space();
    block[..payload.len()].copy_from_slice(payload);
    block.truncate(payload.len());
    driver.commit_space(block);
    driver.skip_matching_with_hint(None);

    let table_len = driver.dfast_matcher().short_hash.len();
    assert_eq!(
        table_len,
        1 << DFAST_HASH_BITS,
        "unhinted Level(2) should keep default dfast table size"
    );
}
8511
/// Pools a `SuffixStore` built with capacity 1024, commits a 4096-byte block,
/// and checks that the newest window entry's suffix store has at least 4096
/// slots.
#[test]
fn simple_backend_rejects_undersized_pooled_suffix_store() {
    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
    driver.reset(CompressionLevel::Fastest);

    // Offer a suffix store that is smaller than the block committed below.
    let undersized_store = SuffixStore::with_capacity(1024);
    driver.suffix_pool.push(undersized_store);

    let mut block = driver.get_next_space();
    block.clear();
    block.resize(4096, 0xAB);
    driver.commit_space(block);

    let newest_entry = driver
        .match_generator
        .window
        .last()
        .expect("window entry must exist after commit");
    let last_suffix_slots = newest_entry.suffixes.slots.len();
    assert!(
        last_suffix_slots >= 4096,
        "undersized pooled suffix store must not be reused for larger blocks"
    );
}
8537
/// With a 1024-byte source size hint, the window must be
/// `1 << MIN_HINTED_WINDOW_LOG`, and both `slice_size` and the length of the
/// next space must equal that window.
#[test]
fn source_hint_clamps_driver_slice_size_to_window() {
    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Default);

    let hinted_window = driver.window_size() as usize;
    assert_eq!(hinted_window, 1 << MIN_HINTED_WINDOW_LOG);
    assert_eq!(driver.slice_size, hinted_window);

    // The buffer handed out next must match the clamped slice size.
    let next_space = driver.get_next_space();
    assert_eq!(next_space.len(), hinted_window);
    driver.commit_space(next_space);
}
8552
/// After committing a buffer of at least 128 KiB capacity and then resetting
/// with a 1024-byte hint, the next space must be `1 << MIN_HINTED_WINDOW_LOG`
/// long while keeping at least the earlier capacity.
#[test]
fn pooled_space_keeps_capacity_when_slice_size_shrinks() {
    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
    driver.reset(CompressionLevel::Default);

    // First round: full-size buffer, committed back to the driver.
    let full_size_space = driver.get_next_space();
    let large_capacity = full_size_space.capacity();
    assert!(large_capacity >= 128 * 1024);
    driver.commit_space(full_size_space);

    // Second round: the hint shrinks the slice size.
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Default);

    let hinted_space = driver.get_next_space();
    assert_eq!(hinted_space.len(), 1 << MIN_HINTED_WINDOW_LOG);
    assert!(
        hinted_space.capacity() >= large_capacity,
        "pooled buffer capacity should be preserved to avoid shrink/grow churn"
    );
}
8573
/// Resetting from `Best` to `Fastest` must leave the HC matcher with empty
/// hash and chain tables.
#[test]
fn driver_best_to_fastest_releases_oversized_hc_tables() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Start at Best, which is expected to allocate the large HC tables.
    driver.reset(CompressionLevel::Best);
    assert_eq!(driver.window_size(), (1u64 << 24));

    // Push one short block through so the tables are really allocated.
    let payload = b"abcabcabcabc";
    let mut block = driver.get_next_space();
    block[..12].copy_from_slice(payload);
    block.truncate(12);
    driver.commit_space(block);
    driver.skip_matching_with_hint(None);

    // Drop down to Fastest; the HC tables have to be given up.
    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.window_size(), (1u64 << 17));

    // The HC matcher stays allocated, but both of its tables must be empty.
    let hc = driver.hc_match_generator.as_ref().unwrap();
    let hash_released = hc.hash_table.is_empty();
    assert!(
        hash_released,
        "HC hash_table should be released after switching away from Best"
    );
    let chain_released = hc.chain_table.is_empty();
    assert!(
        chain_released,
        "HC chain_table should be released after switching away from Best"
    );
}
8604
/// Moving from `Better` to `Best` must grow both HC tables beyond the sizes
/// observed under `Better`.
#[test]
fn driver_better_to_best_resizes_hc_tables() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Start at Better, which is expected to use the smaller HC tables.
    driver.reset(CompressionLevel::Better);
    assert_eq!(driver.window_size(), (1u64 << 23));

    let mut first_block = driver.get_next_space();
    first_block[..12].copy_from_slice(b"abcabcabcabc");
    first_block.truncate(12);
    driver.commit_space(first_block);
    driver.skip_matching_with_hint(None);

    // Record the table sizes used under Better.
    let (better_hash_len, better_chain_len) = {
        let hc = driver.hc_match_generator.as_ref().unwrap();
        (hc.hash_table.len(), hc.chain_table.len())
    };

    // Move up to Best; the tables have to be re-sized.
    driver.reset(CompressionLevel::Best);
    assert_eq!(driver.window_size(), (1u64 << 24));

    // Another block forces table setup with the new configuration.
    let mut second_block = driver.get_next_space();
    second_block[..12].copy_from_slice(b"xyzxyzxyzxyz");
    second_block.truncate(12);
    driver.commit_space(second_block);
    driver.skip_matching_with_hint(None);

    let hc = driver.hc_match_generator.as_ref().unwrap();
    let best_hash_len = hc.hash_table.len();
    assert!(
        best_hash_len > better_hash_len,
        "Best hash_table ({}) should be larger than Better ({})",
        best_hash_len,
        better_hash_len
    );
    let best_chain_len = hc.chain_table.len();
    assert!(
        best_chain_len > better_chain_len,
        "Best chain_table ({}) should be larger than Better ({})",
        best_chain_len,
        better_chain_len
    );
}
8648
/// After priming the simple backend with an 8-byte dictionary, a first block
/// with the same bytes must produce a literal-free match at offset 8.
#[test]
fn prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    let mut block = driver.get_next_space();
    block.clear();
    block.extend_from_slice(b"abcdefgh");
    driver.commit_space(block);

    // Look for a sequence that reaches straight back into the dictionary.
    let mut saw_match = false;
    driver.start_matching(|seq| match seq {
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } if literals.is_empty() && offset == 8 && match_len >= MIN_MATCH_LEN => {
            saw_match = true;
        }
        _ => {}
    });

    assert!(
        saw_match,
        "first full block should still match dictionary-primed history"
    );
}
8681
/// With a 24-byte dictionary (three times the slice size of 8), the first
/// block must still match the dictionary's first eight bytes at offset 24.
#[test]
fn prime_with_large_dictionary_preserves_early_history_until_first_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);

    let mut block = driver.get_next_space();
    block.clear();
    block.extend_from_slice(b"abcdefgh");
    driver.commit_space(block);

    // The only copy of "abcdefgh" sits 24 bytes back, at the dictionary start.
    let mut saw_match = false;
    driver.start_matching(|seq| match seq {
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } if literals.is_empty() && offset == 24 && match_len >= MIN_MATCH_LEN => {
            saw_match = true;
        }
        _ => {}
    });

    assert!(
        saw_match,
        "dictionary bytes should remain addressable until frame output exceeds the live window"
    );
}
8714
/// Priming with empty dictionary content must still install the supplied
/// offset history on the simple matcher.
#[test]
fn prime_with_dictionary_applies_offset_history_even_when_content_is_empty() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    let no_content: &[u8] = &[];
    driver.prime_with_dictionary(no_content, [11, 7, 3]);

    assert_eq!(driver.match_generator.offset_hist, [11, 7, 3]);
}
8724
/// Priming the HC backend with an empty dictionary installs the offset history
/// and still turns the btultra2 seed pass off.
#[test]
fn hc_prime_with_empty_dictionary_disables_btultra2_seed_pass() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Better);

    driver.prime_with_dictionary(&[], [11, 7, 3]);

    assert_eq!(driver.hc_matcher().offset_hist, [11, 7, 3]);

    // Query with a size just above the predefined threshold.
    let seed_pass_wanted = driver
        .hc_matcher()
        .should_run_btultra2_seed_pass(HC_PREDEF_THRESHOLD + 1);
    assert!(
        !seed_pass_wanted,
        "btultra2 warmup must stay disabled after dictionary priming, even when dict content is empty"
    );
}
8740
/// Priming the HC backend with real dictionary content turns the btultra2 seed
/// pass off.
#[test]
fn hc_prime_with_dictionary_disables_btultra2_seed_pass() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Better);

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    // Query with a size just above the predefined threshold.
    let seed_pass_wanted = driver
        .hc_matcher()
        .should_run_btultra2_seed_pass(HC_PREDEF_THRESHOLD + 1);
    assert!(
        !seed_pass_wanted,
        "btultra2 warmup must stay disabled after dictionary priming with content"
    );
}
8755
/// Level 2 counterpart of the dictionary-history test: the first block must
/// match the primed dictionary at offset 8 with at least `DFAST_MIN_MATCH_LEN`
/// bytes and no leading literals.
#[test]
fn dfast_prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(2));

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    let mut block = driver.get_next_space();
    block.clear();
    block.extend_from_slice(b"abcdefgh");
    driver.commit_space(block);

    let mut saw_match = false;
    driver.start_matching(|seq| match seq {
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } if literals.is_empty() && offset == 8 && match_len >= DFAST_MIN_MATCH_LEN => {
            saw_match = true;
        }
        _ => {}
    });

    assert!(
        saw_match,
        "dfast backend should match dictionary-primed history in first full block"
    );
}
8788
/// The window size reported by the driver must be the same before and after
/// dictionary priming.
#[test]
fn prime_with_dictionary_does_not_inflate_reported_window_size() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    let reported_before = driver.window_size();
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    let reported_after = driver.window_size();

    assert_eq!(
        reported_after, reported_before,
        "dictionary retention budget must not change reported frame window size"
    );
}
8803
/// Dictionary priming must never leave a window entry shorter than
/// `MIN_MATCH_LEN` in the simple matcher.
#[test]
fn prime_with_dictionary_does_not_reuse_tiny_suffix_store() {
    let mut driver = MatchGeneratorDriver::new(8, 2);
    driver.reset(CompressionLevel::Fastest);

    // Nine bytes against a slice size of 8 leaves a single trailing byte;
    // that tail is too short to be committed to the matcher window.
    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);

    let has_short_entry = driver
        .match_generator
        .window
        .iter()
        .any(|entry| entry.data.len() < MIN_MATCH_LEN);
    assert!(
        !has_short_entry,
        "dictionary priming must not commit tails shorter than MIN_MATCH_LEN"
    );
}
8822
/// The simple matcher's `max_window_size` must grow by only the 8 committed
/// dictionary bytes, not by the full 9-byte dictionary.
#[test]
fn prime_with_dictionary_counts_only_committed_tail_budget() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    let before = driver.match_generator.max_window_size;
    // A full 8-byte slice followed by one byte that cannot be committed.
    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);
    let after = driver.match_generator.max_window_size;

    assert_eq!(
        after,
        before + 8,
        "retention budget must account only for dictionary bytes actually committed to history"
    );
}
8838
/// At Level 2 a 4-byte dictionary tail counts towards the retained window, so
/// `max_window_size` grows by all 12 dictionary bytes.
#[test]
fn dfast_prime_with_dictionary_counts_four_byte_tail_budget() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(2));

    let before = driver.dfast_matcher().max_window_size;
    // A full 8-byte slice followed by a 4-byte tail. The tail is expected to
    // stay usable via short-hash overlap into the following block, so it is
    // kept in the budget.
    driver.prime_with_dictionary(b"abcdefghijkl", [1, 4, 8]);
    let after = driver.dfast_matcher().max_window_size;

    assert_eq!(
        after,
        before + 12,
        "dfast retention budget should include 4-byte dictionary tails"
    );
}
8855
/// Level 4 counterpart of the dictionary-history test: the first block must
/// match the primed dictionary at offset 8 with at least `ROW_MIN_MATCH_LEN`
/// bytes and no leading literals.
#[test]
fn row_prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    let mut block = driver.get_next_space();
    block.clear();
    block.extend_from_slice(b"abcdefgh");
    driver.commit_space(block);

    let mut saw_match = false;
    driver.start_matching(|seq| match seq {
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } if literals.is_empty() && offset == 8 && match_len >= ROW_MIN_MATCH_LEN => {
            saw_match = true;
        }
        _ => {}
    });

    assert!(
        saw_match,
        "row backend should match dictionary-primed history in first full block"
    );
}
8888
/// At Level 4 the row matcher's `max_window_size` must grow by only the 8
/// committed dictionary bytes when a 1-byte tail is left over.
#[test]
fn row_prime_with_dictionary_subtracts_uncommitted_tail_budget() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));

    let base_window = driver.row_matcher().max_window_size;
    // With a slice size of 8, the ninth byte is a tail shorter than 4 bytes
    // and is not committed, so it must not count towards the budget.
    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);
    let primed_window = driver.row_matcher().max_window_size;

    assert_eq!(
        primed_window,
        base_window + 8,
        "row retained window must exclude uncommitted 1-byte tail"
    );
}
8905
/// After two further blocks are committed into an 8-byte window, the
/// dictionary retention budget must be zero and the row matcher's
/// `max_window_size` must be back at its pre-priming value.
#[test]
fn prime_with_dictionary_budget_shrinks_after_row_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));
    // Force a very small live window so the primed slices fall out fast.
    driver.row_matcher_mut().max_window_size = 8;
    driver.reported_window_size = 8;

    let base_window = driver.row_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    let primed_window = driver.row_matcher().max_window_size;
    assert_eq!(primed_window, base_window + 24);

    // Two more full blocks push the dictionary slices out of the window.
    for filler in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut block = driver.get_next_space();
        block.clear();
        block.extend_from_slice(filler);
        driver.commit_space(block);
        driver.skip_matching_with_hint(None);
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    let final_window = driver.row_matcher().max_window_size;
    assert_eq!(
        final_window,
        base_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
8936
/// `get_last_space` returns the committed block under Level 4, and a reset to
/// `Fastest` switches to the simple backend and empties the row window.
#[test]
fn row_get_last_space_and_reset_to_fastest_clears_window() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));

    let mut block = driver.get_next_space();
    block.clear();
    block.extend_from_slice(b"row-data");
    driver.commit_space(block);

    // The most recently committed block must be readable back unchanged.
    assert_eq!(driver.get_last_space(), b"row-data");

    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.active_backend, MatcherBackend::Simple);
    let row_window_empty = driver.row_matcher().window.is_empty();
    assert!(row_window_empty);
}
8953
/// Switching from the Row backend to Simple must empty the row tables while
/// keeping the row matcher allocated, and must not shrink the buffer pool.
#[test]
fn driver_reset_from_row_backend_reclaims_row_buffer_pool() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));
    assert_eq!(driver.active_backend, MatcherBackend::Row);

    // Touch the row matcher first so that reset() goes through the Row
    // backend retirement path.
    let _ = driver.row_matcher();
    // NOTE(review): unlike sibling tests this appends without clearing the
    // buffer first — confirm that is intentional.
    let mut block = driver.get_next_space();
    block.extend_from_slice(b"row-data-to-recycle");
    driver.commit_space(block);

    let pool_len_before = driver.vec_pool.len();
    driver.reset(CompressionLevel::Fastest);

    assert_eq!(driver.active_backend, MatcherBackend::Simple);
    let row = driver
        .row_match_generator
        .as_ref()
        .expect("row matcher should remain allocated after switch");
    assert!(row.row_heads.is_empty());
    assert!(row.row_positions.is_empty());
    assert!(row.row_tags.is_empty());
    let pool_len_after = driver.vec_pool.len();
    assert!(
        pool_len_after >= pool_len_before,
        "row reset should recycle row history buffers"
    );
}
8984
/// `reset` must cope with the Row backend being marked active while no row
/// matcher has been allocated, and must end up on the Simple backend.
#[test]
fn driver_reset_from_row_backend_tolerates_missing_row_matcher() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    // Put the driver into a "Row active, nothing allocated" state by hand.
    driver.row_match_generator = None;
    driver.active_backend = MatcherBackend::Row;

    driver.reset(CompressionLevel::Fastest);

    assert_eq!(driver.active_backend, MatcherBackend::Simple);
}
8996
/// A source size of zero must reduce `window_log` to `MIN_HINTED_WINDOW_LOG`.
#[test]
fn adjust_params_for_zero_source_size_uses_min_hinted_window_floor() {
    let mut level4 = resolve_level_params(CompressionLevel::Level(4), None);
    // Start from a window well above the floor.
    level4.window_log = 22;
    assert_eq!(
        adjust_params_for_source_size(level4, 0).window_log,
        MIN_HINTED_WINDOW_LOG
    );
}
9004
/// Cross-checks `MatchGenerator::common_prefix_len` against a naive scalar
/// implementation over many lengths, start offsets and mismatch positions,
/// plus one pair of slices with different lengths.
#[test]
fn common_prefix_len_matches_scalar_reference_across_offsets() {
    // Naive reference: index of the first differing byte, bounded by the
    // shorter of the two slices.
    fn scalar_reference(a: &[u8], b: &[u8]) -> usize {
        let limit = a.len().min(b.len());
        (0..limit).find(|&i| a[i] != b[i]).unwrap_or(limit)
    }

    const TOTAL_LENS: &[usize] = &[
        0, 1, 5, 15, 16, 17, 31, 32, 33, 64, 65, 127, 191, 257, 320,
    ];
    const STARTS: &[usize] = &[0, 1, 3];
    const MISMATCHES: &[usize] = &[0, 1, 7, 15, 16, 31, 32, 47, 63, 95, 127, 128, 129, 191];

    for total_len in TOTAL_LENS.iter().copied() {
        let base: Vec<u8> = (0..total_len)
            .map(|i| ((i * 13 + 7) & 0xFF) as u8)
            .collect();

        // Only start offsets that fit inside the buffer are exercised.
        for start in STARTS.iter().copied().filter(|&s| s <= total_len) {
            let a = &base[start..];
            let b = a.to_vec();
            assert_eq!(
                MatchGenerator::common_prefix_len(a, &b),
                scalar_reference(a, &b),
                "equal slices total_len={total_len} start={start}"
            );

            let len = a.len();
            // Flip one byte at each in-range position.
            for mismatch in MISMATCHES.iter().copied().filter(|&m| m < len) {
                let mut altered = b.clone();
                altered[mismatch] ^= 0x5A;
                assert_eq!(
                    MatchGenerator::common_prefix_len(a, &altered),
                    scalar_reference(a, &altered),
                    "total_len={total_len} start={start} mismatch={mismatch}"
                );
            }

            // Flip the very last byte as well, when there is one.
            if let Some(mismatch) = len.checked_sub(1) {
                let mut altered = b.clone();
                altered[mismatch] ^= 0xA5;
                assert_eq!(
                    MatchGenerator::common_prefix_len(a, &altered),
                    scalar_reference(a, &altered),
                    "tail mismatch total_len={total_len} start={start} mismatch={mismatch}"
                );
            }
        }
    }

    // Slices with identical content but different lengths.
    let long = alloc::vec![0xAB; 320];
    let shorter = alloc::vec![0xAB; 137];
    let measured = MatchGenerator::common_prefix_len(&long, &shorter);
    assert_eq!(measured, scalar_reference(&long, &shorter));
}
9067
/// With 64 identical bytes of history, a minimum-length candidate at position
/// 16 must be deferred (`None`) by the lazy picker.
#[test]
fn row_pick_lazy_returns_none_when_next_is_better() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    let run_of_a = alloc::vec![b'a'; 64];
    matcher.add_data(run_of_a, |_| {});
    matcher.ensure_tables();

    let position = matcher.history_abs_start + 16;
    // A hand-built current candidate of minimum length.
    let current = MatchCandidate {
        start: position,
        offset: 8,
        match_len: ROW_MIN_MATCH_LEN,
    };
    let picked = matcher.pick_lazy_match(position, 0, Some(current));
    assert!(
        picked.is_none(),
        "lazy picker should defer when next position is clearly better"
    );
}
9086
/// With `lazy_depth = 2`, the lazy picker must return `None` for the current
/// position when the candidate two bytes ahead is more than one byte longer
/// than the current best match.
#[test]
fn row_pick_lazy_depth2_returns_none_when_next2_significantly_better() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    // Override the configured search settings for this scenario.
    // NOTE(review): search_depth = 0 presumably limits matching to repcodes —
    // confirm against `best_match`.
    matcher.lazy_depth = 2;
    matcher.search_depth = 0;
    matcher.offset_hist = [6, 9, 1];

    // 40 filler bytes with a hand-crafted 19-byte pattern at 11..30.
    let mut data = alloc::vec![b'x'; 40];
    data[11..30].copy_from_slice(b"EFABCABCAEFABCAEFAB");
    matcher.add_data(data, |_| {});
    matcher.ensure_tables();

    // Baseline at position 20: offset 9 with exactly the minimum match length.
    let abs_pos = matcher.history_abs_start + 20;
    let best = matcher
        .best_match(abs_pos, 0)
        .expect("expected baseline repcode match");
    assert_eq!(best.offset, 9);
    assert_eq!(best.match_len, ROW_MIN_MATCH_LEN);

    // A candidate one byte ahead, if any, must not beat the baseline.
    if let Some(next) = matcher.best_match(abs_pos + 1, 1) {
        assert!(next.match_len <= best.match_len);
    }

    // The candidate two bytes ahead must be longer by more than one byte.
    let next2 = matcher
        .best_match(abs_pos + 2, 2)
        .expect("expected +2 candidate");
    assert!(
        next2.match_len > best.match_len + 1,
        "+2 candidate must be significantly better for depth-2 lazy skip"
    );
    assert!(
        matcher.pick_lazy_match(abs_pos, 0, Some(best)).is_none(),
        "lazy picker should defer when +2 candidate is significantly better"
    );
}
9123
/// With `lazy_depth = 2`, the lazy picker must keep the current best match
/// when the candidate two bytes ahead is exactly one byte longer.
#[test]
fn row_pick_lazy_depth2_keeps_best_when_next2_is_only_one_byte_better() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    // Override the configured search settings for this scenario.
    matcher.lazy_depth = 2;
    matcher.search_depth = 0;
    matcher.offset_hist = [6, 9, 1];

    // Same layout as the "significantly better" sibling test, except the
    // pattern's last byte is 'Z' instead of 'B'.
    let mut data = alloc::vec![b'x'; 40];
    data[11..30].copy_from_slice(b"EFABCABCAEFABCAEFAZ");
    matcher.add_data(data, |_| {});
    matcher.ensure_tables();

    // Baseline at position 20: offset 9 with exactly the minimum match length.
    let abs_pos = matcher.history_abs_start + 20;
    let best = matcher
        .best_match(abs_pos, 0)
        .expect("expected baseline repcode match");
    assert_eq!(best.offset, 9);
    assert_eq!(best.match_len, ROW_MIN_MATCH_LEN);

    // The candidate two bytes ahead is longer by exactly one byte.
    let next2 = matcher
        .best_match(abs_pos + 2, 2)
        .expect("expected +2 candidate");
    assert_eq!(next2.match_len, best.match_len + 1);
    // The picker must hand back the baseline candidate unchanged.
    let chosen = matcher
        .pick_lazy_match(abs_pos, 0, Some(best))
        .expect("lazy picker should keep current best");
    assert_eq!(chosen.start, best.start);
    assert_eq!(chosen.offset, best.offset);
    assert_eq!(chosen.match_len, best.match_len);
}
9155
/// Verifies row/tag extraction uses the shared hash mix bit-splitting contract.
///
/// The expected row and tag are recomputed here from the bytes at the probed
/// position and compared with what `hash_and_row` returns.
#[test]
fn row_hash_and_row_extracts_high_bits() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    // 16 bytes of history; bytes 0..3 and 8..11 share the prefix AA BB CC.
    matcher.add_data(
        alloc::vec![
            0xAA, 0xBB, 0xCC, 0x11, 0x10, 0x20, 0x30, 0x40, 0xAA, 0xBB, 0xCC, 0x22, 0x50, 0x60,
            0x70, 0x80,
        ],
        |_| {},
    );
    matcher.ensure_tables();

    let pos = matcher.history_abs_start + 8;
    let (row, tag) = matcher
        .hash_and_row(pos)
        .expect("row hash should be available");

    // Recompute the hash from the key bytes at `pos`, read as a little-endian
    // u32 and widened to u64.
    let idx = pos - matcher.history_abs_start;
    let concat = matcher.live_history();
    let value = u32::from_le_bytes(concat[idx..idx + ROW_HASH_KEY_LEN].try_into().unwrap()) as u64;
    let hash = crate::encoding::fastpath::hash_mix_u64_with_kernel(matcher.hash_kernel, value);
    // Keep the top `row_hash_log + ROW_TAG_BITS` bits of the hash
    // (assumes `hash` is a u64 — TODO confirm).
    let total_bits = matcher.row_hash_log + ROW_TAG_BITS;
    let combined = hash >> (u64::BITS as usize - total_bits);
    // Row index: the bits above the tag, masked to `row_hash_log` bits.
    let expected_row =
        ((combined >> ROW_TAG_BITS) as usize) & ((1usize << matcher.row_hash_log) - 1);
    // Tag: the low 8 bits of the combined value.
    let expected_tag = combined as u8;

    assert_eq!(row, expected_row);
    assert_eq!(tag, expected_tag);
}
9188
/// A repcode that would point before `history_abs_start` must yield no
/// candidate: position 12 minus offset 3 is 9, below the start of 10.
#[test]
fn row_repcode_skips_candidate_before_history_start() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    // Set up the history by hand so it starts at absolute position 10.
    matcher.history = alloc::vec![b'a'; 20];
    matcher.history_start = 0;
    matcher.history_abs_start = 10;
    matcher.offset_hist = [3, 0, 0];

    let candidate = matcher.repcode_candidate(12, 1);
    assert!(candidate.is_none());
}
9200
/// Probing the last byte of a 5-byte history must yield no repcode candidate.
#[test]
fn row_repcode_returns_none_when_position_too_close_to_history_end() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    // Set up a tiny 5-byte history by hand.
    matcher.history = b"abcde".to_vec();
    matcher.history_start = 0;
    matcher.history_abs_start = 0;
    matcher.offset_hist = [1, 0, 0];

    let candidate = matcher.repcode_candidate(4, 1);
    assert!(candidate.is_none());
}
9212
/// On x86_64 CPUs with SSE4.2, the dispatched hash mix must equal the result
/// of the SSE4.2 implementation. The test does nothing on other CPUs.
#[cfg(all(feature = "std", target_arch = "x86_64"))]
#[test]
fn hash_mix_sse42_path_is_available_and_matches_accelerated_impl_when_supported() {
    use crate::encoding::fastpath::{self, FastpathKernel};
    if is_x86_feature_detected!("sse4.2") {
        let input = 0x0123_4567_89AB_CDEFu64;
        // SAFETY: the runtime check above confirmed SSE4.2 support.
        let accelerated = unsafe { fastpath::sse42::hash_mix_u64(input) };
        // The dispatcher is expected to pick SSE4.2 or something better and
        // to produce the same mix either way.
        let dispatched = fastpath::dispatch_hash_mix_u64(input);
        // NOTE(review): both branches check the same equality; the selected
        // kernel only decides whether a failure message is attached.
        if fastpath::select_kernel() == FastpathKernel::Sse42 {
            assert_eq!(dispatched, accelerated);
        } else {
            // The AVX2 kernel is expected to use the same CRC32 instruction.
            assert_eq!(dispatched, accelerated, "AVX2/SSE4.2 share CRC32 mix");
        }
    }
}
9233
/// On little-endian aarch64 CPUs with the `crc` feature, the dispatched hash
/// mix must equal the result of the NEON-module implementation. The test does
/// nothing on other CPUs.
#[cfg(all(feature = "std", target_arch = "aarch64", target_endian = "little"))]
#[test]
fn hash_mix_crc_path_is_available_and_matches_accelerated_impl_when_supported() {
    use crate::encoding::fastpath;
    if is_aarch64_feature_detected!("crc") {
        let input = 0x0123_4567_89AB_CDEFu64;
        // SAFETY: the runtime check above confirmed CRC32 support.
        let accelerated = unsafe { fastpath::neon::hash_mix_u64(input) };
        let dispatched = fastpath::dispatch_hash_mix_u64(input);
        assert_eq!(dispatched, accelerated);
    }
}
9247
/// `HcMatchGenerator::hash3_position` must equal the formula spelled out here:
/// shift the little-endian u32 left by 8, multiply by `HC_PRIME3BYTES`, keep
/// the top `HC3_HASH_LOG` bits.
#[test]
fn hc_hash3_position_matches_donor_formula() {
    let bytes = *b"abcd";
    let read32 = u32::from_le_bytes(bytes);
    // The left shift by 8 discards the top byte, which is the fourth input byte.
    let shifted = read32 << 8;
    let mixed = shifted.wrapping_mul(HC_PRIME3BYTES);
    let expected = (mixed >> (32 - HC3_HASH_LOG)) as usize;
    let actual = HcMatchGenerator::hash3_position(&bytes, HC3_HASH_LOG);
    assert_eq!(actual, expected);
}
9258
/// `hash_position` under `HC_CONFIG` must equal the formula spelled out here:
/// multiply the little-endian u32 by `HC_PRIME4BYTES` and keep the top
/// `hash_log` bits.
#[test]
fn hc_hash_position_matches_donor_hash4_formula() {
    let mut hc = HcMatchGenerator::new(1 << 20);
    hc.configure(HC_CONFIG, 22);
    let bytes = *b"abcd";
    let read32 = u32::from_le_bytes(bytes);
    let mixed = read32.wrapping_mul(HC_PRIME4BYTES);
    let expected = (mixed >> (32 - hc.hash_log)) as usize;
    assert_eq!(hc.hash_position(&bytes), expected);
}
9268
/// Under `BTULTRA2_HC_CONFIG_L22`, `hash_position_with_mls` must equal the
/// 4-byte formula spelled out here, computed from the first four input bytes.
#[test]
fn btultra2_main_hash_uses_donor_hash4_formula() {
    let mut hc = HcMatchGenerator::new(1 << 20);
    hc.configure(BTULTRA2_HC_CONFIG_L22, 27);
    let bytes = *b"abcdefgh";
    // Only the first four bytes feed the expected value.
    let read32 = u32::from_le_bytes(bytes[..4].try_into().unwrap());
    let mixed = read32.wrapping_mul(HC_PRIME4BYTES);
    let expected = (mixed >> (32 - hc.hash_log)) as usize;
    let actual = HcMatchGenerator::hash_position_with_mls(&bytes, hc.hash_log, hc.bt_hash_mls());
    assert_eq!(actual, expected);
}
9279
/// With only five bytes of history, `row_candidate` must return `None` for
/// position 0.
#[test]
fn row_candidate_returns_none_when_abs_pos_near_end_of_history() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    // Set up a tiny 5-byte history by hand.
    matcher.history = b"abcde".to_vec();
    matcher.history_start = 0;
    matcher.history_abs_start = 0;

    let candidate = matcher.row_candidate(0, 0);
    assert!(candidate.is_none());
}
9290
/// With a 3-byte history, every entry returned by `chain_candidates` must be
/// the `usize::MAX` sentinel.
#[test]
fn hc_chain_candidates_returns_sentinels_for_short_suffix() {
    let mut hc = HcMatchGenerator::new(32);
    // Set up a 3-byte history by hand.
    hc.history = b"abc".to_vec();
    hc.history_start = 0;
    hc.history_abs_start = 0;
    hc.ensure_tables();

    let candidates = hc.chain_candidates(0);
    let has_real_candidate = candidates.iter().any(|&pos| pos != usize::MAX);
    assert!(!has_real_candidate);
}
9302
/// `reset` must overwrite every slot of already-allocated HC tables with
/// `HC_EMPTY`.
#[test]
fn hc_reset_refills_existing_tables_with_empty_sentinel() {
    let mut hc = HcMatchGenerator::new(32);
    hc.add_data(b"abcdeabcde".to_vec(), |_| {});
    hc.ensure_tables();
    assert!(!hc.hash_table.is_empty());
    assert!(!hc.chain_table.is_empty());
    // Fill both tables with recognisable junk values.
    hc.hash_table.fill(123);
    hc.chain_table.fill(456);

    hc.reset(|_| {});

    let stale_hash_slot = hc.hash_table.iter().any(|&v| v != HC_EMPTY);
    assert!(!stale_hash_slot);
    let stale_chain_slot = hc.chain_table.iter().any(|&v| v != HC_EMPTY);
    assert!(!stale_chain_slot);
}
9318
/// `start_matching` on an empty current block must never invoke the sequence
/// callback.
#[test]
fn hc_start_matching_returns_early_for_empty_current_block() {
    let mut hc = HcMatchGenerator::new(32);
    let empty_block: Vec<u8> = Vec::new();
    hc.add_data(empty_block, |_| {});

    let mut callback_fired = false;
    hc.start_matching(|_| {
        callback_fired = true;
    });
    assert!(
        !callback_fired,
        "empty current block should not emit sequences"
    );
}
9327
/// Produces `len` pseudo-random bytes from a xorshift-style generator seeded
/// with `seed`.
///
/// Each step applies the shifts 13 (left), 7 (right) and 17 (left) to the
/// 64-bit state and emits bits 40..48 of the result. The output depends only
/// on `seed` and `len`. A seed of zero keeps the state at zero, so every
/// emitted byte is zero.
#[cfg(test)]
fn deterministic_high_entropy_bytes(seed: u64, len: usize) -> Vec<u8> {
    let mut rng = seed;
    (0..len)
        .map(|_| {
            rng ^= rng << 13;
            rng ^= rng >> 7;
            rng ^= rng << 17;
            (rng >> 40) as u8
        })
        .collect()
}
9340
/// Splits `data` into consecutive `(start, len)` block ranges, asking
/// `donor_optimal_block_size` at level 22 for each block length.
///
/// Every block is at least one byte long and no longer than
/// `HC_BLOCKSIZE_MAX` or the bytes that remain.
#[cfg(test)]
fn level22_donor_block_ranges(data: &[u8]) -> Vec<(usize, usize)> {
    let total = data.len();
    let mut blocks = Vec::new();
    let mut start = 0usize;
    let mut savings = 0i64;
    while start < total {
        let remaining = total - start;
        let window_len = remaining.min(HC_BLOCKSIZE_MAX);
        let proposed = crate::encoding::frame_compressor::donor_optimal_block_size(
            CompressionLevel::Level(22),
            &data[start..start + window_len],
            remaining,
            HC_BLOCKSIZE_MAX,
            savings,
        );
        // Never exceed the offered window, and never produce an empty block.
        let block_len = proposed.min(window_len).max(1);
        blocks.push((start, block_len));
        start += block_len;
        // The real donor gate looks at compressed-size savings. In this
        // parity harness a fixed positive value is used once the first full
        // block's worth of input is consumed, which is enough to enable the
        // same pre-block splitter path.
        if start >= HC_BLOCKSIZE_MAX {
            savings = 3;
        }
    }
    blocks
}
9369
/// Folds block-delimiter entries into the sequence that follows them.
///
/// Entries are `(lit_len, offset, match_len)`. An entry with `offset == 0` and
/// `match_len == 0` is a delimiter: its literal count is carried forward and
/// added (saturating) to the next real sequence. Literals still carried after
/// the last entry are emitted as one trailing `(lits, 0, 0)` entry, unless the
/// carried count is zero.
#[cfg(test)]
fn merge_block_delimiters_like_donor(
    sequences: Vec<(usize, usize, usize)>,
) -> Vec<(usize, usize, usize)> {
    let mut merged = Vec::with_capacity(sequences.len());
    let carried = sequences
        .into_iter()
        .fold(0usize, |carried, (lit_len, offset, match_len)| {
            let is_delimiter = offset == 0 && match_len == 0;
            if is_delimiter {
                carried.saturating_add(lit_len)
            } else {
                merged.push((lit_len.saturating_add(carried), offset, match_len));
                0
            }
        });
    if carried != 0 {
        merged.push((carried, 0, 0));
    }
    merged
}
9389
/// Returns this crate's level-22 sequences for `data` with block delimiters
/// merged into the following sequence and any remaining `(lits, 0, 0)` entry
/// removed.
#[cfg(test)]
fn collect_level22_sequences(data: &[u8]) -> Vec<(usize, usize, usize)> {
    let raw = collect_level22_sequences_with_delimiters(data);
    let mut merged = merge_block_delimiters_like_donor(raw);
    // Drop entries that carry neither an offset nor a match length.
    merged.retain(|&(_, offset, match_len)| offset != 0 || match_len != 0);
    merged
}
9397
/// Runs this crate's level-22 matcher over `data`, block by block, and records
/// every emitted sequence as `(lit_len, offset, match_len)`.
///
/// Literal-only sequences are recorded with `offset == 0` and
/// `match_len == 0`. Block boundaries come from `level22_donor_block_ranges`.
#[cfg(test)]
fn collect_level22_sequences_with_delimiters(data: &[u8]) -> Vec<(usize, usize, usize)> {
    let mut driver = MatchGeneratorDriver::new(HC_BLOCKSIZE_MAX, 1);
    driver.set_source_size_hint(data.len() as u64);
    driver.reset(CompressionLevel::Level(22));

    let mut recorded = Vec::new();
    for (block_start, block_len) in level22_donor_block_ranges(data) {
        let block = &data[block_start..block_start + block_len];

        // Copy the block into a driver-owned buffer and commit it.
        let mut buffer = driver.get_next_space();
        buffer[..block.len()].copy_from_slice(block);
        buffer.truncate(block.len());
        driver.commit_space(buffer);

        driver.start_matching(|seq| {
            recorded.push(match seq {
                Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
                Sequence::Triple {
                    literals,
                    offset,
                    match_len,
                } => (literals.len(), offset, match_len),
            });
        });
    }
    recorded
}
9425
/// Returns the donor library's level-22 sequences for `data` with block
/// delimiters merged into the following sequence and any remaining
/// `(lits, 0, 0)` entry removed.
#[cfg(test)]
fn donor_level22_sequences(data: &[u8]) -> Vec<(usize, usize, usize)> {
    let raw = donor_level22_sequences_with_delimiters(data);
    let mut merged = merge_block_delimiters_like_donor(raw);
    // Drop entries that carry neither an offset nor a match length.
    merged.retain(|&(_, offset, match_len)| offset != 0 || match_len != 0);
    merged
}
9433
/// Runs the `zstd` crate's `ZSTD_generateSequences` at compression level 22
/// over `data` and returns each produced sequence as
/// `(lit_len, offset, match_len)`.
///
/// Panics if the compression context cannot be created or if any zstd call
/// reports an error.
#[cfg(test)]
fn donor_level22_sequences_with_delimiters(data: &[u8]) -> Vec<(usize, usize, usize)> {
    use zstd::zstd_safe;
    use zstd::zstd_safe::zstd_sys;

    /// Panics with `context` and the zstd error name when `code` is an error.
    fn assert_zstd_ok(code: usize, context: &str) {
        assert_eq!(
            // NOTE(review): presumably ZSTD_isError has no preconditions on
            // `code` — confirm against the zstd API docs.
            unsafe { zstd_sys::ZSTD_isError(code) },
            0,
            "{context} failed: {}",
            zstd_safe::get_error_name(code)
        );
    }

    // SAFETY (review notes): `cctx` is checked for null before use and freed
    // once below; `seqs` provides `seqs.len()` writable elements; `data`
    // provides `data.len()` readable bytes.
    // NOTE(review): a failing assertion between create and free panics before
    // ZSTD_freeCCtx runs, so the context is not released on that path.
    unsafe {
        let cctx = zstd_sys::ZSTD_createCCtx();
        assert!(!cctx.is_null(), "ZSTD_createCCtx returned null");

        assert_zstd_ok(
            zstd_sys::ZSTD_CCtx_setParameter(
                cctx,
                zstd_sys::ZSTD_cParameter::ZSTD_c_compressionLevel,
                22,
            ),
            "ZSTD_c_compressionLevel",
        );

        // Size the output buffer from zstd's own bound for this input length.
        let seq_capacity = zstd_safe::sequence_bound(data.len());
        let mut seqs = alloc::vec![
            zstd_sys::ZSTD_Sequence {
                offset: 0,
                litLength: 0,
                matchLength: 0,
                rep: 0,
            };
            seq_capacity
        ];

        let seq_count = zstd_sys::ZSTD_generateSequences(
            cctx,
            seqs.as_mut_ptr(),
            seqs.len(),
            data.as_ptr().cast(),
            data.len(),
        );
        assert_zstd_ok(seq_count, "ZSTD_generateSequences");
        let rc = zstd_sys::ZSTD_freeCCtx(cctx);
        assert_eq!(rc, 0, "ZSTD_freeCCtx failed");

        // Keep only the entries zstd reported, converted to plain tuples.
        seqs.truncate(seq_count);
        seqs.into_iter()
            .map(|seq| {
                (
                    seq.litLength as usize,
                    seq.offset as usize,
                    seq.matchLength as usize,
                )
            })
            .collect()
    }
}
9495
// Level-22 sequences for the `z000033` corpus file must equal the donor's.
#[test]
fn level22_sequences_match_donor_on_corpus_proxy() {
    let corpus: &[u8] = include_bytes!("../../decodecorpus_files/z000033");
    assert_level22_sequences_match_donor(corpus);
}
9501
// Level-22 sequences for the `z000030` corpus file must equal the donor's.
#[test]
fn level22_sequences_match_donor_on_small_corpus_proxy() {
    let corpus: &[u8] = include_bytes!("../../decodecorpus_files/z000030");
    assert_level22_sequences_match_donor(corpus);
}
9507
/// Asserts that this crate and the donor produce identical level-22 sequence
/// lists for `data`.
///
/// # Panics
///
/// On divergence, panics with a diagnostic containing the first differing
/// index, both sequences at that index, list lengths, the input positions
/// consumed before the divergence, the repeat-offset history replayed up to
/// that point, a window of surrounding sequences from each side, and the
/// donor block range(s) containing the divergence position.
#[cfg(test)]
fn assert_level22_sequences_match_donor(data: &[u8]) {
    let rust = collect_level22_sequences(data);
    let donor = donor_level22_sequences(data);
    if rust == donor {
        return;
    }

    // With no differing pair in the common prefix, the lists differ in length
    // and the divergence sits at the end of the shorter one.
    let common_len = rust.len().min(donor.len());
    let first_diff = rust
        .iter()
        .zip(&donor)
        .position(|(ours, theirs)| ours != theirs)
        .unwrap_or(common_len);

    // Input bytes covered by the sequences preceding the divergence.
    let consumed_before = |seqs: &[(usize, usize, usize)]| -> usize {
        seqs.iter()
            .take(first_diff)
            .map(|&(lit_len, _, match_len)| lit_len + match_len)
            .sum()
    };
    let rust_pos = consumed_before(&rust);
    let donor_pos = consumed_before(&donor);

    let start = first_diff.saturating_sub(4);
    let window_end = first_diff + 4;
    let rust_window = &rust[start..window_end.min(rust.len())];
    let donor_window = &donor[start..window_end.min(donor.len())];

    // Replay the offset history over the agreeing prefix so the message shows
    // the repeat offsets in effect at the divergence.
    let mut reps = [1u32, 4, 8];
    for &(lit_len, offset, _) in rust.iter().take(first_diff) {
        let _ = encode_offset_with_history(offset as u32, lit_len as u32, &mut reps);
    }

    let containing_blocks = level22_donor_block_ranges(data)
        .into_iter()
        .filter(|&(block_start, block_len)| {
            block_start <= rust_pos && rust_pos < block_start + block_len
        })
        .collect::<Vec<_>>();

    panic!(
        "level22 sequence path diverged at idx {}: rust={:?} donor={:?} (rust_len={} donor_len={} rust_pos={} donor_pos={} reps_before={:?} rust_window={:?} donor_window={:?} block_ranges={:?})",
        first_diff,
        rust.get(first_diff),
        donor.get(first_diff),
        rust.len(),
        donor.len(),
        rust_pos,
        donor_pos,
        reps,
        rust_window,
        donor_window,
        containing_blocks,
    );
}
9553
// After a sparse (`Some(true)`) skip of a high-entropy block, a following
// block that starts with the previous block's tail must still yield an
// immediate match against that tail.
#[test]
fn hc_sparse_skip_matching_preserves_tail_cross_block_match() {
    let mut matcher = HcMatchGenerator::new(1 << 22);
    let tail = b"Qz9kLm2Rp";
    let mut first = deterministic_high_entropy_bytes(0xD1B5_4A32_9C77_0E19, 4096);
    let tail_start = first.len() - tail.len();
    first[tail_start..].copy_from_slice(tail);
    // `first` is not read again, so it is moved rather than cloned.
    matcher.add_data(first, |_| {});
    matcher.skip_matching(Some(true));

    let mut second = tail.to_vec();
    second.extend_from_slice(b"after-tail-literals");
    matcher.add_data(second, |_| {});

    // Only the first emitted sequence is of interest; later ones are ignored.
    let mut first_sequence = None;
    matcher.start_matching(|seq| {
        if first_sequence.is_some() {
            return;
        }
        first_sequence = Some(match seq {
            Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => (literals.len(), offset, match_len),
        });
    });

    let (literals_len, offset, match_len) =
        first_sequence.expect("expected at least one sequence after sparse skip");
    assert_eq!(
        literals_len, 0,
        "first sequence should start at block boundary"
    );
    assert_eq!(
        offset,
        tail.len(),
        "first match should reference previous tail"
    );
    assert!(
        match_len >= tail.len(),
        "tail-aligned cross-block match must be preserved"
    );
}
9599
// Same scenario as the plain HC sparse-skip test, but with the matcher
// configured via `BTULTRA2_HC_CONFIG_L22`: the block following a sparse skip
// must match the previous block's tail right at the boundary.
#[test]
fn btultra2_sparse_skip_matching_preserves_tail_cross_block_match() {
    let tail = b"Bt9kLm2Rp";

    let mut matcher = HcMatchGenerator::new(1 << 20);
    matcher.configure(BTULTRA2_HC_CONFIG_L22, 20);

    // High-entropy block whose final bytes are overwritten with `tail`.
    let mut previous_block = deterministic_high_entropy_bytes(0xA9C3_7F21_D4E8_510B, 4096);
    let tail_offset = previous_block.len() - tail.len();
    previous_block[tail_offset..].copy_from_slice(tail);
    matcher.add_data(previous_block, |_| {});
    matcher.skip_matching(Some(true));

    let mut next_block = tail.to_vec();
    next_block.extend_from_slice(b"after-tail-literals");
    matcher.add_data(next_block, |_| {});

    // Capture only the first emitted sequence.
    let mut leading = None;
    matcher.start_matching(|seq| {
        if leading.is_none() {
            leading = Some(match seq {
                Sequence::Triple {
                    literals,
                    offset,
                    match_len,
                } => (literals.len(), offset, match_len),
                Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
            });
        }
    });

    let (literals_len, offset, match_len) =
        leading.expect("expected at least one sequence after sparse BT skip");
    assert_eq!(
        literals_len, 0,
        "BT sparse skip should preserve an immediate boundary match"
    );
    assert_eq!(
        offset,
        tail.len(),
        "first BT match should reference previous tail"
    );
    assert!(
        match_len >= tail.len(),
        "BT sparse skip must seed the dense tail for cross-block matching"
    );
}
9646
// After a sparse skip, a position that lies both on the sparse grid
// (multiples of `INCOMPRESSIBLE_SKIP_STEP` from the block start) and inside
// the dense tail must not have a chain entry pointing back at itself.
#[test]
fn hc_sparse_skip_matching_does_not_reinsert_sparse_tail_positions() {
    let mut matcher = HcMatchGenerator::new(1 << 22);
    let first = deterministic_high_entropy_bytes(0xC2B2_AE3D_27D4_EB4F, 4096);
    // Record the length up front so the buffer can be moved instead of cloned.
    let current_len = first.len();
    matcher.add_data(first, |_| {});
    matcher.skip_matching(Some(true));

    let current_abs_start = matcher.history_abs_start + matcher.window_size - current_len;
    let current_abs_end = current_abs_start + current_len;
    let dense_tail = HC_MIN_MATCH_LEN + INCOMPRESSIBLE_SKIP_STEP;
    // Start of the dense tail, clamped to the live history and the block.
    let tail_start = current_abs_end
        .saturating_sub(dense_tail)
        .max(matcher.history_abs_start)
        .max(current_abs_start);

    let overlap_pos = (tail_start..current_abs_end)
        .find(|&pos| (pos - current_abs_start).is_multiple_of(INCOMPRESSIBLE_SKIP_STEP))
        .expect("fixture should contain at least one sparse-grid overlap in dense tail");

    let rel = matcher
        .relative_position(overlap_pos)
        .expect("overlap position should be representable as relative position");
    let chain_idx = rel as usize & ((1 << matcher.chain_log) - 1);
    assert_ne!(
        matcher.chain_table[chain_idx],
        rel + 1,
        "sparse-grid tail positions must not be reinserted (self-loop chain entry)"
    );
}
9677
// With `history_start` at 16 on a 26-byte history, `compact_history` must
// drop the first 16 bytes and reset `history_start` to zero.
#[test]
fn hc_compact_history_drains_when_threshold_crossed() {
    let mut matcher = HcMatchGenerator::new(8);
    matcher.history = b"abcdefghijklmnopqrstuvwxyz".to_vec();
    matcher.history_start = 16;

    matcher.compact_history();

    assert_eq!(matcher.history_start, 0);
    assert_eq!(matcher.history, b"qrstuvwxyz");
}
9687
// Inserting absolute position 0 while `position_base` is 1 must leave both
// the hash table and the chain table untouched.
#[test]
fn hc_insert_position_no_rebase_returns_when_relative_pos_unavailable() {
    let mut matcher = HcMatchGenerator::new(32);
    matcher.history = b"abcdefghijklmnop".to_vec();
    matcher.history_abs_start = 0;
    matcher.position_base = 1;
    matcher.ensure_tables();

    // Snapshot both tables so any mutation is detectable.
    let hash_snapshot = matcher.hash_table.clone();
    let chain_snapshot = matcher.chain_table.clone();

    matcher.insert_position_no_rebase(0);

    assert_eq!(matcher.hash_table, hash_snapshot);
    assert_eq!(matcher.chain_table, chain_snapshot);
}
9703
// A contiguous `insert_positions(0, 9)` must move `next_to_update3` to 9.
#[test]
fn hc_insert_positions_advances_next_to_update3_for_contiguous_range() {
    let mut matcher = HcMatchGenerator::new(64);
    matcher.history = b"abcdefghijklmnopqrstuvwxyz".to_vec();
    matcher.history_start = 0;
    matcher.history_abs_start = 0;
    matcher.position_base = 0;
    matcher.ensure_tables();
    matcher.next_to_update3 = 0;

    matcher.insert_positions(0, 9);

    let cursor = matcher.next_to_update3;
    assert_eq!(
        cursor, 9,
        "contiguous insert_positions should advance hash3 update cursor"
    );
}
9721
// A stepped insert over 0..16 with step 4 must leave `next_to_update3` at 0.
#[test]
fn hc_insert_positions_with_step_keeps_next_to_update3_cursor_for_sparse_ranges() {
    let mut matcher = HcMatchGenerator::new(64);
    matcher.history = b"abcdefghijklmnopqrstuvwxyz".to_vec();
    matcher.history_start = 0;
    matcher.history_abs_start = 0;
    matcher.position_base = 0;
    matcher.ensure_tables();
    matcher.next_to_update3 = 0;

    matcher.insert_positions_with_step(0, 16, 4);

    let cursor = matcher.next_to_update3;
    assert_eq!(
        cursor, 0,
        "sparse insert_positions_with_step must not mark skipped positions as hash3-updated"
    );
}
9739
// Simple backend (`CompressionLevel::Fastest`): priming with a 24-byte
// dictionary grows the window by 24, and after two skipped 8-byte blocks the
// retained dictionary budget must be zero and the window back at its base.
#[test]
fn prime_with_dictionary_budget_shrinks_after_simple_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);
    // A tiny live window makes the dictionary-primed slices fall out quickly,
    // so budget retirement can be asserted deterministically.
    driver.match_generator.max_window_size = 8;
    driver.reported_window_size = 8;

    let window_before_dict = driver.match_generator.max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(
        driver.match_generator.max_window_size,
        window_before_dict + 24
    );

    for filler in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut buffer = driver.get_next_space();
        buffer.clear();
        buffer.extend_from_slice(filler);
        driver.commit_space(buffer);
        driver.skip_matching_with_hint(None);
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.match_generator.max_window_size, window_before_dict,
        "retired dictionary budget must not remain reusable for live history"
    );
}
9770
// Dfast backend (`CompressionLevel::Level(2)`): priming with a 24-byte
// dictionary grows the window by 24, and after two skipped 8-byte blocks the
// retained dictionary budget must be zero and the window back at its base.
#[test]
fn prime_with_dictionary_budget_shrinks_after_dfast_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(2));
    // A tiny live window makes the dictionary-primed slices fall out quickly,
    // so budget retirement can be asserted deterministically.
    driver.dfast_matcher_mut().max_window_size = 8;
    driver.reported_window_size = 8;

    let window_before_dict = driver.dfast_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(
        driver.dfast_matcher().max_window_size,
        window_before_dict + 24
    );

    for filler in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut buffer = driver.get_next_space();
        buffer.clear();
        buffer.extend_from_slice(filler);
        driver.commit_space(buffer);
        driver.skip_matching_with_hint(None);
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.dfast_matcher().max_window_size,
        window_before_dict,
        "retired dictionary budget must not remain reusable for live history"
    );
}
9802
// After priming with an 8-byte dictionary, a first block that repeats the
// dictionary must produce a zero-literal match at offset 8.
#[test]
fn hc_prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Better);
    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    // The block is a verbatim copy of the dictionary, so the whole block is a
    // candidate match against the primed history.
    let mut buffer = driver.get_next_space();
    buffer.clear();
    buffer.extend_from_slice(b"abcdefgh");
    driver.commit_space(buffer);

    let mut saw_match = false;
    driver.start_matching(|seq| {
        let is_dictionary_match = matches!(
            seq,
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } if literals.is_empty() && offset == 8 && match_len >= HC_MIN_MATCH_LEN
        );
        if is_dictionary_match {
            saw_match = true;
        }
    });

    assert!(
        saw_match,
        "hash-chain backend should match dictionary-primed history in first full block"
    );
}
9837
// Hash-chain backend (`CompressionLevel::Better`): priming with a 24-byte
// dictionary grows the window by 24, and after two skipped 8-byte blocks the
// retained dictionary budget must be zero and the window back at its base.
#[test]
fn prime_with_dictionary_budget_shrinks_after_hc_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Better);
    // A tiny live window makes the dictionary-primed slices fall out quickly.
    driver.hc_matcher_mut().max_window_size = 8;
    driver.reported_window_size = 8;

    let window_before_dict = driver.hc_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(driver.hc_matcher().max_window_size, window_before_dict + 24);

    for filler in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut buffer = driver.get_next_space();
        buffer.clear();
        buffer.extend_from_slice(filler);
        driver.commit_space(buffer);
        driver.skip_matching_with_hint(None);
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.hc_matcher().max_window_size,
        window_before_dict,
        "retired dictionary budget must not remain reusable for live history"
    );
}
9868
// With `history_abs_start` pushed past `u32::MAX`, `skip_matching` must
// rebase `position_base`, keep the hash table populated, and keep chain
// lookups working.
#[test]
fn hc_rebases_positions_after_u32_boundary() {
    let mut matcher = HcMatchGenerator::new(64);
    matcher.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
    matcher.ensure_tables();
    matcher.position_base = 0;

    // When `usize` cannot represent a value beyond `u32::MAX`, there is
    // nothing to exercise and the test returns early.
    let Ok(history_abs_start) = usize::try_from(u64::from(u32::MAX) + 64) else {
        return;
    };

    // Simulate a long-running stream where absolute history positions crossed
    // the u32 range. Before #51 this disabled HC inserts entirely.
    matcher.history_abs_start = history_abs_start;
    matcher.skip_matching(None);

    assert_eq!(
        matcher.position_base, matcher.history_abs_start,
        "rebase should anchor to the oldest live absolute position"
    );

    let table_has_entries = matcher.hash_table.iter().any(|slot| *slot != HC_EMPTY);
    assert!(
        table_has_entries,
        "HC hash table should still be populated after crossing u32 boundary"
    );

    // Rebasing must preserve candidate lookup, not just table population.
    let probe_pos = matcher.history_abs_start + 10;
    let candidates = matcher.chain_candidates(probe_pos);
    let has_valid_candidate = candidates.iter().any(|slot| *slot != usize::MAX);
    assert!(
        has_valid_candidate,
        "chain_candidates should return valid matches after rebase"
    );
}
9901
// `maybe_rebase_positions(abs_pos)` must produce the same tables as a fresh
// matcher into which only the positions before `abs_pos` were inserted.
#[test]
fn hc_rebase_rebuilds_only_inserted_prefix() {
    // When `usize` cannot represent a value beyond `u32::MAX`, there is
    // nothing to exercise and the test returns early. The subject matcher is
    // still constructed first, matching the original setup order.
    let mut matcher = HcMatchGenerator::new(64);
    matcher.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
    matcher.ensure_tables();
    matcher.position_base = 0;
    let Ok(history_abs_start) = usize::try_from(u64::from(u32::MAX) + 64) else {
        return;
    };
    matcher.history_abs_start = history_abs_start;
    let abs_pos = matcher.history_abs_start + 6;

    // Reference matcher: already anchored at `history_abs_start`, tables
    // cleared, then only the prefix before `abs_pos` inserted by hand.
    let mut reference = HcMatchGenerator::new(64);
    reference.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
    reference.ensure_tables();
    reference.history_abs_start = history_abs_start;
    reference.position_base = reference.history_abs_start;
    reference.hash_table.fill(HC_EMPTY);
    reference.chain_table.fill(HC_EMPTY);
    let prefix = reference.history_abs_start..abs_pos;
    for pos in prefix {
        reference.insert_position_no_rebase(pos);
    }

    matcher.maybe_rebase_positions(abs_pos);

    assert_eq!(
        matcher.position_base, matcher.history_abs_start,
        "rebase should still anchor to the oldest live absolute position"
    );
    assert_eq!(
        matcher.hash_table, reference.hash_table,
        "rebase must rebuild only positions already inserted before abs_pos"
    );
    assert_eq!(
        matcher.chain_table, reference.chain_table,
        "future positions must not be pre-seeded into HC chains during rebase"
    );
}
9941
// A `SuffixStore` created with capacity 1 must accept an insert and return
// the stored index for the same key.
#[test]
fn suffix_store_with_single_slot_does_not_panic_on_keying() {
    let key = b"abcde";
    let mut store = SuffixStore::with_capacity(1);

    store.insert(key, 0);

    assert!(store.contains_key(key));
    assert_eq!(store.get(key), Some(0));
}
9949
// Checks the per-level configuration applied by `reset`: the hash fill step
// for `Uncompressed` and `Fastest`, and backend, window size and lazy depth
// for `Better`.
#[test]
fn fastest_reset_uses_interleaved_hash_fill_step() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    driver.reset(CompressionLevel::Uncompressed);
    let uncompressed_step = driver.match_generator.hash_fill_step;
    assert_eq!(uncompressed_step, 1);

    driver.reset(CompressionLevel::Fastest);
    let fastest_step = driver.match_generator.hash_fill_step;
    assert_eq!(fastest_step, FAST_HASH_FILL_STEP);

    // Better uses the HashChain backend with lazy2; verify that the backend
    // switch happened and that lazy_depth is configured accordingly.
    driver.reset(CompressionLevel::Better);
    assert_eq!(driver.active_backend, MatcherBackend::HashChain);
    assert_eq!(driver.window_size(), (1u64 << 23));
    assert_eq!(driver.hc_matcher().lazy_depth, 2);
}
9967
// For "abcde" repeated three times, the first sequence must be five literals
// followed by a 10-byte match at offset 5, and the offset history must then
// read [5, 1, 4].
#[test]
fn simple_matcher_updates_offset_history_after_emitting_match() {
    let mut matcher = MatchGenerator::new(64);
    let input = b"abcdeabcdeabcde".to_vec();
    matcher.add_data(input, SuffixStore::with_capacity(64), |_, _| {});

    let emitted = matcher.next_sequence(|seq| {
        let expected = Sequence::Triple {
            literals: b"abcde",
            offset: 5,
            match_len: 10,
        };
        assert_eq!(seq, expected);
    });
    assert!(emitted);

    assert_eq!(matcher.offset_hist, [5, 1, 4]);
}
9989
// With zero pending literals at index 10 and `offset_hist = [99, 10, 4]`,
// the repcode candidate must be offset 10 with length 10.
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep1_before_hash_lookup() {
    let mut matcher = MatchGenerator::new(64);
    let input = b"abcdefghijabcdefghij".to_vec();
    matcher.add_data(input, SuffixStore::with_capacity(64), |_, _| {});

    // Position the matcher at the start of the repeated half with no
    // literals pending.
    matcher.suffix_idx = 10;
    matcher.last_idx_in_sequence = 10;
    matcher.offset_hist = [99, 10, 4];

    let remaining = &matcher.window.last().unwrap().data[10..];
    let candidate = matcher.repcode_candidate(remaining, 0);
    assert_eq!(candidate, Some((10, 10)));
}
10006
// A repcode offset of 10 at the start of the second window entry must match
// the identical first entry for the full 10 bytes.
#[test]
fn simple_matcher_repcode_can_target_previous_window_entry() {
    let mut matcher = MatchGenerator::new(64);

    let earlier = b"abcdefghij".to_vec();
    matcher.add_data(earlier, SuffixStore::with_capacity(64), |_, _| {});
    matcher.skip_matching();

    let current = b"abcdefghij".to_vec();
    matcher.add_data(current, SuffixStore::with_capacity(64), |_, _| {});

    matcher.offset_hist = [99, 10, 4];

    let whole_entry = &matcher.window.last().unwrap().data;
    let candidate = matcher.repcode_candidate(whole_entry, 0);
    assert_eq!(candidate, Some((10, 10)));
}
10027
// With `offset_hist = [99, 4, 10]` and zero pending literals at index 10,
// the candidate must come from the third history slot (offset 10).
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep2() {
    let mut matcher = MatchGenerator::new(64);
    let input = b"abcdefghijabcdefghij".to_vec();
    matcher.add_data(input, SuffixStore::with_capacity(64), |_, _| {});

    matcher.suffix_idx = 10;
    matcher.last_idx_in_sequence = 10;
    // rep1=4 does not match at idx 10, rep2=10 does.
    matcher.offset_hist = [99, 4, 10];

    let remaining = &matcher.window.last().unwrap().data[10..];
    let candidate = matcher.repcode_candidate(remaining, 0);
    assert_eq!(candidate, Some((10, 10)));
}
10044
// With `offset_hist = [11, 4, 99]` and zero pending literals at index 10,
// the candidate must be offset 10, i.e. the first history slot minus one.
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep0_minus1() {
    let mut matcher = MatchGenerator::new(64);
    let input = b"abcdefghijabcdefghij".to_vec();
    matcher.add_data(input, SuffixStore::with_capacity(64), |_, _| {});

    matcher.suffix_idx = 10;
    matcher.last_idx_in_sequence = 10;
    // rep1=4 and rep2=99 do not match; rep0-1 == 10 does.
    matcher.offset_hist = [11, 4, 99];

    let remaining = &matcher.window.last().unwrap().data[10..];
    let candidate = matcher.repcode_candidate(remaining, 0);
    assert_eq!(candidate, Some((10, 10)));
}
10061
// With 10 bytes of earlier history and `suffix_idx = 3` in the current
// entry, `offset_match_len(14, ..)` must return `None`.
#[test]
fn simple_matcher_repcode_rejects_offsets_beyond_searchable_prefix() {
    let mut matcher = MatchGenerator::new(64);

    let earlier = b"abcdefghij".to_vec();
    matcher.add_data(earlier, SuffixStore::with_capacity(64), |_, _| {});
    matcher.skip_matching();

    let current = b"klmnopqrst".to_vec();
    matcher.add_data(current, SuffixStore::with_capacity(64), |_, _| {});
    matcher.suffix_idx = 3;

    let remaining = &matcher.window.last().unwrap().data[3..];
    let candidate = matcher.offset_match_len(14, remaining);
    assert_eq!(candidate, None);
}
10081
// Even with `hash_fill_step = FAST_HASH_FILL_STEP`, a skipped block must be
// indexed so that "bcdef" (index 1 of the 16-byte block) is found at offset
// 15 as the only sequence of the next block.
#[test]
fn simple_matcher_skip_matching_seeds_every_position_even_with_fast_step() {
    let mut matcher = MatchGenerator::new(64);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;

    let skipped = b"abcdefghijklmnop".to_vec();
    matcher.add_data(skipped, SuffixStore::with_capacity(64), |_, _| {});
    matcher.skip_matching();

    let probe = b"bcdef".to_vec();
    matcher.add_data(probe, SuffixStore::with_capacity(64), |_, _| {});

    let emitted = matcher.next_sequence(|seq| {
        let expected = Sequence::Triple {
            literals: b"",
            offset: 15,
            match_len: 5,
        };
        assert_eq!(seq, expected);
    });
    assert!(emitted);

    // The match consumed the whole probe block, so nothing else is produced.
    let emitted_more = matcher.next_sequence(|_| {});
    assert!(!emitted_more);
}
10106
// After `skip_matching_with_hint(Some(true))`, a probe from index 3 of the
// skipped block must not match immediately, while a probe from the last
// `MIN_MATCH_LEN` bytes must match at the very start of the next block.
#[test]
fn simple_matcher_skip_matching_with_incompressible_hint_uses_sparse_prefix() {
    let skipped = b"abcdefghijklmnopqrstuvwxyz012345".to_vec();
    let sparse_probe = skipped[3..3 + MIN_MATCH_LEN].to_vec();
    let tail_offset = skipped.len() - MIN_MATCH_LEN;
    let tail_probe = skipped[tail_offset..tail_offset + MIN_MATCH_LEN].to_vec();

    // Part 1: sparse-prefix probe should not immediately match.
    let mut matcher = MatchGenerator::new(128);
    matcher.add_data(skipped, SuffixStore::with_capacity(256), |_, _| {});
    matcher.skip_matching_with_hint(Some(true));
    matcher.add_data(sparse_probe, SuffixStore::with_capacity(256), |_, _| {});

    let mut sparse_first_is_literals = None;
    let sparse_emitted = matcher.next_sequence(|seq| {
        // Only the first sequence seen by the callback is classified.
        if sparse_first_is_literals.is_none() {
            sparse_first_is_literals = Some(matches!(seq, Sequence::Literals { .. }));
        }
    });
    assert!(sparse_emitted);
    assert!(
        sparse_first_is_literals.unwrap_or(false),
        "sparse-start probe should not produce an immediate match"
    );

    // Part 2: the dense tail remains indexed for cross-block boundary
    // matching. A fresh matcher is fed the same first block.
    let mut matcher = MatchGenerator::new(128);
    matcher.add_data(
        b"abcdefghijklmnopqrstuvwxyz012345".to_vec(),
        SuffixStore::with_capacity(256),
        |_, _| {},
    );
    matcher.skip_matching_with_hint(Some(true));
    matcher.add_data(tail_probe, SuffixStore::with_capacity(256), |_, _| {});

    let mut tail_first_is_immediate_match = None;
    let tail_emitted = matcher.next_sequence(|seq| {
        if tail_first_is_immediate_match.is_none() {
            tail_first_is_immediate_match =
                Some(matches!(seq, Sequence::Triple { literals, .. } if literals.is_empty()));
        }
    });
    assert!(tail_emitted);
    assert!(
        tail_first_is_immediate_match.unwrap_or(false),
        "dense tail probe should match immediately at block start"
    );
}
10152
// `add_suffixes_till(10, FAST_HASH_FILL_STEP)` on a 10-byte entry must
// register the suffix starting at index 5.
#[test]
fn simple_matcher_add_suffixes_till_backfills_last_searchable_anchor() {
    let mut matcher = MatchGenerator::new(64);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;
    let input = b"01234abcde".to_vec();
    matcher.add_data(input, SuffixStore::with_capacity(64), |_, _| {});

    matcher.add_suffixes_till(10, FAST_HASH_FILL_STEP);

    let entry = matcher.window.last().unwrap();
    let anchor_key = &entry.data[5..10];
    assert_eq!(entry.suffixes.get(anchor_key), Some(5));
}
10168
// Calling `add_suffixes_till` with an index of `MIN_MATCH_LEN - 1` must not
// register the suffix at position 0.
#[test]
fn simple_matcher_add_suffixes_till_skips_when_idx_below_min_match_len() {
    let mut matcher = MatchGenerator::new(128);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;
    let input = b"abcdefghijklmnopqrstuvwxyz".to_vec();
    matcher.add_data(input, SuffixStore::with_capacity(1 << 16), |_, _| {});

    matcher.add_suffixes_till(MIN_MATCH_LEN - 1, FAST_HASH_FILL_STEP);

    let entry = matcher.window.last().unwrap();
    let leading_key = &entry.data[..MIN_MATCH_LEN];
    assert_eq!(entry.suffixes.get(leading_key), None);
}
10185
// `add_suffixes_till(17, FAST_HASH_FILL_STEP)` must register the suffixes at
// positions 0, 3, 6, 9 and 12.
#[test]
fn simple_matcher_add_suffixes_till_fast_step_registers_interleaved_positions() {
    let mut matcher = MatchGenerator::new(128);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;
    let input = b"abcdefghijklmnopqrstuvwxyz".to_vec();
    matcher.add_data(input, SuffixStore::with_capacity(1 << 16), |_, _| {});

    matcher.add_suffixes_till(17, FAST_HASH_FILL_STEP);

    let entry = matcher.window.last().unwrap();
    let expected_positions = [0usize, 3, 6, 9, 12];
    for pos in expected_positions {
        let key = &entry.data[pos..pos + MIN_MATCH_LEN];
        let stored = entry.suffixes.get(key);
        assert_eq!(
            stored,
            Some(pos),
            "expected interleaved suffix registration at pos {pos}"
        );
    }
}
10208
// Three 6-byte blocks go through a 16-byte window; the first two are
// skipped. Replaying the third block's sequences on top of the second block
// must reproduce the second block followed by the third.
#[test]
fn dfast_skip_matching_handles_window_eviction() {
    let mut matcher = DfastMatchGenerator::new(16);

    matcher.add_data(alloc::vec![1, 2, 3, 4, 5, 6], |_| {});
    matcher.skip_matching(None);
    matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});
    matcher.skip_matching(None);
    matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});

    // Seed the decoded output with the second block.
    let mut decoded = alloc::vec![7, 8, 9, 10, 11, 12];
    matcher.start_matching(|seq| match seq {
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            decoded.extend_from_slice(literals);
            // Copy byte by byte so a match may overlap the bytes it produces.
            let copy_from = decoded.len() - offset;
            for idx in copy_from..copy_from + match_len {
                let byte = decoded[idx];
                decoded.push(byte);
            }
        }
        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
    });

    assert_eq!(decoded, [7, 8, 9, 10, 11, 12, 7, 8, 9, 10, 11, 12]);
}
10238
// Buffers are allocated with capacity 64 but hold 8 bytes; when the second
// `add_data` evicts the first buffer, the callback must see a length of 8.
#[test]
fn dfast_add_data_callback_reports_evicted_len_not_capacity() {
    let mut matcher = DfastMatchGenerator::new(8);

    let mut oldest = Vec::with_capacity(64);
    oldest.extend_from_slice(b"abcdefgh");
    matcher.add_data(oldest, |_| {});

    let mut newest = Vec::with_capacity(64);
    newest.extend_from_slice(b"ijklmnop");

    let mut evicted_len = None;
    matcher.add_data(newest, |evicted| {
        evicted_len = Some(evicted.len());
    });

    assert_eq!(
        evicted_len,
        Some(8),
        "eviction callback must report evicted byte length, not backing capacity"
    );
}
10261
// Two 8-byte buffers (capacity 64 each) sit in a 16-byte window; after the
// window is shrunk to 8, `trim_to_window` must report an evicted length of 8.
#[test]
fn dfast_trim_to_window_callback_reports_evicted_len_not_capacity() {
    let mut matcher = DfastMatchGenerator::new(16);

    let mut oldest = Vec::with_capacity(64);
    oldest.extend_from_slice(b"abcdefgh");
    matcher.add_data(oldest, |_| {});

    let mut newest = Vec::with_capacity(64);
    newest.extend_from_slice(b"ijklmnop");
    matcher.add_data(newest, |_| {});

    // Shrink the window so that only one of the two buffers still fits.
    matcher.max_window_size = 8;

    let mut evicted_len = None;
    matcher.trim_to_window(|evicted| {
        evicted_len = Some(evicted.len());
    });

    assert_eq!(
        evicted_len,
        Some(8),
        "trim callback must report evicted byte length, not backing capacity"
    );
}
10287
// The first block must come out as literals only. The second block must then
// be covered by exactly one sequence: a zero-literal, 11-byte match at
// offset 5 reaching back into the first block's tail.
#[test]
fn dfast_inserts_tail_positions_for_next_block_matching() {
    let mut matcher = DfastMatchGenerator::new(1 << 22);

    matcher.add_data(b"012345bcdea".to_vec(), |_| {});
    let mut decoded = Vec::new();
    matcher.start_matching(|seq| match seq {
        Sequence::Triple { .. } => unreachable!("first block should not match history"),
        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
    });
    assert_eq!(decoded, b"012345bcdea");

    matcher.add_data(b"bcdeabcdeab".to_vec(), |_| {});
    let mut saw_first_sequence = false;
    matcher.start_matching(|seq| {
        // A second callback invocation means the block was split up.
        assert!(!saw_first_sequence, "expected a single cross-block match");
        saw_first_sequence = true;
        match seq {
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => {
                assert_eq!(literals, b"");
                assert_eq!(offset, 5);
                assert_eq!(match_len, 11);
                // Replay the match byte by byte; it overlaps its own output.
                let copy_from = decoded.len() - offset;
                for idx in copy_from..copy_from + match_len {
                    let byte = decoded[idx];
                    decoded.push(byte);
                }
            }
            Sequence::Literals { .. } => {
                panic!("expected tail-anchored cross-block match before any literals")
            }
        }
    });

    assert!(
        saw_first_sequence,
        "expected tail-anchored cross-block match"
    );
    assert_eq!(decoded, b"012345bcdeabcdeabcdeab");
}
10332
/// After `skip_matching(Some(false))` on a first block ending in `tail`, a
/// second block that begins with the same `tail` must start with a match
/// (no leading literals) whose offset equals `tail.len()`.
#[test]
fn dfast_dense_skip_matching_backfills_previous_tail_for_next_block() {
    let tail = b"Qz9kLm2Rp";
    let mut matcher = DfastMatchGenerator::new(1 << 22);

    // First block: filler followed by the tail; it is skipped, not matched.
    let mut first = b"0123456789abcdef".to_vec();
    first.extend_from_slice(tail);
    matcher.add_data(first.clone(), |_| {});
    matcher.skip_matching(Some(false));

    // Second block: the tail again, then bytes not present in the first block.
    let mut second = tail.to_vec();
    second.extend_from_slice(b"after-tail-literals");
    matcher.add_data(second, |_| {});

    // Record only the first emitted sequence as (literal len, offset, match len);
    // a literals-only sequence is recorded with zero offset and match length.
    let mut first_sequence = None;
    matcher.start_matching(|seq| {
        if first_sequence.is_none() {
            let summary = match seq {
                Sequence::Triple {
                    literals,
                    offset,
                    match_len,
                } => (literals.len(), offset, match_len),
                Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
            };
            first_sequence = Some(summary);
        }
    });

    let (lit_len, offset, match_len) = first_sequence.expect("expected at least one sequence");
    assert_eq!(
        lit_len, 0,
        "expected immediate cross-block match at block start"
    );
    assert_eq!(
        offset,
        tail.len(),
        "expected dense skip to preserve cross-boundary tail match"
    );
    assert!(
        DFAST_MIN_MATCH_LEN <= match_len,
        "match length should satisfy dfast minimum match length"
    );
}
10376
/// After `skip_matching(Some(true))` on a 4096-byte generated block whose
/// final bytes are overwritten with `tail`, a following block that starts
/// with `tail` must open with a match at offset `tail.len()`.
#[test]
fn dfast_sparse_skip_matching_preserves_tail_cross_block_match() {
    let tail = b"Qz9kLm2Rp";
    let mut matcher = DfastMatchGenerator::new(1 << 22);

    // Overwrite the end of the generated block with the known tail.
    let mut first = deterministic_high_entropy_bytes(0x9E37_79B9_7F4A_7C15, 4096);
    let tail_at = first.len() - tail.len();
    first[tail_at..].copy_from_slice(tail);
    matcher.add_data(first.clone(), |_| {});

    matcher.skip_matching(Some(true));

    // Next block repeats the tail and then continues with fresh bytes.
    let mut second = tail.to_vec();
    second.extend_from_slice(b"after-tail-literals");
    matcher.add_data(second, |_| {});

    // Keep just the first sequence as (literal len, offset, match len).
    let mut first_sequence = None;
    matcher.start_matching(|seq| {
        if first_sequence.is_none() {
            let summary = match seq {
                Sequence::Triple {
                    literals,
                    offset,
                    match_len,
                } => (literals.len(), offset, match_len),
                Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
            };
            first_sequence = Some(summary);
        }
    });

    let (lit_len, offset, match_len) = first_sequence.expect("expected at least one sequence");
    assert_eq!(
        lit_len, 0,
        "expected immediate cross-block match at block start"
    );
    assert_eq!(
        offset,
        tail.len(),
        "expected match against densely seeded tail"
    );
    assert!(
        DFAST_MIN_MATCH_LEN <= match_len,
        "match length should satisfy dfast minimum match length"
    );
}
10422
/// Checks that a second `skip_matching_dense` call, made after a one-byte
/// block is appended, records the start located 7 bytes before the end of
/// the first block in the long-hash table.
#[test]
fn dfast_skip_matching_dense_backfills_newly_hashable_long_tail_positions() {
    let mut matcher = DfastMatchGenerator::new(1 << 22);

    let first = deterministic_high_entropy_bytes(0x7A64_0315_D4E1_91C3, 4096);
    let first_len = first.len();
    matcher.add_data(first, |_| {});
    matcher.skip_matching_dense();

    // One extra byte is appended so that the start at `first_len - 7` has a
    // full 8 bytes available for long hashing (asserted on the live history
    // below).
    matcher.add_data(alloc::vec![0xAB], |_| {});
    matcher.skip_matching_dense();

    let boundary_abs = first_len - 7;
    let boundary_rel = boundary_abs - matcher.history_abs_start;
    let live = matcher.live_history();
    assert!(
        live.len() >= boundary_rel + 8,
        "fixture must make the boundary start long-hashable"
    );

    let bucket = matcher.hash8(&live[boundary_rel..]);
    assert!(
        matcher.long_hash[bucket].contains(&boundary_abs),
        "dense skip must seed long-hash entry for newly hashable boundary start"
    );
}
10449
/// Calls `seed_remaining_hashable_starts` starting `DFAST_MIN_MATCH_LEN`
/// bytes before the end of a 64-byte block and checks that the start 4 bytes
/// before the block end is present in its short-hash bucket.
#[test]
fn dfast_seed_remaining_hashable_starts_seeds_last_short_hash_positions() {
    let mut matcher = DfastMatchGenerator::new(1 << 20);
    matcher.add_data(
        deterministic_high_entropy_bytes(0x13F0_9A6D_55CE_7B21, 64),
        |_| {},
    );
    matcher.ensure_hash_tables();

    // Locate the newest block in absolute coordinates.
    let block_len = matcher.window.back().unwrap().len();
    let block_abs_start = matcher.history_abs_start + matcher.window_size - block_len;

    matcher.seed_remaining_hashable_starts(
        block_abs_start,
        block_len,
        block_len - DFAST_MIN_MATCH_LEN,
    );

    // The position under test: 4 bytes before the end of the block.
    let last_short_abs = block_abs_start + block_len - 4;
    let last_short_rel = last_short_abs - matcher.history_abs_start;
    let live = matcher.live_history();
    assert!(
        live.len() >= last_short_rel + 4,
        "fixture must leave the last short-hash start valid"
    );

    let bucket = matcher.hash4(&live[last_short_rel..]);
    assert!(
        matcher.short_hash[bucket].contains(&last_short_abs),
        "tail seeding must include the last 4-byte-hashable start"
    );
}
10475
/// Same check as the short-hash tail seeding test, but the seeding call is
/// given a position equal to the block length; the start 4 bytes before the
/// block end must still be found in its short-hash bucket.
#[test]
fn dfast_seed_remaining_hashable_starts_handles_pos_at_block_end() {
    let mut matcher = DfastMatchGenerator::new(1 << 20);
    matcher.add_data(
        deterministic_high_entropy_bytes(0x7BB2_DA91_441E_C0EF, 64),
        |_| {},
    );
    matcher.ensure_hash_tables();

    // Locate the newest block in absolute coordinates.
    let block_len = matcher.window.back().unwrap().len();
    let block_abs_start = matcher.history_abs_start + matcher.window_size - block_len;

    // Third argument equals the block length, i.e. the position is at block end.
    matcher.seed_remaining_hashable_starts(block_abs_start, block_len, block_len);

    // The position under test: 4 bytes before the end of the block.
    let last_short_abs = block_abs_start + block_len - 4;
    let last_short_rel = last_short_abs - matcher.history_abs_start;
    let live = matcher.live_history();
    assert!(
        live.len() >= last_short_rel + 4,
        "fixture must leave the last short-hash start valid"
    );

    let bucket = matcher.hash4(&live[last_short_rel..]);
    assert!(
        matcher.short_hash[bucket].contains(&last_short_abs),
        "tail seeding must still include the last 4-byte-hashable start when pos is at block end"
    );
}
10500
/// Two consecutive blocks are skipped with `skip_matching(Some(true))`; a
/// marker sequence straddles their boundary (3 bytes end the first block,
/// 8 bytes begin the second). A third block starting with the full marker
/// must open with a match whose offset points at that boundary start.
#[test]
fn dfast_sparse_skip_matching_backfills_previous_tail_for_consecutive_sparse_blocks() {
    let boundary_prefix: [u8; 3] = [0xFA, 0xFB, 0xFC];
    let boundary_suffix: [u8; 8] = [0xFD, 0xEE, 0xAD, 0xBE, 0xEF, 0x11, 0x22, 0x33];
    let mut matcher = DfastMatchGenerator::new(1 << 22);

    // First block ends with the marker prefix.
    let mut first = deterministic_high_entropy_bytes(0xA5A5_5A5A_C3C3_3C3C, 4096);
    let prefix_at = first.len() - boundary_prefix.len();
    first[prefix_at..].copy_from_slice(&boundary_prefix);
    matcher.add_data(first, |_| {});
    matcher.skip_matching(Some(true));

    // Second block begins with the marker suffix.
    let mut second = deterministic_high_entropy_bytes(0xA5A5_5A5A_C3C3_3C3C, 4096);
    second[..boundary_suffix.len()].copy_from_slice(&boundary_suffix);
    matcher.add_data(second.clone(), |_| {});
    matcher.skip_matching(Some(true));

    // Third block replays the whole marker, then unrelated trailing bytes.
    let mut third = boundary_prefix.to_vec();
    third.extend_from_slice(&boundary_suffix);
    third.extend_from_slice(b"-trailing-literals");
    matcher.add_data(third, |_| {});

    // Keep just the first sequence as (literal len, offset, match len).
    let mut first_sequence = None;
    matcher.start_matching(|seq| {
        if first_sequence.is_none() {
            let summary = match seq {
                Sequence::Triple {
                    literals,
                    offset,
                    match_len,
                } => (literals.len(), offset, match_len),
                Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
            };
            first_sequence = Some(summary);
        }
    });

    let (lit_len, offset, match_len) = first_sequence.expect("expected at least one sequence");
    assert_eq!(
        lit_len, 0,
        "expected immediate match from the prior sparse-skip boundary"
    );
    assert_eq!(
        offset,
        second.len() + boundary_prefix.len(),
        "expected match against backfilled first→second boundary start"
    );
    assert!(
        DFAST_MIN_MATCH_LEN <= match_len,
        "match length should satisfy dfast minimum match length"
    );
}
10553
/// Drives `MatchGeneratorDriver` at `CompressionLevel::Fastest` with a source
/// size hint over a generated fixture, replays every emitted sequence, and
/// checks that the replay reproduces the input and contains at least one
/// match.
#[test]
fn fastest_hint_iteration_23_sequences_reconstruct_source() {
    /// Produces `len` bytes from a 64-bit linear congruential generator
    /// seeded with `seed`, taking bits 33.. of each state as the output byte.
    fn generate_data(seed: u64, len: usize) -> Vec<u8> {
        let mut state = seed;
        (0..len)
            .map(|_| {
                state = state
                    .wrapping_mul(6364136223846793005)
                    .wrapping_add(1442695040888963407);
                (state >> 33) as u8
            })
            .collect()
    }

    let iteration = 23u64;
    let base_len = (iteration * 89 % 16384) as usize;
    let mut data = generate_data(iteration, base_len);
    // Two extra copies of an earlier 128-byte slice are appended so the
    // fixture deterministically reaches the `Sequence::Triple` path rather
    // than producing literals only.
    let repeat = data[128..256].to_vec();
    for _ in 0..2 {
        data.extend_from_slice(&repeat);
    }

    let mut driver = MatchGeneratorDriver::new(1024 * 128, 1);
    driver.set_source_size_hint(data.len() as u64);
    driver.reset(CompressionLevel::Fastest);

    // Fill the driver-provided buffer with the fixture and hand it back.
    let used = data.len();
    let mut space = driver.get_next_space();
    space[..used].copy_from_slice(&data);
    space.truncate(used);
    driver.commit_space(space);

    let mut rebuilt = Vec::with_capacity(used);
    let mut saw_triple = false;
    driver.start_matching(|seq| {
        let (literals, offset, match_len) = match seq {
            Sequence::Literals { literals } => {
                rebuilt.extend_from_slice(literals);
                return;
            }
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => (literals, offset, match_len),
        };

        saw_triple = true;
        rebuilt.extend_from_slice(literals);
        assert!(offset > 0, "offset must be non-zero");
        assert!(
            offset <= rebuilt.len(),
            "offset must reference already-produced bytes: offset={} produced={}",
            offset,
            rebuilt.len()
        );

        // Byte-wise copy so a match may read bytes appended earlier in this
        // same loop.
        let start = rebuilt.len() - offset;
        for src in start..start + match_len {
            let copied = rebuilt[src];
            rebuilt.push(copied);
        }
    });

    assert!(saw_triple, "fixture must emit at least one match");
    assert_eq!(rebuilt, data);
}