structured_zstd/encoding/
match_generator.rs

//! Matching algorithm used to find repeated parts in the original data
//!
//! The Zstd format relies on finding repeated sequences of data and compressing these sequences as instructions to the decoder.
//! A sequence basically tells the decoder "Go back X bytes and copy Y bytes to the end of your decode buffer".
//!
//! The task here is to efficiently find matches in the already encoded data for the current suffix of the not yet encoded data.
7
8use alloc::vec::Vec;
9// SIMD/CRC intrinsics now live in `crate::encoding::fastpath::*` where they
10// sit under per-CPU `#[target_feature]` umbrellas; no architecture-specific
11// intrinsic imports remain in this file.
12use super::CompressionLevel;
13use super::Matcher;
14use super::Sequence;
15use super::blocks::encode_offset_with_history;
16use super::bt::BtMatcher;
17#[cfg(test)]
18use super::cost_model::HC_MAX_LIT;
19use super::cost_model::{
20    HC_BITCOST_MULTIPLIER, HC_FORMAT_MINMATCH, HC_OPT_NODE_LEN, HC_OPT_NUM, HC_OPT_PRICE_ARENA_LEN,
21    HC_OPT_PRICE_STRIDE, HC_PREDEF_THRESHOLD, HcOptState, HcOptimalCostProfile,
22};
23#[cfg(test)]
24use super::cost_model::{HC_BLOCKSIZE_MAX, HC_MAX_LL, HC_MAX_ML, HC_MAX_OFF, HcOptPriceType};
25use super::dfast::DfastMatchGenerator;
26// FAST_HASH_FILL_STEP test-only re-export was tied to the legacy
27// SuffixStore MatchGenerator's interleaved hash-fill stride. The
28// donor-shape Fast kernel walks ip0 with kSearchStrength step-skip
29// acceleration instead, so the constant has no consumer in the
30// remaining live test set today.
31#[cfg(test)]
32use super::match_table::helpers::INCOMPRESSIBLE_SKIP_STEP;
33use super::match_table::helpers::MIN_MATCH_LEN;
34#[cfg(test)]
35use super::match_table::helpers::common_prefix_len;
36#[cfg(test)]
37use super::opt::ldm::HcRawSeq;
38use super::opt::ldm::{HcOptLdmState, HcRawSeqStore};
39use super::opt::types::{
40    HcCandidateQuery, HcOptimalNode, HcOptimalPlanBuffers, HcOptimalPlanState, HcOptimalSequence,
41    MatchCandidate,
42};
43use super::row::RowMatchGenerator;
44use super::simple::fast_matcher::{FAST_LEVEL_1_HASH_LOG, FAST_LEVEL_1_MLS, FastKernelMatcher};
45#[cfg(all(
46    test,
47    feature = "std",
48    target_arch = "aarch64",
49    target_endian = "little"
50))]
51use std::arch::is_aarch64_feature_detected;
52#[cfg(all(test, feature = "std", target_arch = "x86_64"))]
53use std::arch::is_x86_feature_detected;
54
/// Minimum match length of the Double-Fast matcher. The donor `minMatch` is
/// fixed at 5 for dfast, so this stays a constant (usable in const contexts)
/// instead of a per-level `DfastConfig` field.
pub(crate) const DFAST_MIN_MATCH_LEN: usize = 5;
// Bytes the dfast short hash reads (donor `mls = 5`). Seeding / lookahead
// guards use it so a position is only short-hashed once its full 5-byte key
// is in range.
pub(crate) const DFAST_SHORT_HASH_LOOKAHEAD: usize = 5;
/// Default acceptance floor of the Row matcher (`RowConfig::mls`): a row
/// candidate must extend to at least this many bytes. The row hash key width
/// is a separate knob (`ROW_HASH_KEY_LEN`).
pub(crate) const ROW_MIN_MATCH_LEN: usize = 5;
// Donor `clevels.h:31` at level 3 large-input bucket sets
// `hashLog = 17` (the long-hash table) and `chainLog = 16` (the
// short-hash table — donor names this `chainTable` even though for
// dfast it's used as a plain single-slot hash). Each table holds one
// `U32` per slot; the donor overwrites on collision and recovers
// compression quality via the inline `_search_next_long` retry
// (after a short-hash hit, probes `hashLong[hl1]` at `ip + 1` and
// keeps the longer match).
//
// We mirror that storage layout: single `u32` per bucket (no
// `[u32; N]` array), `long_hash` sized `1 << DFAST_HASH_BITS` and
// `short_hash` one bit smaller via `DFAST_SHORT_HASH_BITS_DELTA`.
// Two-table footprint at Level 3: `2^17 × 4 + 2^16 × 4 = 768 KiB`,
// exact upstream parity. The `_search_next_long` retry lives in
// `DfastMatchGenerator::hash_candidate` (called via
// `best_match`). Earlier revisions kept a
// 4-slot bucket per hash position; that paid 4× the donor memory
// without measurable ratio gain once the retry was in place.
//
// NOTE(review): an earlier version of this comment said
// `dfast_hash_bits_for_window` clamps the runtime long-hash value to
// `[MIN_WINDOW_LOG, DFAST_HASH_BITS]`. As written in this file that helper
// only floors at `MIN_WINDOW_LOG`; its own comment says the per-level
// `DfastConfig` caps the value from above at the call site, and `DFAST_L4`
// carries a long hash log of 18. Confirm whether this const is still an
// effective upper bound or only the Level-3 sizing.
pub(crate) const DFAST_HASH_BITS: usize = 17;
/// Difference between `long_hash_bits` and `short_hash_bits` —
/// donor `hashLog - chainLog` at the Level-3 sizing described above
/// (17 - 16 = 1). The short hash is one bit smaller than the long hash so
/// the per-bucket footprint matches donor sizing exactly.
///
/// NOTE(review): the delta is not 1 for every per-level row in this file —
/// `DFAST_L4` uses equal long/short logs (18/18). Confirm which consumers
/// still derive the short width from this delta rather than from
/// `DfastConfig::short_hash_log`.
pub(crate) const DFAST_SHORT_HASH_BITS_DELTA: usize = 1;
/// Sentinel value for an empty slot in the dfast hash tables. Real
/// positions are stored as `(abs_pos - position_base + 1) as u32`, so
/// `0` is reserved as the "empty" marker and a true relative offset
/// of `0` never appears in the table. Mirrors the LDM table's
/// `LdmEntry.offset == 0` convention (see `encoding/ldm/table.rs`)
/// so both rebasing structures share
/// one sentinel scheme.
pub(crate) const DFAST_EMPTY_SLOT: u32 = 0;
98
/// Guard band reserved above the high-water mark before triggering a
/// rebase on the Dfast hash tables. When the next insert would push a
/// relative offset above `u32::MAX - DFAST_REBASE_GUARD_BAND`, the
/// table calls `reduce(GUARD_BAND)` to shift every slot down and
/// advance `position_base` so future inserts stay inside the `u32`
/// window. Same scheme as `encoding/ldm/table.rs`.
pub(crate) const DFAST_REBASE_GUARD_BAND: u32 = 1u32 << 30;
/// Log2 of `DFAST_SKIP_STEP_GROWTH_INTERVAL`.
/// NOTE(review): presumably the dfast analogue of the donor search-strength
/// constant; the consumer is outside this view — confirm.
pub(crate) const DFAST_SKIP_SEARCH_STRENGTH: usize = 6;
/// `1 << DFAST_SKIP_SEARCH_STRENGTH` (= 64).
/// NOTE(review): presumably the distance after which the skip step grows —
/// confirm against the dfast scan loop.
pub(crate) const DFAST_SKIP_STEP_GROWTH_INTERVAL: usize = 1 << DFAST_SKIP_SEARCH_STRENGTH;
/// NOTE(review): presumably the cap on the adaptive dfast skip step — confirm.
pub(crate) const DFAST_MAX_SKIP_STEP: usize = 8;
/// NOTE(review): presumably the fixed skip step used on incompressible
/// input — confirm.
pub(crate) const DFAST_INCOMPRESSIBLE_SKIP_STEP: usize = 16;
/// Default Row hash-table width (log2); feeds the test-only `ROW_CONFIG`.
/// Production level rows carry their own `hash_bits`.
pub(crate) const ROW_HASH_BITS: usize = 20;
/// Default `RowConfig::row_log`; feeds the test-only `ROW_CONFIG`.
pub(crate) const ROW_LOG: usize = 5;
/// Default `RowConfig::search_depth`; feeds the test-only `ROW_CONFIG`.
pub(crate) const ROW_SEARCH_DEPTH: usize = 16;
/// Default `RowConfig::target_len`; feeds the test-only `ROW_CONFIG`.
pub(crate) const ROW_TARGET_LEN: usize = 48;
/// NOTE(review): presumably the width in bits of the per-slot row tag —
/// the consumer is outside this view, confirm.
pub(crate) const ROW_TAG_BITS: usize = 8;
/// NOTE(review): presumably the "empty" marker for a row slot (all bits
/// set, unlike the dfast tables which reserve `0`) — confirm.
pub(crate) const ROW_EMPTY_SLOT: u32 = u32::MAX;
/// Width in bytes of the row hash key. Independent of `RowConfig::mls`,
/// which only tunes the match acceptance floor.
pub(crate) const ROW_HASH_KEY_LEN: usize = 4;
117// HASH_MIX_PRIME now lives in `crate::encoding::fastpath::scalar`; the four
118// per-CPU `hash_mix_u64` variants share it via that module.
119// HC_PRIME3BYTES / HC_PRIME4BYTES moved to match_table::storage
120// alongside the hash helpers in Phase 1e Stage A. Only the test
121// module references the constants directly (production code goes
122// through `MatchTable::hash_value_with_mls`).
123#[cfg(test)]
124use super::match_table::storage::{HC_PRIME3BYTES, HC_PRIME4BYTES};
125
126// HC_HASH_LOG / HC_CHAIN_LOG / HC3_HASH_LOG / HC_EMPTY live on the
127// shared storage module so MatchTable methods can reference them
128// without pulling in this module. Re-imported here so existing
129// macros / configs / tests keep their unqualified names.
130#[cfg(test)]
131use super::match_table::storage::HC_EMPTY;
132use super::match_table::storage::HC3_HASH_LOG;
133// HC_HASH_LOG / HC_CHAIN_LOG feed the test-only `HC_CONFIG` default.
134#[cfg(test)]
135use super::match_table::storage::{HC_CHAIN_LOG, HC_HASH_LOG};
136// HC3_MAX_OFFSET moved to encoding::bt alongside the hash3 candidate
137// probe macro that consumes it; the macro references it via the
138// fully-qualified `$crate::encoding::bt::HC3_MAX_OFFSET` path so this
139// module no longer needs a local import.
/// Default hash-chain search depth; used by `HC_OVERRIDE_DEFAULT` and the
/// test-only `HC_CONFIG`.
const HC_SEARCH_DEPTH: usize = 16;
// HC_MIN_MATCH_LEN moved to encoding::hc; re-imported here so
// existing references compile unchanged.
use super::hc::HC_MIN_MATCH_LEN;
/// Minimum match length for the optimal parser; an alias of the cost
/// model's `HC_FORMAT_MINMATCH`.
const HC_OPT_MIN_MATCH_LEN: usize = HC_FORMAT_MINMATCH;
/// Default hash-chain target length; used by `HC_OVERRIDE_DEFAULT` and the
/// test-only `HC_CONFIG`.
const HC_TARGET_LEN: usize = 48;
146
147// MAX_HC_SEARCH_DEPTH moved to encoding::hc alongside chain_candidates.
148use super::hc::MAX_HC_SEARCH_DEPTH;
149
150// `Strategy` and `StrategyTag` live in `crate::encoding::strategy`.
151// The driver carries a `StrategyTag` field set at `reset()` and
152// dispatches each block into a monomorphised `compress_block::<S>`
153// per concrete strategy.
154
/// Bundled tuning knobs for the hash-chain matcher. Using a typed config
/// instead of positional `usize` args eliminates parameter-order hazards.
#[derive(Copy, Clone, PartialEq, Eq)]
struct HcConfig {
    /// Hash table width as a log2 value; the public `hash_log` parameter
    /// override writes here.
    hash_log: usize,
    /// Chain table width as a log2 value; the public `chain_log` parameter
    /// override writes here.
    chain_log: usize,
    /// Search depth. The public `search_log` override sets it to
    /// `1 << search_log`.
    search_depth: usize,
    /// Target match length; the public `target_length` override writes here.
    target_len: usize,
    /// Binary-tree finder hash width (donor `mls = BOUNDED(4, minMatch, 6)`),
    /// carried explicitly per level so it is NOT inferred from `target_len`
    /// (a `target_length` override must not silently flip the finder between
    /// 5- and 4-byte hashing). Only the BT body reads it; HC/lazy levels keep
    /// it at 4 (their `hash_position` is always 4-byte). 5 for the
    /// minMatch=5 BT levels (btlazy2 + btopt L16), 4 elsewhere.
    search_mls: usize,
}
171
/// Bundled tuning knobs for the Row matcher, the row counterpart of
/// `HcConfig`.
#[derive(Copy, Clone, PartialEq, Eq)]
pub(crate) struct RowConfig {
    /// Row hash-table width as a log2 value (donor `hashLog`); the public
    /// `hash_log` parameter override writes here.
    pub(crate) hash_bits: usize,
    /// Donor `rowLog = clamp(searchLog, 4, 6)`.
    pub(crate) row_log: usize,
    /// Donor `nbAttempts = 1 << min(searchLog, rowLog)`.
    pub(crate) search_depth: usize,
    /// Donor `targetLength` (nice-match early-out).
    pub(crate) target_len: usize,
    /// Donor `cParams.minMatch` for the row matcher: the regular-search
    /// acceptance floor (a row candidate must extend to >= `mls` bytes).
    /// The C-like advanced API surfaces this as the row min-match knob.
    /// `ROW_MIN_MATCH_LEN` (5) is the default; the row hash key width stays
    /// 4 bytes (an internal detail), so this only tunes the acceptance
    /// floor, not the candidate hash distribution.
    pub(crate) mls: usize,
}
186
// Only used as the default HashChain config when the test-only parse×search
// override pairs a level with a backend its native row doesn't populate.
// Field-for-field identical to the production `HC_OVERRIDE_DEFAULT`; it is
// test-only because it goes through the `#[cfg(test)]` short imports of
// `HC_HASH_LOG` / `HC_CHAIN_LOG`.
#[cfg(test)]
const HC_CONFIG: HcConfig = HcConfig {
    hash_log: HC_HASH_LOG,
    chain_log: HC_CHAIN_LOG,
    search_depth: HC_SEARCH_DEPTH,
    target_len: HC_TARGET_LEN,
    search_mls: 4,
};
197
/// Base HashChain config synthesized when a public-parameter strategy
/// override ([`super::parameters`]) routes a level to the HC / BT
/// backend whose native level row didn't populate `hc` (e.g. forcing
/// `Strategy::Lazy2` onto a level the table resolves to Fast). Mirrors
/// the mid-band lazy defaults; the per-knob overrides then refine it.
///
/// The storage constants are spelled with their full path because the
/// unqualified `HC_HASH_LOG` / `HC_CHAIN_LOG` imports in this file are
/// `#[cfg(test)]`-only. `search_mls: 4` is the 4-byte finder hash;
/// `apply_param_overrides` raises it to 5 when the override is `Btlazy2`.
const HC_OVERRIDE_DEFAULT: HcConfig = HcConfig {
    hash_log: super::match_table::storage::HC_HASH_LOG,
    chain_log: super::match_table::storage::HC_CHAIN_LOG,
    search_depth: HC_SEARCH_DEPTH,
    target_len: HC_TARGET_LEN,
    search_mls: 4,
};
210
// HashChain / BT configs for the btultra2 band. All of them use the 4-byte
// finder hash (`search_mls: 4`).
// NOTE(review): judging by the names, `BTULTRA2_HC_CONFIG` is the general
// btultra2 row and the `_L22*` variants are the level-22 row plus its
// source-size buckets (<= 256 KiB, <= 128 KiB, <= 16 KiB). The code that
// selects between them is outside this view — confirm the bucket bounds
// there.
const BTULTRA2_HC_CONFIG: HcConfig = HcConfig {
    hash_log: 24,
    chain_log: 24,
    search_depth: 512,
    target_len: 256,
    search_mls: 4,
};

// Level-22 row: chain table (2^27) wider than the hash table (2^25).
const BTULTRA2_HC_CONFIG_L22: HcConfig = HcConfig {
    hash_log: 25,
    chain_log: 27,
    search_depth: 512,
    target_len: 999,
    search_mls: 4,
};

// Smaller tables, deeper search: depth 1 << 13 = 8192.
const BTULTRA2_HC_CONFIG_L22_256K: HcConfig = HcConfig {
    hash_log: 19,
    chain_log: 19,
    search_depth: 1 << 13,
    target_len: 999,
    search_mls: 4,
};

// Depth 1 << 11 = 2048.
const BTULTRA2_HC_CONFIG_L22_128K: HcConfig = HcConfig {
    hash_log: 17,
    chain_log: 18,
    search_depth: 1 << 11,
    target_len: 999,
    search_mls: 4,
};

// Depth 1 << 10 = 1024.
const BTULTRA2_HC_CONFIG_L22_16K: HcConfig = HcConfig {
    hash_log: 15,
    chain_log: 15,
    search_depth: 1 << 10,
    target_len: 999,
    search_mls: 4,
};
250
// Default Row config: only used by tests and the test-only parse×search
// override (production greedy L5 carries its own `ROW_L5`).
// Resolves to hash_bits=20, row_log=5, search_depth=16, target_len=48,
// mls=5 via the `ROW_*` default constants.
#[cfg(test)]
const ROW_CONFIG: RowConfig = RowConfig {
    hash_bits: ROW_HASH_BITS,
    row_log: ROW_LOG,
    search_depth: ROW_SEARCH_DEPTH,
    target_len: ROW_TARGET_LEN,
    mls: ROW_MIN_MATCH_LEN,
};
261
// Level-5 greedy is the ONLY strategy routed to the Row backend
// (`StrategyTag::backend`: greedy -> Row; lazy / btopt / btultra* ->
// HashChain), so it is the only level whose `row:` field is read. The donor
// `clevels.h` default row (srcSize > 256 KB) for level 5 is searchLog=3,
// targetLength=2, from which the row matcher derives:
//   rowLog       = clamp(searchLog, 4, 6) = 4
//   search_depth = 1 << min(searchLog, rowLog) = 8   (= nbAttempts)
//   target_len   = targetLength = 2                  (nice-match early-out)
// The shared `ROW_CONFIG` (row_log=5, search_depth=16, target_len=48) ran a
// level-12-grade search here: 16 slots per row, never early-exiting until a
// 48-byte match. That exhaustive walk was the dominant cost in greedy L5's
// encode-speed regression vs FFI. `hash_bits` matches upstream zstd's
// `ZSTD_getCParams(5, .., 0).hashLog` = 19 (verified via
// `donor_cparams_check 5`), so the row table is the same width as upstream's
// (2^19 slots); the previous `ROW_HASH_BITS` (20) doubled both row tables vs
// upstream, the dominant peak-memory excess on the greedy band.
//
// NOTE(review): the "ONLY strategy routed to the Row backend" statement
// above looks stale. This file also defines `ROW_L6`..`ROW_L12` for the lazy
// band, and the `row_hash_bits_for_window` comment describes the lazy band
// as having moved onto Row. Confirm against `StrategyTag::backend` and the
// level table.
//
// `ROW_L5` is also the row config `apply_param_overrides` synthesizes when a
// strategy override lands on the Row backend from a level without one.
const ROW_L5: RowConfig = RowConfig {
    hash_bits: 19,
    row_log: 4,
    search_depth: 8,
    target_len: 2,
    mls: ROW_MIN_MATCH_LEN,
};
285
// Donor `clevels.h` unbounded defaults for the lazy band, verified via
// `ZSTD_getCParams(level, 0, 0)`:
//   L6  { w21 c18 h19 s3 mml5 t4  lazy  } → rowLog 4, depth 1<<3 = 8
//   L7  { w21 c19 h20 s4 mml5 t8  lazy  } → rowLog 4, depth 16
//   L8  { w21 c19 h20 s4 mml5 t16 lazy2 } → rowLog 4, depth 16
//   L9  { w22 c20 h21 s4 mml5 t16 lazy2 } → rowLog 4, depth 16
//   L10 { w22 c21 h22 s5 mml5 t16 lazy2 } → rowLog 5, depth 32
//   L11 { w22 c21 h22 s6 mml5 t16 lazy2 } → rowLog 6, depth 64
//   L12 { w22 c22 h23 s6 mml5 t32 lazy2 } → rowLog 6, depth 64
// `rowLog = clamp(searchLog, 4, 6)`, `depth = 1 << min(searchLog, rowLog)`
// (same derivation as `ROW_L5` above). `hash_bits` carries the donor
// `hashLog`; the hinted-source clamp in `configure` caps it by the window
// exactly like the donor `ZSTD_adjustCParams` path.
//
// Column-to-field mapping for the constants below: `h` -> `hash_bits`,
// derived rowLog -> `row_log`, derived depth -> `search_depth`,
// `t` -> `target_len`, `mml5` -> `mls` (`ROW_MIN_MATCH_LEN`). The `w`
// (windowLog) and `c` (chainLog) columns have no `RowConfig` field.
const ROW_L6: RowConfig = RowConfig {
    hash_bits: 19,
    row_log: 4,
    search_depth: 8,
    target_len: 4,
    mls: ROW_MIN_MATCH_LEN,
};
const ROW_L7: RowConfig = RowConfig {
    hash_bits: 20,
    row_log: 4,
    search_depth: 16,
    target_len: 8,
    mls: ROW_MIN_MATCH_LEN,
};
const ROW_L8: RowConfig = RowConfig {
    hash_bits: 20,
    row_log: 4,
    search_depth: 16,
    target_len: 16,
    mls: ROW_MIN_MATCH_LEN,
};
const ROW_L9: RowConfig = RowConfig {
    hash_bits: 21,
    row_log: 4,
    search_depth: 16,
    target_len: 16,
    mls: ROW_MIN_MATCH_LEN,
};
const ROW_L10: RowConfig = RowConfig {
    hash_bits: 22,
    row_log: 5,
    search_depth: 32,
    target_len: 16,
    mls: ROW_MIN_MATCH_LEN,
};
const ROW_L11: RowConfig = RowConfig {
    hash_bits: 22,
    row_log: 6,
    search_depth: 64,
    target_len: 16,
    mls: ROW_MIN_MATCH_LEN,
};
const ROW_L12: RowConfig = RowConfig {
    hash_bits: 23,
    row_log: 6,
    search_depth: 64,
    target_len: 32,
    mls: ROW_MIN_MATCH_LEN,
};
348
/// Per-level Double-Fast hash sizing, mirroring the donor `clevels.h` columns
/// (config-driven, not a hardcoded constant): `long_hash_log` =
/// `cParams.hashLog` (the long 8-byte hash table), `short_hash_log` =
/// `cParams.chainLog` (the short hash table dfast repurposes as its
/// secondary index). Only the Dfast backend reads it, so non-dfast level
/// rows carry `dfast: None`. `minMatch` stays the donor-fixed `5`
/// (`DFAST_MIN_MATCH_LEN`, used in const contexts).
#[derive(Copy, Clone, PartialEq, Eq)]
struct DfastConfig {
    /// Log2 width of the long hash table (donor `hashLog`); the public
    /// `hash_log` parameter override writes here (narrowed to `u8`).
    long_hash_log: u8,
    /// Log2 width of the short hash table (donor `chainLog`); the public
    /// `chain_log` parameter override writes here (narrowed to `u8`).
    short_hash_log: u8,
}
361
// Donor clevels.h default row (srcSize > 256 KB): L3 {hashLog 17, chainLog 16},
// L4 {hashLog 18, chainLog 18}.
// `DFAST_L3` is also the config `apply_param_overrides` synthesizes when a
// strategy override lands on the Double-Fast backend from a non-dfast level.
const DFAST_L3: DfastConfig = DfastConfig {
    long_hash_log: 17,
    short_hash_log: 16,
};
// Unlike L3, the long and short tables have the same width at L4.
const DFAST_L4: DfastConfig = DfastConfig {
    long_hash_log: 18,
    short_hash_log: 18,
};
372
/// Per-level Fast-strategy tuning, only consumed by the `FastKernelMatcher`
/// (Simple backend): `hash_log` = donor `cParams.hashLog`, `mls` = donor
/// `cParams.minMatch` (4..=8), `step_size` = donor `stepSize`. Carried as
/// `LevelParams.fast` (`Some` only on Fast level rows; `None` elsewhere).
#[derive(Copy, Clone, PartialEq, Eq)]
struct FastConfig {
    /// Log2 width of the Fast hash table; the public `hash_log` parameter
    /// override writes here.
    hash_log: u32,
    /// Minimum match length; the public `min_match` parameter override
    /// writes here unchanged (no clamp is applied in this file).
    mls: u32,
    /// Donor `stepSize`. No public parameter override maps onto it in
    /// `apply_param_overrides`.
    step_size: usize,
}
383
// Fast-strategy rows for levels 1 and 2. `FAST_L1` is also the config
// `apply_param_overrides` synthesizes when a strategy override lands on the
// Fast backend from a level whose native row has no `fast` config.
const FAST_L1: FastConfig = FastConfig {
    hash_log: 14,
    mls: 7,
    step_size: 2,
};
const FAST_L2: FastConfig = FastConfig {
    hash_log: 16,
    mls: 6,
    step_size: 2,
};
394
/// Resolved tuning parameters for a compression level. The
/// [`StrategyTag`] is the single source of truth for the backend
/// family and the compile-time strategy consts; the runtime
/// [`BackendTag`] used by the driver dispatcher is derived via
/// [`StrategyTag::backend`] so the two cannot drift.
///
/// NOTE(review): `LevelParams::backend` in this file derives the backend
/// from the `search` field (`self.search.backend()`), not from the tag —
/// confirm that the sentence above still describes the intended contract.
#[derive(Copy, Clone, PartialEq, Eq)]
struct LevelParams {
    /// Strategy of the level; drives `pre_split` and the LDM strategy
    /// ordinal.
    strategy_tag: super::strategy::StrategyTag,
    /// Decoupled search-method axis. Independent of `strategy_tag`'s
    /// parse half: a level can pair any parse (greedy / lazy depth via
    /// `lazy_depth`) with any search backend here. Defaults to the
    /// historical pairing (`strategy_tag.search()`) but is overridable
    /// per level so the parse×search matrix can be swept and tuned.
    search: super::strategy::SearchMethod,
    /// Window size as a log2 value; the public `window_log` parameter
    /// override writes here.
    window_log: u8,
    /// Lazy-parse depth. `parse` feeds it to `ParseMode::from_lazy_depth`
    /// for every non-binary-tree search method, and a value `>= 2`
    /// distinguishes lazy2 from lazy under the collapsed `Lazy` tag.
    lazy_depth: u8,
    /// Per-strategy tuning. Exactly one is `Some` on each level row, matching
    /// `strategy_tag`'s backend, so the table self-documents which knobs a
    /// level actually consumes (the others are `None`, not dead placeholders):
    /// `fast` for the Fast/Simple backend, `dfast` for Double-Fast, `hc` for
    /// the HashChain (lazy / btopt / btultra*) backend, `row` for the Row
    /// (greedy L5) backend.
    ///
    /// The "exactly one" invariant holds for the static level rows only:
    /// `apply_param_overrides` inserts a config for the overridden backend
    /// without clearing the level's native one, so after a strategy override
    /// two of these can be `Some`.
    /// NOTE(review): the backend-to-band mapping above (`row` = greedy L5
    /// only) may be stale given `ROW_L6`..`ROW_L12` exist — confirm against
    /// the level table.
    fast: Option<FastConfig>,
    dfast: Option<DfastConfig>,
    hc: Option<HcConfig>,
    row: Option<RowConfig>,
}
422
423impl LevelParams {
424    /// Backend family (storage variant) for the driver dispatcher.
425    /// Derived from the decoupled `search` axis so a level can route to
426    /// a different search backend than its `strategy_tag` historically
427    /// implied.
428    fn backend(&self) -> super::strategy::BackendTag {
429        self.search.backend()
430    }
431
432    /// Parse mode derived from the decoupled `search` axis: the binary-tree
433    /// search path carries `ParseMode::Optimal`; every other search backend
434    /// derives greedy/lazy/lazy2 from `lazy_depth`. Reading `search` (not the
435    /// strategy tag) keeps the parse×search decoupling complete even when a
436    /// level whose tag is `Bt*` is overridden to a non-BT search backend.
437    fn parse(&self) -> super::strategy::ParseMode {
438        match self.search {
439            super::strategy::SearchMethod::BinaryTree => super::strategy::ParseMode::Optimal,
440            _ => super::strategy::ParseMode::from_lazy_depth(self.lazy_depth),
441        }
442    }
443
444    /// Cheap fingerprint pre-splitter level, the C-like `blockSplitterLevel`
445    /// knob. Mirrors the donor `splitLevels[]` table indexed by strategy in
446    /// `ZSTD_optimalBlockSize` (`{0,0,1,2,2,3,3,4,4,4}` over fast..btultra2):
447    /// fast=0, dfast=1, greedy=2, lazy=2, lazy2=3, btlazy2=3,
448    /// btopt/btultra/btultra2=4. We collapse the donor `lazy2` and `btlazy2`
449    /// strategies into the hash-chain `Lazy` tag, distinguished here by
450    /// `lazy_depth` (the level table runs both at depth 2), so depth 2 routes
451    /// to split level 3 to match the donor. `split_level == 0` routes to the
452    /// cheap from-borders heuristic; `1..=4` to byChunks with internal
453    /// sampling level `split_level - 1`. The `savings >= 3` gate in
454    /// `optimal_block_size` keeps incompressible data and the first full block
455    /// whole, so homogeneous frames are not over-split.
456    fn pre_split(&self) -> Option<u8> {
457        match self.strategy_tag {
458            super::strategy::StrategyTag::Fast => Some(0),
459            super::strategy::StrategyTag::Dfast => Some(1),
460            super::strategy::StrategyTag::Greedy => Some(2),
461            // The lazy2 / btlazy2 band (Lazy at lazy_depth >= 2, and Btlazy2)
462            // uses the rate-1 full-scan chunk splitter (4), NOT the rate-5
463            // sampler (3). The rate-5 sampler combined with the larger
464            // hash_log is sensitive enough to register a phantom statistical
465            // transition on perfectly homogeneous but periodic input (e.g. a
466            // repeating log-line stream whose period does not divide the 8 KB
467            // chunk size): the sampled bytes land on a different phase in each
468            // chunk, so two identical-distribution chunks look different and
469            // the block is split at 8 KB, then re-split on every window,
470            // cascading a large stream into hundreds of tiny blocks whose
471            // per-block headers dwarf the payload. The rate-1 scan reads every
472            // byte, so it sees periodic data as uniform and declines to split,
473            // while still finding genuine content boundaries (measured better
474            // ratio on the real decode corpus, and no longer expands a
475            // periodic stream vs a single full block). lazy/greedy keep the
476            // coarse samplers (lower hash_log => not sensitive enough to
477            // alias here).
478            super::strategy::StrategyTag::Lazy => {
479                if self.lazy_depth >= 2 {
480                    Some(4)
481                } else {
482                    Some(2)
483                }
484            }
485            super::strategy::StrategyTag::Btlazy2 => Some(4),
486            super::strategy::StrategyTag::BtOpt
487            | super::strategy::StrategyTag::BtUltra
488            | super::strategy::StrategyTag::BtUltra2 => Some(4),
489        }
490    }
491}
492
493/// Apply the public-parameter per-knob overrides (#27) onto the
494/// level-resolved [`LevelParams`], in place. Runs in [`Matcher::reset`]
495/// after the level params are computed and before backend selection, so
496/// a strategy override re-routes the backend uniformly. An all-`None`
497/// override is a no-op the caller skips via
498/// [`super::parameters::ParamOverrides::is_empty`], keeping the default
499/// level geometry byte-identical.
500fn apply_param_overrides(params: &mut LevelParams, ov: &super::parameters::ParamOverrides) {
501    use super::strategy::SearchMethod;
502
503    // 1. Strategy override re-derives tag / search / lazy depth.
504    if let Some(strategy) = ov.strategy {
505        let tag = strategy.tag();
506        params.strategy_tag = tag;
507        params.search = tag.search();
508        params.lazy_depth = strategy.lazy_depth();
509    }
510
511    // 2. Ensure the active backend's config row exists (synthesize a
512    //    default when a strategy override moved off the native row).
513    match params.search {
514        SearchMethod::Fast => {
515            params.fast.get_or_insert(FAST_L1);
516        }
517        SearchMethod::DoubleFast => {
518            params.dfast.get_or_insert(DFAST_L3);
519        }
520        SearchMethod::RowHash => {
521            params.row.get_or_insert(ROW_L5);
522        }
523        SearchMethod::HashChain | SearchMethod::BinaryTree => {
524            // A `Btlazy2` strategy override moved off a non-HC row needs the
525            // BT 5-byte finder hash (donor minMatch 5); other synthesized HC
526            // rows keep the 4-byte default. An explicit `min_match` override
527            // below refines this further.
528            params.hc.get_or_insert(HcConfig {
529                search_mls: if matches!(params.strategy_tag, super::strategy::StrategyTag::Btlazy2)
530                {
531                    5
532                } else {
533                    HC_OVERRIDE_DEFAULT.search_mls
534                },
535                ..HC_OVERRIDE_DEFAULT
536            });
537        }
538    }
539
540    // 3. window_log (bounds-checked at <= 30 by the builder).
541    if let Some(window_log) = ov.window_log {
542        params.window_log = window_log;
543    }
544
545    // 4. Per-backend numeric knobs map into the active config, mirroring
546    //    the donor `cParams` -> matcher translation documented on each
547    //    config struct.
548    match params.search {
549        SearchMethod::Fast => {
550            if let Some(fast) = params.fast.as_mut() {
551                if let Some(hash_log) = ov.hash_log {
552                    fast.hash_log = hash_log;
553                }
554                if let Some(min_match) = ov.min_match {
555                    fast.mls = min_match;
556                }
557            }
558        }
559        SearchMethod::DoubleFast => {
560            if let Some(dfast) = params.dfast.as_mut() {
561                // hashLog -> long table, chainLog -> short table (the
562                // dfast secondary index). Both bounds-checked <= 30, so
563                // the `u8` casts are lossless.
564                if let Some(hash_log) = ov.hash_log {
565                    dfast.long_hash_log = hash_log as u8;
566                }
567                if let Some(chain_log) = ov.chain_log {
568                    dfast.short_hash_log = chain_log as u8;
569                }
570            }
571        }
572        SearchMethod::RowHash => {
573            if let Some(row) = params.row.as_mut() {
574                // Row hash-table width override (mirrors dfast `long_hash_log`
575                // / hc `hash_log`). Row has no separate chain table — the
576                // per-row depth comes from `search_log` below — so only
577                // `hash_log` maps here; `chain_log` has no Row analogue.
578                if let Some(hash_log) = ov.hash_log {
579                    row.hash_bits = hash_log as usize;
580                }
581                if let Some(search_log) = ov.search_log {
582                    // Donor: rowLog = clamp(searchLog, 4, 6);
583                    //        nbAttempts = 1 << min(searchLog, rowLog).
584                    let row_log = (search_log as usize).clamp(4, 6);
585                    row.row_log = row_log;
586                    row.search_depth = 1usize << (search_log as usize).min(row_log);
587                }
588                if let Some(target_length) = ov.target_length {
589                    row.target_len = target_length as usize;
590                }
591                if let Some(min_match) = ov.min_match {
592                    row.mls = min_match as usize;
593                }
594            }
595        }
596        SearchMethod::HashChain | SearchMethod::BinaryTree => {
597            if let Some(hc) = params.hc.as_mut() {
598                if let Some(hash_log) = ov.hash_log {
599                    hc.hash_log = hash_log as usize;
600                }
601                if let Some(chain_log) = ov.chain_log {
602                    hc.chain_log = chain_log as usize;
603                }
604                if let Some(search_log) = ov.search_log {
605                    hc.search_depth = 1usize << search_log;
606                }
607                if let Some(target_length) = ov.target_length {
608                    hc.target_len = target_length as usize;
609                }
610                if let Some(min_match) = ov.min_match {
611                    // Donor `mls = BOUNDED(4, cParams.minMatch, 6)`: a BT
612                    // min_match override maps into the finder hash width. Only
613                    // the BT body reads `search_mls`; HC/lazy keep 4-byte
614                    // hashing regardless, so this is a no-op for them.
615                    hc.search_mls = (min_match as usize).clamp(4, 6);
616                }
617            }
618        }
619    }
620}
621
/// Translate the resolved runtime strategy into the donor LDM strategy
/// ordinal (1..=9) consumed by [`super::ldm::params::LdmParams::adjust_for`].
///
/// The donor `lazy` (4) and `lazy2` (5) strategies share the collapsed
/// `Lazy` tag, so `lazy_depth` decides between them: a depth of 2 or more
/// means lazy2. `lazy_depth` is ignored for every other tag.
#[cfg(feature = "hash")]
fn ldm_strategy_ordinal(tag: super::strategy::StrategyTag, lazy_depth: u8) -> u32 {
    use super::strategy::StrategyTag as Tag;

    match tag {
        Tag::Fast => 1,
        Tag::Dfast => 2,
        Tag::Greedy => 3,
        Tag::Lazy if lazy_depth < 2 => 4,
        Tag::Lazy => 5,
        // Donor `ZSTD_btlazy2` ordinal.
        Tag::Btlazy2 => 6,
        Tag::BtOpt => 7,
        Tag::BtUltra => 8,
        Tag::BtUltra2 => 9,
    }
}
646
647/// `ceil(log2(size))` of a source-size hint, with a zero hint floored to
648/// [`MIN_WINDOW_LOG`]. This is the single quantization every hint-dependent
649/// matcher parameter is derived from: the window-log cap, the HC / Fast hash
650/// and chain widths, the Dfast / Row table widths, the L22 config buckets, and
651/// the Fast attach-vs-copy cutoff. Two hints sharing this value resolve to the
652/// identical matcher shape, which is why it (not the raw byte count) keys the
653/// primed-dictionary snapshot — see [`PrimedKey`]. Operates on the full `u64`
654/// so callers comparing a hint against a cutoff get the same bucketed decision
655/// here and at the driver, with no `as usize` truncation on 32-bit targets.
656pub(crate) fn source_size_ceil_log(size: u64) -> u8 {
657    if size == 0 {
658        MIN_WINDOW_LOG
659    } else {
660        (64 - (size - 1).leading_zeros()) as u8
661    }
662}
663
/// Donor `ZSTD_shouldAttachDict` cutoff for the Fast strategy, as a ceil-log
/// bucket: 8 KiB = `2^13`, and `bucket <= 13` is exactly `hint <= 8192` because
/// the bucket is monotone in the hint. A hint at or below this (or unknown,
/// `None`) ATTACHES the dictionary (a separate immutable table); a larger hint
/// COPIES it into the live table. Shared by `reset` (which records the mode in
/// the primed-snapshot key) and `prime_with_dictionary` (which acts on it).
/// The bucket is the value produced by `source_size_ceil_log`.
const FAST_ATTACH_DICT_CUTOFF_LOG: u8 = 13;

/// Dfast counterpart of [`FAST_ATTACH_DICT_CUTOFF_LOG`]: donor
/// `ZSTD_dictMatchState` attach cutoff for the double-fast strategy is 16 KiB
/// (`2^14`), so small / unknown-size inputs ATTACH (separate immutable dict
/// long+short tables + dual-probe in `start_matching_fast_loop`) and larger
/// known-size inputs COPY (re-prime the dict into the live tables, where the
/// dense scan matches it as window history). The attach build also self-gates
/// on `use_fast_loop` inside `skip_matching_for_dict_attach` — only the
/// fast-loop levels (L3 / Default / L0) carry the dual-probe.
/// Expressed as a log2 value, like the Fast cutoff.
const DFAST_ATTACH_DICT_CUTOFF_LOG: u8 = 14;

/// Row counterpart of [`FAST_ATTACH_DICT_CUTOFF_LOG`]: the donor
/// `ZSTD_dictMatchState` attach cutoff for the Row (greedy/lazy) strategy is
/// 32 KiB (`2^15`, donor `attachDictSizeCutoffs`): small / unknown-size inputs
/// ATTACH the dict into the separate immutable row index (bounded dual-probe in
/// `row_candidate_rl`), larger known-size inputs dense-COPY into the live rows.
/// Expressed as a log2 value, like the Fast cutoff.
const ROW_ATTACH_DICT_CUTOFF_LOG: u8 = 15;
687
688// Source-size cap for the dfast hash bits when a size hint is present: a tiny
689// input needs no larger hash than its window. The donor `cParams.hashLog` /
690// `chainLog` (from `DfastConfig`) caps it from above at the call site.
691fn dfast_hash_bits_for_window(max_window_size: usize) -> usize {
692    let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
693    window_log.max(MIN_WINDOW_LOG as usize)
694}
695
696fn row_hash_bits_for_window(max_window_size: usize) -> usize {
697    // Donor `ZSTD_adjustCParams_internal` cap: `hashLog <= windowLog + 1`.
698    // The `+ 1` is load-bearing for L12, whose donor hashLog (23) exceeds
699    // its windowLog (22) — a plain `windowLog` cap would shrink the L12
700    // table on EVERY hinted reset and split primed snapshots between
701    // hinted and unhinted frames that resolve to the identical geometry.
702    // No constant upper clamp: the old `ROW_HASH_BITS` (20) ceiling
703    // predates the lazy band moving onto Row (L9-12 carry donor hashLog
704    // 21-23).
705    let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
706    (window_log + 1).max(MIN_WINDOW_LOG as usize)
707}
708
709/// `floor(log2(window))` for the HashChain table-log cap (donor
710/// `ZSTD_adjustCParams_internal`). The caller clamps the level's `hash_log` /
711/// `chain_log` from above with this so a small hinted input doesn't allocate the
712/// full level's tables.
713fn hc_hash_bits_for_window(max_window_size: usize) -> usize {
714    let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
715    window_log.max(MIN_WINDOW_LOG as usize)
716}
717
/// Parameter table for numeric compression levels 1–22.
///
/// Each entry maps a zstd compression level to the best-available matcher
/// backend and tuning knobs. High levels map to dedicated parse modes:
/// btopt (16-17), btultra (18), btultra2 (19-22) — matching donor
/// `clevels.h` (level 19 is `ZSTD_btultra2`, not plain btultra).
///
/// Layout of the rows as written below:
/// - 1–2: `Fast` strategy / `Fast` search, `fast` config.
/// - 3–4: `Dfast` strategy / `DoubleFast` search, `dfast` config.
/// - 5: `Greedy` strategy / `RowHash` search, `row` config.
/// - 6–12: `Lazy` strategy / `RowHash` search, `row` config.
/// - 13–15: `Btlazy2` strategy / `BinaryTree` search, `hc` config.
/// - 16–22: `BtOpt` / `BtUltra` / `BtUltra2` strategy, `BinaryTree` search,
///   `hc` config.
///
/// Index 0 = level 1, index 21 = level 22.
#[rustfmt::skip]
const LEVEL_TABLE: [LevelParams; 22] = [
    // Exactly one of fast/dfast/hc/row is Some per row, matching the strategy
    // backend; the rest are None (not dead placeholders).
    // Lvl  Strategy       wlog  lazy  per-strategy config
    // ---  -------------- ----  ----  -------------------
    /* 1 */ LevelParams { strategy_tag: super::strategy::StrategyTag::Fast, search: super::strategy::SearchMethod::Fast, window_log: 19, lazy_depth: 0, fast: Some(FAST_L1), dfast: None, hc: None, row: None },
    /* 2 */ LevelParams { strategy_tag: super::strategy::StrategyTag::Fast, search: super::strategy::SearchMethod::Fast, window_log: 20, lazy_depth: 0, fast: Some(FAST_L2), dfast: None, hc: None, row: None },
    /* 3 */ LevelParams { strategy_tag: super::strategy::StrategyTag::Dfast, search: super::strategy::SearchMethod::DoubleFast, window_log: 21, lazy_depth: 1, fast: None, dfast: Some(DFAST_L3), hc: None, row: None },
    /* 4 */ LevelParams { strategy_tag: super::strategy::StrategyTag::Dfast, search: super::strategy::SearchMethod::DoubleFast, window_log: 21, lazy_depth: 1, fast: None, dfast: Some(DFAST_L4), hc: None, row: None },
    // target_len column for L5..=L15 matches donor cParams.targetLength
    // from clevels.h table[0] (default — srcSize > 256 KB). Donor uses
    // it as the lazy outer loop's `sufficient_len` (nice-match) threshold.
    // Inflating it above donor forces the chain walk to complete
    // search_depth iterations instead of breaking on the first
    // long-enough match — the dominant cost in the L5..=L15 speed
    // regression vs FFI (see lazy_band_target_len_matches_donor_default_table).
    /* 5 */ LevelParams { strategy_tag: super::strategy::StrategyTag::Greedy, search: super::strategy::SearchMethod::RowHash, window_log: 21, lazy_depth: 0, fast: None, dfast: None, hc: None, row: Some(ROW_L5) },
    // L6-12: the donor runs the lazy/lazy2 strategies on the ROW-based
    // match finder by default (`ZSTD_resolveRowMatchFinderMode`: row mode
    // is on for greedy..lazy2 whenever SIMD is available) — a bounded
    // SIMD tag scan per row instead of a pointer-chasing hash-chain walk.
    // Our HashChain walk on these levels was ~75% of L10 wall time on the
    // 1 MiB corpus (dependent chain-table loads). Same `RowConfig`
    // derivation as `ROW_L5` above, donor values per level in the
    // `ROW_L6..ROW_L12` comment block.
    /* 6 */ LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, search: super::strategy::SearchMethod::RowHash, window_log: 21, lazy_depth: 1, fast: None, dfast: None, hc: None, row: Some(ROW_L6) },
    /* 7 */ LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, search: super::strategy::SearchMethod::RowHash, window_log: 21, lazy_depth: 1, fast: None, dfast: None, hc: None, row: Some(ROW_L7) },
    /* 8 */ LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, search: super::strategy::SearchMethod::RowHash, window_log: 21, lazy_depth: 2, fast: None, dfast: None, hc: None, row: Some(ROW_L8) },
    /* 9 */ LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, search: super::strategy::SearchMethod::RowHash, window_log: 22, lazy_depth: 2, fast: None, dfast: None, hc: None, row: Some(ROW_L9) },
    /*10 */ LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, search: super::strategy::SearchMethod::RowHash, window_log: 22, lazy_depth: 2, fast: None, dfast: None, hc: None, row: Some(ROW_L10) },
    /*11 */ LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, search: super::strategy::SearchMethod::RowHash, window_log: 22, lazy_depth: 2, fast: None, dfast: None, hc: None, row: Some(ROW_L11) },
    /*12 */ LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, search: super::strategy::SearchMethod::RowHash, window_log: 22, lazy_depth: 2, fast: None, dfast: None, hc: None, row: Some(ROW_L12) },
    // L13-15: reference uses btlazy2 (binary-tree finder) with searchLog 4/5/6
    // (search_depth 16/32/64) and targetLength 32. We run the hash-chain Lazy
    // parser here, so we mirror the reference search budget rather than inflate
    // it: matching the table keeps speed near the reference and makes per-level
    // perf divergences comparable. The binary-tree finder that would let a
    // smaller searchLog find longer matches (and re-establish a strict ratio
    // ladder above L12) is tracked separately; until it lands these levels sit
    // close to L12 on hash-chain inputs by design.
    // NOTE(review): the three rows below are tagged `Btlazy2` / `BinaryTree`,
    // so the "hash-chain Lazy parser" wording above may predate the
    // binary-tree finder — confirm and refresh.
    /*13 */ LevelParams { strategy_tag: super::strategy::StrategyTag::Btlazy2, search: super::strategy::SearchMethod::BinaryTree, window_log: 22, lazy_depth: 2, fast: None, dfast: None, hc: Some(HcConfig { hash_log: 22, chain_log: 22, search_depth: 16, target_len: 32, search_mls: 5 }), row: None },
    /*14 */ LevelParams { strategy_tag: super::strategy::StrategyTag::Btlazy2, search: super::strategy::SearchMethod::BinaryTree, window_log: 22, lazy_depth: 2, fast: None, dfast: None, hc: Some(HcConfig { hash_log: 23, chain_log: 22, search_depth: 32, target_len: 32, search_mls: 5 }), row: None },
    /*15 */ LevelParams { strategy_tag: super::strategy::StrategyTag::Btlazy2, search: super::strategy::SearchMethod::BinaryTree, window_log: 22, lazy_depth: 2, fast: None, dfast: None, hc: Some(HcConfig { hash_log: 23, chain_log: 23, search_depth: 64, target_len: 32, search_mls: 5 }), row: None },
    /*16 */ LevelParams { strategy_tag: super::strategy::StrategyTag::BtOpt, search: super::strategy::SearchMethod::BinaryTree, window_log: 22, lazy_depth: 2, fast: None, dfast: None, hc: Some(HcConfig { hash_log: 22, chain_log: 22, search_depth: 32, target_len: 48, search_mls: 5 }), row: None },
    /*17 */ LevelParams { strategy_tag: super::strategy::StrategyTag::BtOpt, search: super::strategy::SearchMethod::BinaryTree, window_log: 23, lazy_depth: 2, fast: None, dfast: None, hc: Some(HcConfig { hash_log: 22, chain_log: 23, search_depth: 32, target_len: 64, search_mls: 4 }), row: None },
    /*18 */ LevelParams { strategy_tag: super::strategy::StrategyTag::BtUltra, search: super::strategy::SearchMethod::BinaryTree, window_log: 23, lazy_depth: 2, fast: None, dfast: None, hc: Some(HcConfig { hash_log: 22, chain_log: 23, search_depth: 64, target_len: 64, search_mls: 4 }), row: None },
    /*19 */ LevelParams { strategy_tag: super::strategy::StrategyTag::BtUltra2, search: super::strategy::SearchMethod::BinaryTree, window_log: 23, lazy_depth: 2, fast: None, dfast: None, hc: Some(HcConfig { hash_log: 22, chain_log: 24, search_depth: 128, target_len: 256, search_mls: 4 }), row: None },
    /*20 */ LevelParams { strategy_tag: super::strategy::StrategyTag::BtUltra2, search: super::strategy::SearchMethod::BinaryTree, window_log: 25, lazy_depth: 2, fast: None, dfast: None, hc: Some(HcConfig { hash_log: 23, chain_log: 25, search_depth: 128, target_len: 256, search_mls: 4 }), row: None },
    /*21 */ LevelParams { strategy_tag: super::strategy::StrategyTag::BtUltra2, search: super::strategy::SearchMethod::BinaryTree, window_log: 26, lazy_depth: 2, fast: None, dfast: None, hc: Some(BTULTRA2_HC_CONFIG), row: None },
    /*22 */ LevelParams { strategy_tag: super::strategy::StrategyTag::BtUltra2, search: super::strategy::SearchMethod::BinaryTree, window_log: 27, lazy_depth: 2, fast: None, dfast: None, hc: Some(BTULTRA2_HC_CONFIG_L22), row: None },
];
778
/// Source size (in bytes) assumed when sizing a dictionary's prepared tables
/// without a known source: `513 = (1 << 9) + 1`.
///
/// Donor `minSrcSize` assumption when building a dictionary's prepared cParams
/// with an unknown source (`zstd_compress.c` `ZSTD_adjustCParams_internal`,
/// `ZSTD_cpm_createCDict`: `if (dictSize && srcSize == UNKNOWN) srcSize =
/// minSrcSize` where `minSrcSize = (1<<9) + 1`). Used by [`cdict_table_logs`].
const DICT_MIN_SRC_SIZE: u64 = 513;
784
785/// Donor `ZSTD_dictAndWindowLog` (`zstd_compress.c`): the window log large
786/// enough to address both the source and the dictionary, used when downsizing
787/// the hash / chain logs for a dictionary-bearing compress. `window_log` is the
788/// (already source-clamped) compress window; `src_size` / `dict_size` are the
789/// assumed source and the dictionary length.
790fn dict_and_window_log(window_log: u8, src_size: u64, dict_size: u64) -> u32 {
791    if dict_size == 0 {
792        return window_log as u32;
793    }
794    let window_size: u64 = 1u64 << window_log;
795    // Plain `+` (matches upstream zstd `ZSTD_dictAndWindowLog`): `window_size` is
796    // `1 << window_log` (window_log <= 31) and dict/src are real data sizes
797    // (<= isize::MAX), so these u64 sums cannot overflow in practice.
798    let dict_and_window = dict_size + window_size;
799    if window_size >= dict_size + src_size {
800        // Window already covers source + dictionary.
801        window_log as u32
802    } else {
803        // ceil(log2(dictAndWindowSize)) = highbit32(x - 1) + 1.
804        source_size_ceil_log(dict_and_window) as u32
805    }
806}
807
808/// Donor `ZSTD_createCDict` table geometry: the `(hash_log, chain_log)` a
809/// dictionary's prepared match-finder tables get, mirroring
810/// `ZSTD_adjustCParams_internal` under `ZSTD_cpm_createCDict`. A dictionary
811/// supplies the long matches, so donor downsizes the table widths toward the
812/// dict-and-window log (assuming a `minSrcSize` source) while the live window
813/// stays source-sized. `window_log` is the resolved compress window; `hash_log`
814/// / `chain_log` are the level's own widths; `uses_bt` selects the binary-tree
815/// `cycleLog` (`chainLog - 1`) vs the hash-chain one (`chainLog`).
816fn cdict_table_logs(
817    window_log: u8,
818    hash_log: usize,
819    chain_log: usize,
820    uses_bt: bool,
821    dict_size: usize,
822) -> (usize, usize) {
823    let dict_size = dict_size as u64;
824    // createCDict assumes a minSrcSize source when the real size is unknown.
825    let src_size = DICT_MIN_SRC_SIZE;
826    // Source-size window resize (donor caps windowLog by ceil_log2(src+dict)).
827    // Plain `+`: src_size is the tiny DICT_MIN_SRC_SIZE constant and dict_size
828    // is a real dictionary length, so the u64 sum cannot overflow.
829    let tsize = src_size + dict_size;
830    let resized_window_log = (window_log as u32)
831        .min(source_size_ceil_log(tsize) as u32)
832        .max(1);
833    let daw = dict_and_window_log(resized_window_log as u8, src_size, dict_size);
834    // `ZSTD_cycleLog(chainLog, strategy)`: chainLog - 1 for binary-tree finders.
835    let cycle_log = (chain_log as u32).saturating_sub(uses_bt as u32);
836    let new_hash_log = if hash_log as u32 > daw + 1 {
837        (daw + 1) as usize
838    } else {
839        hash_log
840    };
841    let new_chain_log = if cycle_log > daw {
842        chain_log.saturating_sub((cycle_log - daw) as usize)
843    } else {
844        chain_log
845    };
846    (new_hash_log, new_chain_log)
847}
848
/// Smallest window_log the encoder will use regardless of source size
/// (`10` = a 1 KiB window). Also the floor the `*_hash_bits_for_window`
/// helpers and `adjust_params_for_source_size` apply to table logs.
pub(crate) const MIN_WINDOW_LOG: u8 = 10;
/// Conservative floor for source-size-hinted window tuning (`14` = 16 KiB).
///
/// Hinted windows below 16 KiB (`window_log < 14`) currently regress C-FFI
/// interoperability on certain compressed-block patterns. Keep hinted
/// windows at 16 KiB or larger until that compatibility gap is closed.
///
/// `adjust_params_for_source_size` applies this floor to the advertised
/// `window_log` only; the internal table widths are floored at
/// [`MIN_WINDOW_LOG`] instead.
const MIN_HINTED_WINDOW_LOG: u8 = 14;
857
858/// Adjust level parameters for a known source size.
859///
860/// This derives a cap from `ceil(log2(src_size))`, then clamps it to
861/// [`MIN_HINTED_WINDOW_LOG`] (16 KiB). A zero-byte size hint is treated as
862/// [`MIN_WINDOW_LOG`] for the raw ceil-log step and then promoted to the hinted
863/// floor. This keeps tables bounded for small inputs while preserving the
864/// encoder's baseline minimum supported window.
865/// For the HC backend, `hash_log` and `chain_log` are reduced
866/// proportionally.
867fn adjust_params_for_source_size(mut params: LevelParams, src_size: u64) -> LevelParams {
868    // Derive a source-size-based cap from ceil(log2(src_size)), then
869    // clamp first to MIN_WINDOW_LOG (baseline encoder minimum) and then to
870    // MIN_HINTED_WINDOW_LOG (16 KiB hinted floor). For tiny or zero hints we
871    // therefore keep a 16 KiB effective minimum window in hinted mode.
872    // Raw ceil(log2(src_size)) drives the internal table sizes. The
873    // advertised `window_log` is separately floored at MIN_HINTED_WINDOW_LOG
874    // (a decoder-interop requirement on the wire format), but the hash /
875    // chain table widths are internal and never appear in the frame, so they
876    // can track the actual source size below that floor.
877    let raw_src_log = source_size_ceil_log(src_size);
878    let src_log = raw_src_log.max(MIN_WINDOW_LOG).max(MIN_HINTED_WINDOW_LOG);
879    if src_log < params.window_log {
880        params.window_log = src_log;
881    }
882    // Internal match-finder tables are sized from `table_log` — the RAW
883    // source log (floored only at the baseline `MIN_WINDOW_LOG`), NOT the
884    // wire `window_log` floor. The table widths never appear in the frame, so
885    // for small inputs they can track the actual source size and avoid
886    // zeroing a window-sized table per frame; large inputs keep the level's
887    // widths. The cap is applied with the same per-backend headroom the
888    // level table uses, so the load factor (and match quality) is unchanged.
889    // The Dfast backend derives its table widths from the source in `reset`
890    // (`set_hash_bits` recomputes there), so it is not adjusted here. The Row
891    // backend's width IS capped here, mirroring the donor (see the Row branch).
892    let table_log = raw_src_log.max(MIN_WINDOW_LOG);
893    let backend = params.backend();
894    if backend == super::strategy::BackendTag::HashChain {
895        let hc = params
896            .hc
897            .as_mut()
898            .expect("HashChain level row carries an HcConfig");
899        if (table_log + 2) < hc.hash_log as u8 {
900            hc.hash_log = (table_log + 2) as usize;
901        }
902        if (table_log + 1) < hc.chain_log as u8 {
903            hc.chain_log = (table_log + 1) as usize;
904        }
905    } else if backend == super::strategy::BackendTag::Row {
906        let row = params
907            .row
908            .as_mut()
909            .expect("Row level row carries a RowConfig");
910        // Upstream zstd `ZSTD_adjustCParams_internal` (zstd_compress.c): once
911        // the window is source-capped, `hashLog <= windowLog + 1`. The row
912        // table is `2^hash_bits` slots, exactly upstream's row hashTable
913        // `2^hashLog` slots, so the same cap applies. Without it the row table
914        // stays at the level's unbounded width (e.g. L12 hash_bits 23 = 4x
915        // upstream's source-capped 21), the dominant peak-memory excess on the
916        // row band.
917        let row_cap = (table_log + 1) as usize;
918        if row_cap < row.hash_bits {
919            row.hash_bits = row_cap;
920        }
921    } else if backend == super::strategy::BackendTag::Simple {
922        let fast = params
923            .fast
924            .as_mut()
925            .expect("Fast level row carries a FastConfig");
926        let fast_cap = (table_log + 1) as u32;
927        if fast_cap < fast.hash_log {
928            fast.hash_log = fast_cap;
929        }
930    }
931    params
932}
933
934fn level22_btultra2_params_for_source_size(source_size: Option<u64>) -> LevelParams {
935    let mut hc = match source_size {
936        Some(size) if size <= 16 * 1024 => BTULTRA2_HC_CONFIG_L22_16K,
937        Some(size) if size <= 128 * 1024 => BTULTRA2_HC_CONFIG_L22_128K,
938        Some(size) if size <= 256 * 1024 => BTULTRA2_HC_CONFIG_L22_256K,
939        _ => BTULTRA2_HC_CONFIG_L22,
940    };
941    let mut window_log = match source_size {
942        Some(size) if size <= 16 * 1024 => 14,
943        Some(size) if size <= 128 * 1024 => 17,
944        Some(size) if size <= 256 * 1024 => 18,
945        _ => 27,
946    };
947    if let Some(size) = source_size
948        && size > 256 * 1024
949    {
950        let src_log = source_size_ceil_log(size);
951        window_log = window_log.min(src_log.max(MIN_WINDOW_LOG));
952        let adjusted_table_log = window_log as usize + 1;
953        hc.hash_log = hc.hash_log.min(adjusted_table_log);
954        hc.chain_log = hc.chain_log.min(adjusted_table_log);
955    }
956    LevelParams {
957        strategy_tag: super::strategy::StrategyTag::BtUltra2,
958        search: super::strategy::SearchMethod::BinaryTree,
959        window_log,
960        lazy_depth: 2,
961        fast: None,
962        dfast: None,
963        hc: Some(hc),
964        row: None,
965    }
966}
967
968/// Estimated steady-state heap footprint of a one-shot compression context
969/// at `level` (window history + match-finder tables + block staging), in
970/// bytes. Computed from the same per-level tuning table the encoder
971/// resolves at frame start, so the estimate tracks the real allocations;
972/// it is an upper-bound style budget figure, not an exact accounting.
973pub fn estimated_compression_workspace_bytes(level: CompressionLevel) -> usize {
974    use super::strategy::StrategyTag;
975    let params = resolve_level_params(level, None);
976    let window = 1usize << params.window_log;
977    // Mirror `configure()`: the HC3 short-match side table exists only on
978    // the btultra/btultra2 tags (minMatch 3), capped by the window log; the
979    // BT pointer-pair layout fits inside the `4 << chain_log` chain term
980    // (pairs over `chain_log - 1` nodes).
981    let wants_hash3 = matches!(
982        params.strategy_tag,
983        StrategyTag::BtUltra | StrategyTag::BtUltra2
984    );
985    let uses_bt = matches!(
986        params.strategy_tag,
987        StrategyTag::Btlazy2 | StrategyTag::BtOpt | StrategyTag::BtUltra | StrategyTag::BtUltra2
988    );
989    let tables = params.fast.map(|f| 4usize << f.hash_log).unwrap_or(0)
990        + params
991            .dfast
992            .map(|d| (4usize << d.long_hash_log) + (4usize << d.short_hash_log))
993            .unwrap_or(0)
994        + params
995            .hc
996            .map(|h| {
997                let hash3 = if wants_hash3 {
998                    4usize
999                        << super::match_table::storage::HC3_HASH_LOG.min(params.window_log as usize)
1000                } else {
1001                    0
1002                };
1003                (4usize << h.hash_log) + (4usize << h.chain_log) + hash3
1004            })
1005            .unwrap_or(0)
1006        + params
1007            .row
1008            .map(|r| (4usize << r.hash_bits) + (2usize << r.hash_bits))
1009            .unwrap_or(0);
1010    // BT modes box a `BtMatcher`; its retained scratch layout is budgeted
1011    // next to the struct so estimator and allocator evolve together.
1012    let bt = if uses_bt {
1013        super::bt::BtMatcher::estimated_workspace_bytes()
1014    } else {
1015        0
1016    };
1017    // Block staging: literal + sequence buffers plus the compressed-block
1018    // scratch, each bounded by the 128 KiB block size.
1019    let staging = 3 * (128 * 1024);
1020    window + tables + bt + staging
1021}
1022
1023/// Extra steady-state workspace the binary-tree strategies (ordinals 6..=9,
1024/// btlazy2..btultra2) retain beyond the hash/chain tables: the boxed matcher
1025/// plus its scratch arenas, and the HC3 short-match side table for
1026/// btultra/btultra2 (capped by the window log). 0 for non-BT ordinals.
1027pub fn estimated_bt_strategy_extra_bytes(strategy_ordinal: u32, window_log: u32) -> usize {
1028    if !(6..=9).contains(&strategy_ordinal) {
1029        return 0;
1030    }
1031    let hash3 = if matches!(strategy_ordinal, 8 | 9) {
1032        4usize << super::match_table::storage::HC3_HASH_LOG.min(window_log as usize)
1033    } else {
1034        0
1035    };
1036    super::bt::BtMatcher::estimated_workspace_bytes() + hash3
1037}
1038
1039/// Resolve a [`CompressionLevel`] to internal tuning parameters,
1040/// optionally adjusted for a known source size.
1041fn resolve_level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
1042    if matches!(level, CompressionLevel::Level(22)) {
1043        return level22_btultra2_params_for_source_size(source_size);
1044    }
1045    let params = match level {
1046        CompressionLevel::Uncompressed => LevelParams {
1047            strategy_tag: super::strategy::StrategyTag::Fast,
1048            search: super::strategy::SearchMethod::Fast,
1049            // Uncompressed frames emit raw blocks and never reference
1050            // history; advertising a larger window only inflates
1051            // decoder-side buffer reservation. Stay at 17 (128 KiB).
1052            window_log: 17,
1053            lazy_depth: 0,
1054            // Beyond-donor: hash_log=14 (vs donor's 13) for 2× fewer
1055            // collisions on structured corpora. Donor's "base for negative"
1056            // row has targetLength=1 → step_size = 1 + 0 + 1 = 2.
1057            fast: Some(FastConfig {
1058                hash_log: 14,
1059                mls: 6,
1060                step_size: 2,
1061            }),
1062            dfast: None,
1063            hc: None,
1064            row: None,
1065        },
1066        CompressionLevel::Fastest => {
1067            // Only the Fast-specific cParams
1068            // (fast_hash_log / fast_mls / fast_step_size) align
1069            // with Uncompressed / negative-base row. window_log
1070            // stays at LEVEL_TABLE[0]'s value (19) — Fastest still
1071            // does real compression on a full window, unlike
1072            // Uncompressed which clamps to 17.
1073            let mut p = LEVEL_TABLE[0];
1074            p.fast = Some(FastConfig {
1075                hash_log: 14,
1076                mls: 6,
1077                step_size: 2,
1078            });
1079            p
1080        }
1081        CompressionLevel::Default => LEVEL_TABLE[2],
1082        CompressionLevel::Better => LEVEL_TABLE[6],
1083        // Level 13: the first dominant point of the deep-lazy band. The
1084        // mls-wide row key lifted the shallow band's ratio enough that
1085        // level 11 no longer strictly beats level 7 on the ladder corpus;
1086        // the `Best` alias belongs on a config that dominates everything
1087        // below it rather than on a hair-thin margin.
1088        CompressionLevel::Best => LEVEL_TABLE[12],
1089        CompressionLevel::Level(n) => {
1090            if n > 0 {
1091                let idx = (n as usize).min(CompressionLevel::MAX_LEVEL as usize) - 1;
1092                LEVEL_TABLE[idx]
1093            } else if n == 0 {
1094                // Level 0 = default, matching C zstd semantics.
1095                LEVEL_TABLE[CompressionLevel::DEFAULT_LEVEL as usize - 1]
1096            } else {
1097                // Negative levels — donor sets
1098                // targetLength = -level (clampedCompressionLevel),
1099                // yielding step_size = (-level) + 1 since
1100                // !(targetLength) = 0 when targetLength > 0.
1101                // So L-1..L-7 get step_size 2..8. Acceleration
1102                // gradient comes from larger step skipping more
1103                // positions per iter (faster, worse ratio).
1104                // Clamp to donor's MIN_LEVEL before negating so
1105                // i32::MIN can't overflow on `-n`.
1106                let clamped = n.max(CompressionLevel::MIN_LEVEL);
1107                let target_length = (-clamped) as usize;
1108                let step_size = target_length + 1;
1109                // Donor row-0 ("base for negative", clevels.h srcSize>256KB):
1110                // hashLog=13, minMatch=7. The 32 KiB hash table (2^13 * 4B)
1111                // is L1d-resident on contemporary cores, so every probe is an
1112                // L1 hit; hashLog=14 (64 KiB) overflows a 32 KiB L1d and turns
1113                // each probe into an L2 access. minMatch=7 (vs 6) skips
1114                // short-distance 6-byte matches: fewer sequences, less
1115                // extension/emit work, and parity with the donor's negative
1116                // ladder on both ratio and throughput.
1117                LevelParams {
1118                    strategy_tag: super::strategy::StrategyTag::Fast,
1119                    search: super::strategy::SearchMethod::Fast,
1120                    window_log: 19,
1121                    lazy_depth: 0,
1122                    fast: Some(FastConfig {
1123                        hash_log: 13,
1124                        mls: 7,
1125                        step_size,
1126                    }),
1127                    dfast: None,
1128                    hc: None,
1129                    row: None,
1130                }
1131            }
1132        }
1133    };
1134    if let Some(size) = source_size {
1135        adjust_params_for_source_size(params, size)
1136    } else {
1137        params
1138    }
1139}
1140
1141/// The cheap fingerprint pre-splitter level for a compression level (the
1142/// C-like `blockSplitterLevel`), resolved through the same per-level
1143/// `LevelParams` table as every other tuning knob. `None` keeps the whole
1144/// 128 KiB block. The frame loop reads this instead of hardcoding the
1145/// level→split mapping at the call site.
1146pub(crate) fn level_pre_split(level: CompressionLevel) -> Option<usize> {
1147    // Resolve through `resolve_level_params` directly — NOT via the legacy
1148    // `numeric_level()` alias — so named presets read the SAME table row as
1149    // every other tuning knob (`Best` maps to its own row there, which is
1150    // not the row its numeric alias points at). `Uncompressed` (raw
1151    // blocks) never splits.
1152    if matches!(level, CompressionLevel::Uncompressed) {
1153        return None;
1154    }
1155    resolve_level_params(level, None)
1156        .pre_split()
1157        .map(usize::from)
1158}
1159
/// Backend storage for [`MatchGeneratorDriver`]. Exactly one match-finder
/// state lives in the driver at a time — the active variant. Backend
/// transitions in [`Matcher::reset`] drain the current variant's allocations
/// into the shared `vec_pool` and then replace `storage` with a freshly
/// constructed variant for the new backend.
///
/// Replaces the prior pattern of four parallel fields (`match_generator`,
/// `dfast_match_generator: Option<…>`, `row_match_generator: Option<…>`,
/// `hc_match_generator: Option<…>`) + an `active_backend: BackendTag`
/// discriminator: the parallel layout kept drained inner structures
/// allocated across backend switches, and every per-frame/per-slice
/// driver operation had to dispatch on `active_backend` to pick the
/// right field. A single enum collapses the storage and makes the
/// dispatcher pattern-match on the storage variant directly — same
/// number of arms, but `storage.backend()` is now the canonical source
/// of truth and dead variants are dropped when the active backend
/// changes.
///
/// The level lists on the variants below follow `LEVEL_TABLE` and
/// `resolve_level_params`: each level row carries exactly one per-backend
/// config (`fast` / `dfast` / `row` / `hc`), and that config identifies the
/// backend.
#[derive(Clone)]
enum MatcherStorage {
    /// Donor `ZSTD_fast` family. Constructed by
    /// [`MatchGeneratorDriver::new`] as the initial variant and
    /// re-selected by [`Matcher::reset`] for any [`CompressionLevel`]
    /// that `resolve_level_params` maps to [`StrategyTag::Fast`]:
    /// `Uncompressed`, `Fastest`, `Level(1)`, `Level(2)`, and every
    /// negative `Level(n)`.
    Simple(FastKernelMatcher),
    /// Donor `ZSTD_dfast` family — separate long and short hash tables.
    /// Selected for any level that resolves to [`StrategyTag::Dfast`] in
    /// `resolve_level_params`: `Default` (the level-3 row), `Level(3)`,
    /// `Level(4)`, and `Level(0)`, which resolves to the `DEFAULT_LEVEL`
    /// row (presumably level 3 — confirm against `DEFAULT_LEVEL`).
    Dfast(DfastMatchGenerator),
    /// Row-hash match finder. Selected for the levels whose table row
    /// carries a `RowConfig`: `Level(5)` ([`StrategyTag::Greedy`]) and
    /// `Level(6..=12)` ([`StrategyTag::Lazy`] on `SearchMethod::RowHash`),
    /// which includes the `Better` preset (the level-7 row).
    Row(RowMatchGenerator),
    /// Hash-chain / binary-tree family. Selected for the levels whose table
    /// row carries an `HcConfig`: `Level(13..=15)` (`StrategyTag::Btlazy2`),
    /// `Level(16..=17)` ([`StrategyTag::BtOpt`]), `Level(18)`
    /// ([`StrategyTag::BtUltra`]) and `Level(19..=22)`
    /// ([`StrategyTag::BtUltra2`]), which includes the `Best` preset (the
    /// level-13 row). `resolve_level_params` clamps positive numeric
    /// levels at `MAX_LEVEL`, so `Level(23..=i32::MAX)` also land on
    /// `BtUltra2` here. The [`HcMatchGenerator`]'s internal
    /// [`HcBackend`] discriminator decides whether BT scratch is
    /// allocated.
    HashChain(HcMatchGenerator),
}
1208
1209impl MatcherStorage {
1210    /// Heap bytes the active backend variant holds (tables, history, scratch).
1211    fn heap_size(&self) -> usize {
1212        match self {
1213            Self::Simple(m) => m.heap_size(),
1214            Self::Dfast(m) => m.heap_size(),
1215            Self::Row(m) => m.heap_size(),
1216            Self::HashChain(m) => m.heap_size(),
1217        }
1218    }
1219
1220    /// [`super::strategy::BackendTag`] family of the active variant.
1221    fn backend(&self) -> super::strategy::BackendTag {
1222        use super::strategy::BackendTag;
1223        match self {
1224            Self::Simple(_) => BackendTag::Simple,
1225            Self::Dfast(_) => BackendTag::Dfast,
1226            Self::Row(_) => BackendTag::Row,
1227            Self::HashChain(_) => BackendTag::HashChain,
1228        }
1229    }
1230}
1231
/// This is the default implementation of the `Matcher` trait. It allocates and reuses the buffers when possible.
pub struct MatchGeneratorDriver {
    // Pool of spare byte buffers kept for reuse. `recycle_simple_space`
    // pushes the Simple backend's spent per-block buffer here (with
    // `len = 0`, capacity retained), and `Matcher::reset` drains the
    // previous backend variant into it before swapping backends.
    vec_pool: Vec<Vec<u8>>,
    /// Active match-finder state. Exactly one backend lives here at a
    /// time; [`Matcher::reset`] drains the previous variant into
    /// `vec_pool` before swapping in a freshly constructed variant for
    /// the new backend. `storage.backend()` is the canonical source of
    /// truth for the parse family; `strategy_tag` carries the
    /// compile-time strategy chosen at the last `reset()`.
    storage: MatcherStorage,
    // Compile-time strategy tag resolved at `reset()` from the
    // requested `CompressionLevel`'s `LevelParams`. The driver's
    // hot-block dispatcher in `blocks/compressed.rs` matches on
    // this tag to enter the corresponding `Strategy`
    // monomorphisation (`compress_block::<S>`).
    strategy_tag: super::strategy::StrategyTag,
    // Decoupled search-method axis resolved at `reset()` from
    // `LevelParams.search`. The per-block dispatcher routes on this
    // (not on `strategy_tag`) so a level's parse and search backend can
    // be chosen independently. The `BinaryTree` arm still consults
    // `strategy_tag` to pick the opt `Strategy` ZST.
    search: super::strategy::SearchMethod,
    // Decoupled parse-mode axis resolved at `reset()` from
    // `LevelParams::parse()`. Independent of `search`: greedy / lazy /
    // lazy2 can run on any non-opt search backend. The backends still
    // read their own `lazy_depth` (kept in sync at `reset()`); this is
    // the authoritative parse selector for the dispatcher.
    parse: super::strategy::ParseMode,
    /// Test-only per-level recipe override applied in `reset()` before
    /// backend selection. Lets the parse×search matrix be exercised
    /// without editing `LEVEL_TABLE`; never compiled into production.
    #[cfg(test)]
    config_override: Option<(super::strategy::SearchMethod, super::strategy::ParseMode)>,
    /// Fine-grained per-knob overrides from the public
    /// [`super::parameters::CompressionParameters`] surface (#27).
    /// `None` (or an all-`None` [`super::parameters::ParamOverrides`])
    /// keeps the resolved level geometry byte-identical to plain
    /// level-based compression. Applied in [`Matcher::reset`] after the
    /// level params are resolved, before backend selection. Persists
    /// across resets (it is frame configuration, not a one-shot) until
    /// the caller changes it.
    param_overrides: Option<super::parameters::ParamOverrides>,
    // Current per-block input buffer length; `new` initialises it from
    // its `slice_size` argument and `get_next_space()` resizes popped
    // pool buffers up to it. NOTE(review): the recycle-path comments say
    // a small source-size hint can shrink this — confirm where in `reset`.
    slice_size: usize,
    // The `slice_size` value passed to `new`. NOTE(review): presumably
    // the unshrunk baseline that `slice_size` is re-derived from on
    // `reset` — confirm against `reset`.
    base_slice_size: usize,
    // Frame header window size must stay at the configured live-window budget.
    // Dictionary retention expands internal matcher capacity only.
    reported_window_size: usize,
    // Tracks currently retained bytes that originated from primed dictionary
    // history and have not been evicted yet.
    dictionary_retained_budget: usize,
    // Source size hint for next frame (set via set_source_size_hint, cleared on reset).
    source_size_hint: Option<u64>,
    // Dictionary content size for the next frame (set via set_dictionary_size_hint,
    // consumed on reset). When present on a binary-tree / hash-chain backend, the
    // match-finder hash/chain tables are sized from the DICTIONARY (donor CDict
    // economics: a loaded dictionary supplies the long matches, so the live tables
    // can shrink to the dict's size tier) while the eviction window stays
    // source-sized. Mirrors donor `ZSTD_getCParamRowSize`, which picks the cParams
    // table column from `dictSize` for a dictionary-bearing compress.
    dictionary_size_hint: Option<usize>,
    // Normalized `ceil_log2` bucket of the frame's source-size hint, captured at
    // `reset` (where `source_size_hint` is consumed) via [`source_size_ceil_log`].
    // `None` means the frame was unhinted. Drives `prime_with_dictionary`'s donor
    // `ZSTD_shouldAttachDict` mode for the Simple/Fast backend: `None` (unknown)
    // or `<= FAST_ATTACH_DICT_CUTOFF_LOG` → attach (separate dict table, 2-cursor
    // `compress_block_fast_dict`); larger → copy (dictionary primed into the live
    // table, 4-cursor `compress_block_fast`). The primed-snapshot key is the
    // resolved shape ([`reset_shape`](Self::reset_shape)), not this bucket.
    reset_size_log: Option<u8>,
    // Hint-resolved matcher shape from the last `reset`: the [`LevelParams`], the
    // active backend's applied Dfast/Row hash-table width (`0` for HC/Fast), the
    // Fast attach-vs-copy mode, and the active LDM override (#27). Combined with
    // the frame's level into the [`PrimedKey`] that keys the primed snapshot, so
    // it is only restored into a reset that resolved the identical matcher AND
    // LDM configuration. `None` before the first `reset`.
    reset_shape: Option<(
        LevelParams,
        usize,
        bool,
        Option<super::parameters::LdmOverride>,
    )>,
    // One-shot borrowed block range `[start, end)` staged by the borrowed
    // Fast frame path (`set_borrowed_block`) for the NEXT
    // `start_matching` / `skip_matching_with_hint`. `Some` routes that
    // call to the Simple backend's borrowed scan instead of the owned
    // committed-block path; consumed (reset to `None`) by the routed
    // call. Always `None` on the owned streaming path.
    borrowed_pending: Option<(usize, usize)>,
    /// CDict-equivalent: snapshot of the post-prime matcher state taken
    /// once after the first dictionary prime — the backend `storage`
    /// (hash tables + dictionary history + offset history + window) plus
    /// the driver-level `dictionary_retained_budget`, the only two pieces
    /// `prime_with_dictionary` writes. Subsequent frames restore this
    /// (a table memcpy) instead of re-hashing every dictionary position,
    /// mirroring donor `ZSTD_compressBegin_usingCDict` copying the
    /// precomputed `cdict->matchState`. Invalidated when the dictionary
    /// changes; keyed by the [`PrimedKey`] resolved matcher shape so a snapshot
    /// is only restored into a reset that produces the same matcher — see
    /// `restore_primed_dictionary`.
    primed: Option<(MatcherStorage, usize, PrimedKey)>,
}
1333
/// Identity of the matcher configuration a primed snapshot was captured under:
/// the FULLY RESOLVED matcher shape, not the raw source-size hint.
///
/// `reset()` resolves the hint into a [`LevelParams`] (window_log cap, the
/// HC/Fast table and search geometry, the parse depth/target-length that get
/// baked into the restored `storage`) plus, for the Dfast/Row backends, a
/// table-width derived from the hint's ceil-log bucket. The mapping from hint
/// to resolved shape is many-to-one: the source-size adjustment is monotone in
/// `ceil_log2(hint)`, and Level 22 additionally collapses several buckets onto
/// one donor tier (its `<= 16/128/256 KiB` thresholds). Keying on the raw hint
/// (or even its ceil-log bucket) therefore over-keys — two hints that resolve
/// to the identical matcher would each force a full re-prime. Keying on the
/// resolved (`params`, `table_bits`) pair restores across them.
///
/// `table_bits` is the hint-dependent hash-table width the ACTIVE backend
/// applied (`set_hash_bits` value for Dfast/Row; `0` for HC/Fast, whose widths
/// already live in `params`). The snapshot is only ever captured on the COPY
/// path (a hinted, above-cutoff frame), so `table_bits` is always the resolved
/// Dfast/Row value there, never the unhinted default.
///
/// `level` is kept alongside the resolved `params` because some stored matcher
/// state is derived from the level DIRECTLY, not through `params`: e.g. Dfast's
/// `use_fast_loop` is true for L3 but false for L4, yet L3 and L4 resolve to
/// byte-identical `params`. Without `level` a snapshot captured at L3 could be
/// restored into an L4 reset, installing the wrong `use_fast_loop`.
///
/// `fast_attach` records the Fast backend's attach-vs-copy mode
/// ([`FAST_ATTACH_DICT_CUTOFF_LOG`]) because that cutoff (8 KiB) falls INSIDE a
/// single resolved shape: an 8192- and an 8193-byte Level 1 hint both clamp to
/// window_log 14 with identical `params`/`table_bits`, yet 8192 attaches (a
/// separate dict table) while 8193 copies into the live table — two different
/// `storage` shapes. The frame compressor only captures/restores snapshots on
/// the copy path today, but keying on the mode keeps the snapshot identity
/// self-sufficient rather than relying on that external gate.
///
/// Restoring a snapshot whose key differs would reinstate the old `storage`
/// (and its `max_window_size` / table dimensions / parse params / dict-table
/// shape) under a reset that resolved a different shape — the encoder could
/// then search past the frame header's window and emit an undecodable match.
/// All fields must match before a restore is allowed.
#[derive(Clone, Copy, PartialEq, Eq)]
struct PrimedKey {
    // Requested compression level; kept because some matcher state is
    // derived from the level directly rather than through `params`.
    level: super::CompressionLevel,
    // Fully resolved level parameters the snapshot's `storage` was built from.
    params: LevelParams,
    // Hint-dependent hash-table width applied by the active backend
    // (Dfast/Row); `0` for HC/Fast, whose widths already live in `params`.
    table_bits: usize,
    // Fast backend attach-vs-copy dictionary mode at capture time.
    fast_attach: bool,
    /// Fine-grained LDM override (#27) active at capture time. The
    /// snapshot's cloned `storage` carries `BtMatcher::ldm_producer`,
    /// which is configured from this override; restoring a snapshot
    /// captured under a different LDM configuration (enable flip or
    /// changed knobs) would reinstate a stale producer. `params` already
    /// pins `window_log` / `strategy_tag` (the rest of the producer's
    /// identity), so folding the override completes the LDM identity.
    /// `None` = LDM off, matching `ParamOverrides::ldm`.
    ldm: Option<super::parameters::LdmOverride>,
}
1390
1391impl MatchGeneratorDriver {
1392    /// `slice_size` sets the base block allocation size used for matcher input chunks.
1393    /// `max_slices_in_window` determines the initial window capacity at construction
1394    /// time. Effective window sizing is recalculated on every [`reset`](Self::reset)
1395    /// from the resolved compression level and optional source-size hint.
1396    pub(crate) fn new(slice_size: usize, max_slices_in_window: usize) -> Self {
1397        // Validate inputs before deriving window_log_init. Three
1398        // failure modes need explicit guards:
1399        //
1400        // 1. Zero args → `max_window_size = 0` → silent 1-byte
1401        //    degenerate window (useless).
1402        // 2. Multiplication overflow on `slice_size *
1403        //    max_slices_in_window` → wraps silently in release.
1404        // 3. `next_power_of_two` overflow when the product is
1405        //    above `1 << (usize::BITS - 1)` → modern Rust PANICS
1406        //    on overflow (older Rust returned 0).
1407        //
1408        // Catch all three at construction with a clear domain-
1409        // specific message via `assert!` + `checked_mul` +
1410        // `checked_next_power_of_two`, rather than letting either
1411        // mode produce a silent degenerate matcher OR a generic
1412        // panic deep in `FastKernelMatcher::with_params`.
1413        assert!(
1414            slice_size > 0,
1415            "MatchGeneratorDriver::new requires slice_size > 0 (got 0)",
1416        );
1417        assert!(
1418            max_slices_in_window > 0,
1419            "MatchGeneratorDriver::new requires max_slices_in_window > 0 (got 0)",
1420        );
1421        let max_window_size = max_slices_in_window
1422            .checked_mul(slice_size)
1423            .expect("MatchGeneratorDriver::new: slice_size * max_slices_in_window overflows usize");
1424        // Derive an effective window_log for the initial-state matcher.
1425        // `MatchGeneratorDriver::new` runs BEFORE any reset, so it has
1426        // no LevelParams to consult — we initialise to whatever
1427        // window_log fits the caller's requested max_window_size
1428        // (round up to the next power of two via `next_power_of_two`'s
1429        // log). Reset() overwrites all three params from the resolved
1430        // LevelParams.
1431        //
1432        // `checked_next_power_of_two` returns `None` if the next power
1433        // of two would overflow `usize`. Modern Rust's
1434        // `next_power_of_two` PANICS on overflow rather than returning
1435        // 0 (the panic message is generic and unhelpful), so use the
1436        // checked variant to surface the failure with a clear,
1437        // domain-specific error.
1438        let next_pow2 = max_window_size.checked_next_power_of_two().expect(
1439            "MatchGeneratorDriver::new: max_window_size too large for \
1440             next_power_of_two without overflow",
1441        );
1442        let window_log_init = next_pow2.trailing_zeros() as u8;
1443        Self {
1444            vec_pool: Vec::new(),
1445            storage: MatcherStorage::Simple(FastKernelMatcher::with_params(
1446                window_log_init,
1447                FAST_LEVEL_1_HASH_LOG,
1448                FAST_LEVEL_1_MLS,
1449                2, // donor default step_size (targetLength=0 → step=2)
1450            )),
1451            strategy_tag: super::strategy::StrategyTag::Fast,
1452            search: super::strategy::SearchMethod::Fast,
1453            parse: super::strategy::ParseMode::Greedy,
1454            #[cfg(test)]
1455            config_override: None,
1456            param_overrides: None,
1457            slice_size,
1458            base_slice_size: slice_size,
1459            // Report the ROUNDED-UP window size that the matcher
1460            // actually carries (via `window_log_init = log2(next_pow2)`
1461            // → matcher's `max_window_size = 1 << window_log_init =
1462            // next_pow2`). For non-power-of-two `slice_size *
1463            // max_slices_in_window` inputs, the unrounded value
1464            // would under-report the active backend's window until
1465            // the first `reset()` overwrites both sides from the
1466            // resolved LevelParams.
1467            reported_window_size: next_pow2,
1468            reset_size_log: None,
1469            reset_shape: None,
1470            dictionary_retained_budget: 0,
1471            source_size_hint: None,
1472            dictionary_size_hint: None,
1473            borrowed_pending: None,
1474            primed: None,
1475        }
1476    }
1477
    /// Resolves `level`, together with the optional `source_size` hint,
    /// into its [`LevelParams`]. Thin forwarder to `resolve_level_params`;
    /// it performs no adjustment of its own.
    fn level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
        resolve_level_params(level, source_size)
    }
1481
    /// Install the public-parameter per-knob overrides (#27) applied at
    /// the next [`Matcher::reset`]. `None` (or an all-`None` set) restores
    /// plain level-based geometry. Persists across resets until changed.
    ///
    /// This only stores the value in `param_overrides`; nothing is
    /// re-resolved or rebuilt here.
    pub(crate) fn set_param_overrides(
        &mut self,
        overrides: Option<super::parameters::ParamOverrides>,
    ) {
        self.param_overrides = overrides;
    }
1491
    /// Active backend family derived from the storage variant. Single
    /// source of truth — no separate runtime tag to drift against.
    ///
    /// Delegates to `MatcherStorage::backend`, so the answer always
    /// reflects the variant currently held in `storage`.
    pub(crate) fn active_backend(&self) -> super::strategy::BackendTag {
        self.storage.backend()
    }
1497
1498    /// Whether the borrowed (no-copy, in-place over-window) scan is
1499    /// implemented for the current backend + search configuration. The
1500    /// HashChain backend serves both the lazy CHAIN parser
1501    /// (`SearchMethod::HashChain`) and the BT/optimal parsers
1502    /// (`SearchMethod::BinaryTree`); only the lazy chain has a borrowed scan
1503    /// so far, so BT/optimal stay on the owned path.
1504    pub(crate) fn borrowed_supported(&self) -> bool {
1505        use super::strategy::{BackendTag, SearchMethod, StrategyTag};
1506        match self.active_backend() {
1507            BackendTag::Simple | BackendTag::Dfast | BackendTag::Row => true,
1508            // The HashChain backend covers two searches: the lazy CHAIN parser
1509            // (borrowed-capable) and the BINARY-TREE search (btlazy2 L13-15 +
1510            // optimal BtOpt/BtUltra/BtUltra2 L16-22). btlazy2's BT-tree borrowed
1511            // scan is byte-identical to owned (reads via live_history()), so it
1512            // takes the in-place path. The OPTIMAL parsers stay owned: their
1513            // cost-based DP is sensitive to candidate quality, and the borrowed
1514            // continuous-index scan yields slightly different (ratio-worse)
1515            // candidates than the owned evict+rehash scan — borrowed optimal
1516            // both diverged from owned and fell outside the ffi ratio bound.
1517            // Search-aware (not just strategy_tag) so optimal BT can never be
1518            // staged on the borrowed path even via an internal caller.
1519            BackendTag::HashChain => match self.search {
1520                SearchMethod::HashChain => true,
1521                SearchMethod::BinaryTree => matches!(self.strategy_tag, StrategyTag::Btlazy2),
1522                _ => false,
1523            },
1524        }
1525    }
1526
1527    fn simple_mut(&mut self) -> &mut FastKernelMatcher {
1528        match &mut self.storage {
1529            MatcherStorage::Simple(m) => m,
1530            _ => panic!("simple backend must be initialized by reset() before use"),
1531        }
1532    }
1533
1534    /// Reclaim the per-block input buffer that the Simple backend
1535    /// just spent inside `start_matching` / `skip_matching_with_hint`.
1536    ///
1537    /// `FastKernelMatcher::take_recycled_space` returns the cleared
1538    /// (capacity-retained) `Vec<u8>` from the last
1539    /// `extend_history_with_pending`. We push it onto `vec_pool`
1540    /// as-is (with `len = 0`); `get_next_space()` is responsible for
1541    /// resizing the buffer back to `slice_size` on its next pop. The
1542    /// pushed length is irrelevant — only the capacity matters, and
1543    /// `extend_history_with_pending` preserves it. Without this
1544    /// recycle path, the Simple backend would allocate a new
1545    /// `Vec<u8>` per block — a measurable hot-path cost when blocks
1546    /// are small (~128 KiB) and processed at hundreds of MiB/s.
1547    fn recycle_simple_space(&mut self) {
1548        if let Some(space) = self.simple_mut().take_recycled_space() {
1549            // `space` is already cleared (len = 0) by
1550            // `extend_history_with_pending`; capacity is retained.
1551            // Leaving `len = 0` here avoids the cost of zero-filling
1552            // the entire allocation — `get_next_space()` resizes the
1553            // popped buffer up to `slice_size` on demand, so the
1554            // length the pool holds is irrelevant. This matters most
1555            // after a small-source-size hint has shrunk `slice_size`
1556            // mid-frame: the recycled buffer can be much larger than
1557            // the current `slice_size`, and zero-filling 128 KiB+ on
1558            // every block would erase the perf win the recycle path
1559            // is meant to deliver.
1560            self.vec_pool.push(space);
1561        }
1562    }
1563
1564    /// Register a caller-owned input buffer as the Simple backend's
1565    /// borrowed one-shot match window. Only valid on the Simple (Fast)
1566    /// backend; the one-shot frame path gates on that before calling.
1567    ///
1568    /// # Safety
1569    /// Same contract as [`FastKernelMatcher::set_borrowed_window`]: the
1570    /// buffer must stay live and unmodified until the window is cleared,
1571    /// and must be cleared before the buffer is dropped or the matcher is
1572    /// reused for another frame.
1573    pub(crate) unsafe fn set_borrowed_window(&mut self, buffer: &[u8]) {
1574        // SAFETY: forwarded contract — caller upholds liveness/clear.
1575        match self.active_backend() {
1576            super::strategy::BackendTag::Simple => unsafe {
1577                self.simple_mut().set_borrowed_window(buffer)
1578            },
1579            super::strategy::BackendTag::Dfast => unsafe {
1580                self.dfast_matcher_mut().set_borrowed_window(buffer)
1581            },
1582            super::strategy::BackendTag::Row => unsafe {
1583                self.row_matcher_mut().set_borrowed_window(buffer)
1584            },
1585            super::strategy::BackendTag::HashChain => unsafe {
1586                self.hc_matcher_mut().set_borrowed_window(buffer)
1587            },
1588        }
1589    }
1590
1591    /// Clear the borrowed one-shot window, returning the active backend
1592    /// to the owned `history` path.
1593    pub(crate) fn clear_borrowed_window(&mut self) {
1594        match self.active_backend() {
1595            super::strategy::BackendTag::Simple => self.simple_mut().clear_borrowed_window(),
1596            super::strategy::BackendTag::Dfast => self.dfast_matcher_mut().clear_borrowed_window(),
1597            super::strategy::BackendTag::Row => self.row_matcher_mut().clear_borrowed_window(),
1598            super::strategy::BackendTag::HashChain => self.hc_matcher_mut().clear_borrowed_window(),
1599            #[allow(unreachable_patterns)]
1600            _ => {}
1601        }
1602        self.borrowed_pending = None;
1603    }
1604
1605    /// Stage the borrowed block range `[block_start, block_end)` for the
1606    /// NEXT `start_matching` / `skip_matching_with_hint`, which the
1607    /// borrowed Fast frame path uses in place of `commit_space`. While
1608    /// staged, those trait calls route to the Simple backend's borrowed
1609    /// scan/skip (consuming the stage) instead of the owned committed
1610    /// block. See [`Matcher::start_matching`] /
1611    /// [`Matcher::skip_matching_with_hint`] on this type.
1612    pub(crate) fn set_borrowed_block(&mut self, block_start: usize, block_end: usize) {
1613        assert!(
1614            self.borrowed_supported(),
1615            "borrowed block staging is not supported for the active backend/search config",
1616        );
1617        assert!(
1618            block_start <= block_end,
1619            "borrowed block range must satisfy start <= end (start={block_start} end={block_end})",
1620        );
1621        self.borrowed_pending = Some((block_start, block_end));
1622        // Make the range visible to `get_last_space()` immediately: the
1623        // emit pipeline reads `get_last_space().len()` in
1624        // `collect_block_parts` BEFORE `start_matching` consumes the
1625        // stage, so the staged block (not the whole borrowed window) must
1626        // be reported now to keep the literal-buffer reservation right.
1627        match self.active_backend() {
1628            super::strategy::BackendTag::Simple => self
1629                .simple_mut()
1630                .stage_borrowed_block(block_start, block_end),
1631            super::strategy::BackendTag::Dfast => self
1632                .dfast_matcher_mut()
1633                .stage_borrowed_block(block_start, block_end),
1634            super::strategy::BackendTag::Row => self
1635                .row_matcher_mut()
1636                .stage_borrowed_block(block_start, block_end),
1637            super::strategy::BackendTag::HashChain => self
1638                .hc_matcher_mut()
1639                .table
1640                .stage_borrowed_block(block_start, block_end),
1641        }
1642    }
1643
1644    #[cfg(test)]
1645    fn dfast_matcher(&self) -> &DfastMatchGenerator {
1646        match &self.storage {
1647            MatcherStorage::Dfast(m) => m,
1648            _ => panic!("dfast backend must be initialized by reset() before use"),
1649        }
1650    }
1651
1652    fn dfast_matcher_mut(&mut self) -> &mut DfastMatchGenerator {
1653        match &mut self.storage {
1654            MatcherStorage::Dfast(m) => m,
1655            _ => panic!("dfast backend must be initialized by reset() before use"),
1656        }
1657    }
1658
1659    #[cfg(test)]
1660    fn row_matcher(&self) -> &RowMatchGenerator {
1661        match &self.storage {
1662            MatcherStorage::Row(m) => m,
1663            _ => panic!("row backend must be initialized by reset() before use"),
1664        }
1665    }
1666
1667    fn row_matcher_mut(&mut self) -> &mut RowMatchGenerator {
1668        match &mut self.storage {
1669            MatcherStorage::Row(m) => m,
1670            _ => panic!("row backend must be initialized by reset() before use"),
1671        }
1672    }
1673
1674    #[cfg(test)]
1675    fn hc_matcher(&self) -> &HcMatchGenerator {
1676        match &self.storage {
1677            MatcherStorage::HashChain(m) => m,
1678            _ => panic!("hash chain backend must be initialized by reset() before use"),
1679        }
1680    }
1681
1682    fn hc_matcher_mut(&mut self) -> &mut HcMatchGenerator {
1683        match &mut self.storage {
1684            MatcherStorage::HashChain(m) => m,
1685            _ => panic!("hash chain backend must be initialized by reset() before use"),
1686        }
1687    }
1688
1689    /// Shrink the active backend's `max_window_size` by the bytes
1690    /// reclaimed from the dictionary-retention budget. Returns `true`
1691    /// iff any reclamation happened — the caller uses that as the
1692    /// gate for [`Self::trim_after_budget_retire`] (which is a no-op
1693    /// otherwise: with `max_window_size` unchanged the backend's
1694    /// `trim_to_window` cannot find anything to evict, so calling it
1695    /// just runs an extra `match` ladder + a single early-out check
1696    /// per slice commit).
1697    #[must_use]
1698    fn retire_dictionary_budget(&mut self, evicted_bytes: usize) -> bool {
1699        let reclaimed = evicted_bytes.min(self.dictionary_retained_budget);
1700        if reclaimed == 0 {
1701            return false;
1702        }
1703        self.dictionary_retained_budget -= reclaimed;
1704        match self.active_backend() {
1705            super::strategy::BackendTag::Simple => {
1706                let matcher = self.simple_mut();
1707                // `reclaimed` can exceed the CURRENT `max_window_size`: the
1708                // retained dict budget is tracked independently and the
1709                // window may already have been shrunk by a prior eviction,
1710                // so the floor at 0 is the correct clamp, not a masked bug.
1711                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
1712            }
1713            super::strategy::BackendTag::Dfast => {
1714                let matcher = self.dfast_matcher_mut();
1715                // `reclaimed` can exceed the CURRENT `max_window_size`: the
1716                // retained dict budget is tracked independently and the
1717                // window may already have been shrunk by a prior eviction,
1718                // so the floor at 0 is the correct clamp, not a masked bug.
1719                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
1720            }
1721            super::strategy::BackendTag::Row => {
1722                let matcher = self.row_matcher_mut();
1723                // `reclaimed` can exceed the CURRENT `max_window_size`: the
1724                // retained dict budget is tracked independently and the
1725                // window may already have been shrunk by a prior eviction,
1726                // so the floor at 0 is the correct clamp, not a masked bug.
1727                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
1728            }
1729            super::strategy::BackendTag::HashChain => {
1730                let matcher = self.hc_matcher_mut();
1731                // See the Simple arm: `reclaimed` may exceed the current
1732                // window, so saturating to 0 is the correct clamp.
1733                matcher.table.max_window_size =
1734                    matcher.table.max_window_size.saturating_sub(reclaimed);
1735            }
1736        }
1737        true
1738    }
1739
1740    fn trim_after_budget_retire(&mut self) {
1741        loop {
1742            let mut evicted_bytes = 0usize;
1743            match self.active_backend() {
1744                super::strategy::BackendTag::Simple => {
1745                    // FastKernelMatcher owns its history as a single
1746                    // flat `Vec<u8>` (donor's flat-buffer layout)
1747                    // rather than the legacy per-block `WindowEntry`
1748                    // stack. There are no per-block Vec allocations
1749                    // to recycle into `vec_pool` — `trim_to_window`
1750                    // drains the oldest bytes in-place and returns
1751                    // the count for the dictionary-budget loop's
1752                    // termination check.
1753                    let MatcherStorage::Simple(m) = &mut self.storage else {
1754                        unreachable!("active_backend() == Simple proven above");
1755                    };
1756                    evicted_bytes += m.trim_to_window();
1757                }
1758                super::strategy::BackendTag::Dfast => {
1759                    // Dfast doesn't retain input Vecs — `history` is the
1760                    // only byte store, so there is no per-block buffer
1761                    // to push back through a callback. Eviction byte
1762                    // count is derived from the `window_size` delta
1763                    // before/after; the Dfast variant of
1764                    // `trim_to_window` takes no closure, sidestepping
1765                    // an unused-`impl FnMut` monomorphization that
1766                    // would otherwise contractually never fire.
1767                    let dfast = self.dfast_matcher_mut();
1768                    let pre = dfast.window_size;
1769                    dfast.trim_to_window();
1770                    evicted_bytes += pre - dfast.window_size;
1771                }
1772                super::strategy::BackendTag::Row => {
1773                    // Row keeps bytes only in the contiguous `history` mirror
1774                    // (block buffers are returned to the pool per block in
1775                    // `add_data`), so derive the eviction count from the
1776                    // `window_size` delta, mirroring the Dfast / HashChain arms.
1777                    let row = self.row_matcher_mut();
1778                    let pre = row.window_size;
1779                    row.trim_to_window();
1780                    evicted_bytes += pre - row.window_size;
1781                }
1782                super::strategy::BackendTag::HashChain => {
1783                    // HC keeps bytes only in the contiguous `history` mirror
1784                    // (no per-block Vecs to recycle since the window<->history
1785                    // dedup), so derive the eviction count from the
1786                    // `window_size` delta, mirroring the Dfast arm above.
1787                    let table = &mut self.hc_matcher_mut().table;
1788                    let pre = table.window_size;
1789                    table.trim_to_window();
1790                    evicted_bytes += pre - table.window_size;
1791                }
1792            }
1793            if evicted_bytes == 0 {
1794                break;
1795            }
1796            // The loop's invariant is "the backend's previous
1797            // `max_window_size` shrink had downstream bytes left to
1798            // evict" — that's what `evicted_bytes != 0` proves at
1799            // this point. `dictionary_retained_budget` is NOT
1800            // guaranteed to be positive here: the outer
1801            // `retire_dictionary_budget` call may have already
1802            // drained it to zero by reclaiming the last retained
1803            // bytes, while the backend still has bytes above the
1804            // freshly-shrunk window cap waiting for this loop to
1805            // evict. The return value of the retire call below is
1806            // therefore intentionally discarded — the loop's
1807            // termination is driven by `evicted_bytes == 0`, not by
1808            // whether the budget has more bytes left to reclaim.
1809            let _ = self.retire_dictionary_budget(evicted_bytes);
1810        }
1811    }
1812
1813    fn skip_matching_for_dictionary_priming(&mut self) {
1814        match self.active_backend() {
1815            super::strategy::BackendTag::Simple => {
1816                // Donor `ZSTD_shouldAttachDict` mode selection for the Fast
1817                // strategy (cutoff 8 KB): small / unknown-size inputs ATTACH
1818                // (index dict positions into a SEPARATE immutable table; the
1819                // dual-probe 2-cursor `compress_block_fast_dict` then prefers
1820                // recent-input matches and falls back to the dict — the path
1821                // that wins small/unknown). Large known-size inputs COPY (prime
1822                // dict into the live table; the 4-cursor `compress_block_fast`
1823                // matches against it as window history — the path that already
1824                // matches/beats the donor on large corpora). The dispatch in
1825                // `start_matching` keys off `dict_table.is_some()`, which only
1826                // the attach path populates. See [`FAST_ATTACH_DICT_CUTOFF_LOG`].
1827                let attach = self
1828                    .reset_size_log
1829                    .is_none_or(|log| log <= FAST_ATTACH_DICT_CUTOFF_LOG);
1830                if attach {
1831                    self.simple_mut().skip_matching_for_dict_prime();
1832                } else {
1833                    self.simple_mut().skip_matching_with_hint(Some(false));
1834                }
1835                self.recycle_simple_space();
1836            }
1837            super::strategy::BackendTag::Dfast => {
1838                // Donor `ZSTD_dictMatchState` mode selection for dfast (cutoff
1839                // 16 KiB): small / unknown-size inputs ATTACH (build the
1840                // separate immutable dict long+short tables; the dual-probe
1841                // `start_matching_fast_loop` searches live + dict, the path that
1842                // avoids the per-frame dict re-prime that dominates small
1843                // `compress-dict`). Larger known-size inputs COPY (re-prime the
1844                // dict into the live tables via `skip_matching_dense`, where the
1845                // dense scan matches it as window history). `skip_matching_for_dict_attach`
1846                // self-gates on `use_fast_loop` (only fast-loop levels carry the
1847                // dual-probe; general-path levels fall back to the dense copy).
1848                let attach = self
1849                    .reset_size_log
1850                    .is_none_or(|log| log <= DFAST_ATTACH_DICT_CUTOFF_LOG);
1851                if attach {
1852                    self.dfast_matcher_mut().skip_matching_for_dict_attach();
1853                } else {
1854                    self.dfast_matcher_mut().invalidate_dict_cache();
1855                    self.dfast_matcher_mut().skip_matching_dense();
1856                }
1857            }
1858            super::strategy::BackendTag::Row => {
1859                // Donor `ZSTD_RowFindBestMatch` `dictMatchState`: small /
1860                // unknown-size inputs ATTACH (build the separate immutable dict
1861                // row index; the bounded dual-probe in `row_candidate_rl`
1862                // searches live + dict, avoiding the per-frame dict re-index),
1863                // larger known-size inputs COPY (dense re-prime into the live
1864                // rows).
1865                let attach = self
1866                    .reset_size_log
1867                    .is_none_or(|log| log <= ROW_ATTACH_DICT_CUTOFF_LOG);
1868                if attach {
1869                    self.row_matcher_mut().prime_dict_attach_current_block();
1870                } else {
1871                    self.row_matcher_mut().invalidate_dict_cache();
1872                    self.row_matcher_mut().skip_matching_with_hint(Some(false));
1873                }
1874            }
1875            super::strategy::BackendTag::HashChain => {
1876                let table = &mut self.hc_matcher_mut().table;
1877                if table.uses_bt {
1878                    // BT / optimal levels: keep the dict in history for the dms
1879                    // but do NOT insert it into the live tree (donor separate
1880                    // dictMatchState). Lazy-HC levels still index the dict into
1881                    // the live chain (they have no dms).
1882                    table.skip_matching_dict_bt();
1883                } else {
1884                    self.hc_matcher_mut().skip_matching(Some(false));
1885                }
1886            }
1887        }
1888    }
1889}
1890
1891impl Matcher for MatchGeneratorDriver {
    /// Reports whether this matcher supports dictionary priming; the driver
    /// unconditionally answers `true`.
    fn supports_dictionary_priming(&self) -> bool {
        true
    }
1895
    /// Stores `size` as the pending source-size hint, replacing any earlier
    /// one.
    ///
    /// The hint is one-shot: `reset` consumes it with `take()`, so it must be
    /// set again before each reset that should see it.
    fn set_source_size_hint(&mut self, size: u64) {
        self.source_size_hint = Some(size);
    }
1899
    /// Stores `size` as the pending dictionary-size hint, replacing any
    /// earlier one.
    ///
    /// Like the source-size hint it is consumed by `reset` via `take()`.
    /// `reset` treats a hint of `0` as "no dictionary" when sizing tables.
    fn set_dictionary_size_hint(&mut self, size: usize) {
        self.dictionary_size_hint = Some(size);
    }
1903
1904    /// Heap bytes this driver owns: the active backend's tables/history, the
1905    /// recycled input-buffer pool, and the primed-dictionary snapshot (a cloned
1906    /// backend kept for CDict-equivalent reuse). The inline struct itself is
1907    /// accounted by the owner's `size_of`.
1908    fn heap_size(&self) -> usize {
1909        let pool: usize = self.vec_pool.capacity() * core::mem::size_of::<Vec<u8>>()
1910            + self.vec_pool.iter().map(Vec::capacity).sum::<usize>();
1911        let snapshot = self
1912            .primed
1913            .as_ref()
1914            .map_or(0, |(storage, _, _)| storage.heap_size());
1915        pool + self.storage.heap_size() + snapshot
1916    }
1917
    /// Drops any stored parameter overrides.
    ///
    /// `reset` only applies overrides when `param_overrides` is `Some` and
    /// non-empty, so after this call it uses the level-resolved parameters
    /// as they are.
    fn clear_param_overrides(&mut self) {
        self.param_overrides = None;
    }
1921
1922    fn reset(&mut self, level: CompressionLevel) {
1923        let hint = self.source_size_hint.take();
1924        let dict_hint = self.dictionary_size_hint.take();
1925        // Snapshot the hint's normalized ceil-log bucket for the primed-snapshot
1926        // key and prime_with_dictionary's attach/copy mode decision (the hint is
1927        // consumed here, but priming happens just after reset). Storing the
1928        // bucket rather than the raw bytes means two hints that resolve to the
1929        // same matcher shape share one snapshot instead of each re-priming.
1930        self.reset_size_log = hint.map(source_size_ceil_log);
1931        let hinted = hint.is_some();
1932        #[cfg_attr(not(test), allow(unused_mut))]
1933        let mut params = Self::level_params(level, hint);
1934        // Test-only: apply a parse×search override so the matrix can be
1935        // exercised without editing `LEVEL_TABLE`. Mutating `params` here
1936        // (before `next_backend`) flows the override through storage
1937        // selection, `configure`, and the `self.search`/`self.parse`
1938        // writes uniformly. Consumed with `take()` so it is one-shot: the
1939        // synthetic pairing applies to exactly this `reset()`, and a later
1940        // reset on the same driver falls back to the level's real config.
1941        #[cfg(test)]
1942        if let Some((search, parse)) = self.config_override.take() {
1943            params.search = search;
1944            params.lazy_depth = parse.lazy_depth();
1945            // The matrix sweep can pair a level with a backend its native
1946            // row doesn't populate (e.g. greedy L5, which carries only `row`,
1947            // run on HashChain). Synthesize a default config for the
1948            // overridden backend so its `configure` arm has something to read.
1949            use super::strategy::SearchMethod;
1950            match search {
1951                SearchMethod::Fast => {
1952                    params.fast.get_or_insert(FAST_L1);
1953                }
1954                SearchMethod::DoubleFast => {
1955                    params.dfast.get_or_insert(DFAST_L3);
1956                }
1957                SearchMethod::RowHash => {
1958                    params.row.get_or_insert(ROW_CONFIG);
1959                }
1960                SearchMethod::HashChain | SearchMethod::BinaryTree => {
1961                    params.hc.get_or_insert(HC_CONFIG);
1962                }
1963            }
1964        }
1965        // Public-parameter overrides (#27): apply the per-knob set on top
1966        // of the level-resolved params. A strategy override re-routes the
1967        // backend, so this must precede `next_backend` selection. The
1968        // all-`None` case is skipped so default level geometry stays
1969        // byte-identical to plain level-based compression.
1970        if let Some(ov) = self.param_overrides
1971            && !ov.is_empty()
1972        {
1973            apply_param_overrides(&mut params, &ov);
1974            // `Self::level_params(level, hint)` applied the source-size cap
1975            // for the LEVEL's native backend. If a strategy override moved
1976            // the frame onto a different backend, `apply_param_overrides`
1977            // synthesized that backend's DEFAULT config (FAST_L1 /
1978            // HC_OVERRIDE_DEFAULT) with full-size table logs AFTER that cap
1979            // ran. Re-apply the hint cap so a tiny hinted frame doesn't
1980            // allocate the new backend's full-size tables. An explicit
1981            // `window_log` override is the user's hard request and must
1982            // survive the re-cap, so restore it afterwards.
1983            if let Some(hint_size) = hint {
1984                params = adjust_params_for_source_size(params, hint_size);
1985                if let Some(window_log) = ov.window_log {
1986                    params.window_log = window_log;
1987                }
1988            }
1989        }
1990        // Dictionary-driven table sizing — parity with donor `ZSTD_createCDict`
1991        // (`ZSTD_getCParams_internal(level, UNKNOWN, dictSize, ZSTD_cpm_createCDict)`
1992        // → `ZSTD_adjustCParams_internal`). A loaded dictionary supplies the
1993        // long-distance matches, so donor sizes the prepared match-finder tables
1994        // to the DICTIONARY (assuming a `minSrcSize` source), not the live
1995        // window: it downsizes `hashLog`/`chainLog` toward the dict-and-window
1996        // log while leaving the frame's eviction `window_log` source-derived so
1997        // the dictionary bytes stay referenceable (`ZSTD_resetCCtx_byCopyingCDict`
1998        // copies the small CDict tables but keeps the source window). We apply
1999        // the same downsizing to the level's own hc geometry and cap (min) so a
2000        // dict never inflates the level tables. Only the binary-tree / hash-chain
2001        // backend reads `hc.{hash,chain}_log`; Simple/Dfast/Row derive their
2002        // widths from the source window in their `reset` arms.
2003        // A zero-length dictionary is "no dictionary": running the CDict sizing
2004        // path for `Some(0)` is not a no-op — `cdict_table_logs(.., 0)` still
2005        // collapses the HC/BT tables toward the 513-byte donor tier via
2006        // `DICT_MIN_SRC_SIZE`, tanking ratio/perf on the next frame. Priming
2007        // already treats empty content as empty, so skip the downsizing here too.
2008        if let Some(dict_size) = dict_hint.filter(|&size| size > 0) {
2009            // Derive the dict-tier geometry from the level's FULL (un-source-capped)
2010            // hc widths. `Self::level_params(level, hint)` already source-capped
2011            // `params.hc`; feeding those capped widths into `cdict_table_logs` and
2012            // then `.min()`-ing would double-cap, so on a small hinted source with a
2013            // large dictionary the prepared tables collapse below what the dict needs
2014            // — defeating the `ZSTD_createCDict` geometry this mirrors. Take the
2015            // un-hinted base widths instead and assign the result directly:
2016            // `cdict_table_logs` only ever downsizes, so it never exceeds the base
2017            // level geometry, while the eviction `window_log` stays source-derived so
2018            // the dictionary bytes remain referenceable. Active public-parameter
2019            // overrides (#27) are applied to the base too, so a strategy override
2020            // that routes onto HashChain/BinaryTree still gets dict-tier sizing and
2021            // explicit hash/chain overrides feed through as the geometry ceiling.
2022            let mut base_params = Self::level_params(level, None);
2023            if let Some(ov) = self.param_overrides
2024                && !ov.is_empty()
2025            {
2026                apply_param_overrides(&mut base_params, &ov);
2027            }
2028            if let (Some(hc), Some(base_hc)) = (params.hc.as_mut(), base_params.hc) {
2029                let uses_bt = matches!(
2030                    params.strategy_tag,
2031                    super::strategy::StrategyTag::Btlazy2
2032                        | super::strategy::StrategyTag::BtOpt
2033                        | super::strategy::StrategyTag::BtUltra
2034                        | super::strategy::StrategyTag::BtUltra2
2035                );
2036                let (dict_hash_log, dict_chain_log) = cdict_table_logs(
2037                    params.window_log,
2038                    base_hc.hash_log,
2039                    base_hc.chain_log,
2040                    uses_bt,
2041                    dict_size,
2042                );
2043                hc.hash_log = dict_hash_log;
2044                hc.chain_log = dict_chain_log;
2045            }
2046        }
2047        let next_backend = params.backend();
2048        let max_window_size = 1usize << params.window_log;
2049        self.dictionary_retained_budget = 0;
2050        // Drop any frame-local borrowed staging so it can't leak across a
2051        // reset and misroute the next start/skip into borrowed dispatch.
2052        self.borrowed_pending = None;
2053        if self.active_backend() != next_backend {
2054            // Drain the outgoing backend's allocations into the shared
2055            // pool. The `match &mut self.storage { ... }` block runs to
2056            // completion before the assignment below replaces the
2057            // variant, so the inner state we just drained is dropped
2058            // with the old variant.
2059            match &mut self.storage {
2060                MatcherStorage::Simple(_m) => {
2061                    // FastKernelMatcher owns a flat Vec<u8> history
2062                    // and a Vec<u32> hash table — both drop with the
2063                    // variant assignment below, no per-block buffers
2064                    // to recycle into the driver pools. The
2065                    // assignment-replace path collapses to a noop
2066                    // pre-pass for this backend.
2067                }
2068                MatcherStorage::Dfast(m) => {
2069                    // Drop the long / short hash table allocations
2070                    // before calling `m.reset`. Without this prepass,
2071                    // `DfastMatchGenerator::reset` would `fill` both
2072                    // tables with `DFAST_EMPTY_SLOT` sentinels — wasted
2073                    // work given the next assignment to `self.storage`
2074                    // is about to drop `m` entirely. `reset` itself
2075                    // short-circuits on `if !self.short_hash.is_empty()`,
2076                    // so handing it an empty `Vec` skips the fill loop.
2077                    // Mirrors the pre-drain pattern in the HashChain
2078                    // arm below (and serves the same peak-memory
2079                    // purpose: release the table-allocation footprint
2080                    // before constructing the replacement variant).
2081                    m.short_hash = Vec::new();
2082                    m.long_hash = Vec::new();
2083                    m.reset();
2084                }
2085                MatcherStorage::Row(m) => {
2086                    m.row_heads = Vec::new();
2087                    m.row_positions = Vec::new();
2088                    m.row_tags = Vec::new();
2089                    m.reset();
2090                }
2091                MatcherStorage::HashChain(m) => {
2092                    // Release oversized tables when switching away from
2093                    // HashChain so Best's larger allocations don't persist.
2094                    // hash3_table must be released alongside the other
2095                    // two: BtUltra2's `1 << HC3_HASH_LOG` entries would
2096                    // otherwise stay pinned across the backend switch,
2097                    // even though no future caller of this backend will
2098                    // touch them.
2099                    m.table.hash_table = Vec::new();
2100                    m.table.chain_table = Vec::new();
2101                    m.table.hash3_table = Vec::new();
2102                    let vec_pool = &mut self.vec_pool;
2103                    m.reset(|mut data| {
2104                        data.resize(data.capacity(), 0);
2105                        vec_pool.push(data);
2106                    });
2107                }
2108            }
2109            // Swap in a fresh variant for the new backend. The previous
2110            // `storage` is dropped here.
2111            self.storage = match next_backend {
2112                super::strategy::BackendTag::Simple => {
2113                    // Per-level Fast cParams from resolve_level_params:
2114                    // Level(1) gets (hash_log=14, mls=7); Level(-7..=-1)
2115                    // get donor row-0 (hash_log=13, mls=7); Fastest /
2116                    // Uncompressed keep (hash_log=14, mls=6). See
2117                    // resolve_level_params for rationale.
2118                    let fast = params.fast.expect("Fast level row carries a FastConfig");
2119                    MatcherStorage::Simple(FastKernelMatcher::with_params(
2120                        params.window_log,
2121                        fast.hash_log,
2122                        fast.mls,
2123                        fast.step_size,
2124                    ))
2125                }
2126                super::strategy::BackendTag::Dfast => {
2127                    MatcherStorage::Dfast(DfastMatchGenerator::new(max_window_size))
2128                }
2129                super::strategy::BackendTag::Row => {
2130                    MatcherStorage::Row(RowMatchGenerator::new(max_window_size))
2131                }
2132                super::strategy::BackendTag::HashChain => {
2133                    MatcherStorage::HashChain(HcMatchGenerator::new(max_window_size))
2134                }
2135            };
2136        }
2137
2138        // Single source of truth: `LevelParams::strategy_tag` is the
2139        // authoritative mapping from `CompressionLevel` to strategy.
2140        // `storage.backend()` derives the parse family from the variant,
2141        // so there is no separate runtime tag that could drift against
2142        // `LEVEL_TABLE`.
2143        self.strategy_tag = params.strategy_tag;
2144        self.search = params.search;
2145        self.parse = params.parse();
2146        self.slice_size = self.base_slice_size.min(max_window_size);
2147        self.reported_window_size = max_window_size;
2148        let strategy_tag = self.strategy_tag;
2149        // Source-proportional table window for the backends whose hash-table
2150        // widths are recomputed here (Dfast / Row). Like the HC / Fast caps
2151        // in `adjust_params_for_source_size`, this sizes the internal tables
2152        // from the RAW source log (not the wire `window_log` floor) so a
2153        // small frame zeroes a small table; it never exceeds the real window.
2154        let table_window_size = match hint {
2155            Some(h) => {
2156                let raw_log = source_size_ceil_log(h);
2157                // Clamp the shift below the pointer width before `1usize <<`:
2158                // an oversized hint (>= 2^63 + 1, and on 32-bit usize any hint
2159                // >= 2^32) drives `raw_log` to 64 / >= 32, and the shift would
2160                // overflow (panic in debug, wrap to 0 in release) before the
2161                // `.min(max_window_size)` cap below could bound it. The min cap
2162                // still provides the real semantic window bound.
2163                let shift = raw_log.max(MIN_WINDOW_LOG).min(usize::BITS as u8 - 1);
2164                (1usize << shift).min(max_window_size)
2165            }
2166            None => max_window_size,
2167        };
2168        // The hint-dependent hash-table width the active backend applies, for
2169        // the primed-snapshot key. Dfast/Row compute it from `table_window_size`
2170        // below; HC/Fast leave it `0` because their widths live in `params`
2171        // (`hc.{hash,chain}_log` / `fast_hash_log`) — already part of the key.
2172        let mut resolved_table_bits: usize = 0;
2173        match &mut self.storage {
2174            MatcherStorage::Simple(m) => {
2175                // Per-level Fast cParams threaded from
2176                // resolve_level_params (see Simple-backend swap
2177                // arm above for the (level → params) mapping).
2178                let fast = params.fast.expect("Fast level row carries a FastConfig");
2179                // Same attach/copy split the dict-prime dispatch applies
2180                // below (`prime_with_dictionary`): only attach-mode dict
2181                // frames may keep the main table across the reset via an
2182                // epoch advance — copy-mode and no-dict frames must memset
2183                // it back to bias 0 for the raw-slice kernels.
2184                // `Some(0)` is "no dictionary" (the dict-sizing path above
2185                // filters it the same way): an empty dict primes nothing, so
2186                // an epoch-advance reset would preserve stale attach state
2187                // instead of clearing it.
2188                let dict_attach_epoch = matches!(dict_hint, Some(size) if size > 0)
2189                    && self
2190                        .reset_size_log
2191                        .is_none_or(|log| log <= FAST_ATTACH_DICT_CUTOFF_LOG);
2192                // Copy-mode dictionary frame whose primed snapshot matches
2193                // this exact resolved shape: `restore_primed_dictionary`
2194                // (called right after this reset; the caller gates the
2195                // restore on the same size bucket and the restore re-checks
2196                // the same key) will `clone_from` the snapshot over this
2197                // matcher, replacing the table contents and bias wholesale —
2198                // the reset's full-table memset would be thrown away. The
2199                // key components mirror `reset_shape` below: Simple leaves
2200                // `resolved_table_bits` 0, never carries an LDM override,
2201                // and `fast_attach` is false in copy mode by construction.
2202                let table_overwritten_by_restore = matches!(dict_hint, Some(size) if size > 0)
2203                    && !dict_attach_epoch
2204                    && self.primed.as_ref().is_some_and(|(_, _, captured)| {
2205                        *captured
2206                            == PrimedKey {
2207                                level,
2208                                params,
2209                                table_bits: 0,
2210                                fast_attach: false,
2211                                ldm: None,
2212                            }
2213                    });
2214                m.reset(
2215                    params.window_log,
2216                    fast.hash_log,
2217                    fast.mls,
2218                    fast.step_size,
2219                    dict_attach_epoch,
2220                    table_overwritten_by_restore,
2221                );
2222            }
2223            MatcherStorage::Dfast(dfast) => {
2224                dfast.max_window_size = max_window_size;
2225                let dcfg = params
2226                    .dfast
2227                    .expect("Dfast level row must carry a DfastConfig");
2228                // Donor `cParams.hashLog`/`chainLog`, capped by the
2229                // source-size window when hinted so tiny inputs don't
2230                // over-allocate.
2231                let long_bits = if hinted {
2232                    dfast_hash_bits_for_window(table_window_size).min(dcfg.long_hash_log as usize)
2233                } else {
2234                    dcfg.long_hash_log as usize
2235                };
2236                let short_bits = if hinted {
2237                    dfast_hash_bits_for_window(table_window_size).min(dcfg.short_hash_log as usize)
2238                } else {
2239                    dcfg.short_hash_log as usize
2240                };
2241                resolved_table_bits = long_bits;
2242                dfast.set_hash_bits(long_bits, short_bits);
2243                // Dfast holds no per-block input Vecs (history owns the
2244                // bytes and `add_data` returns each Vec eagerly), so
2245                // `reset` takes no `reuse_space` callback.
2246                dfast.reset();
2247            }
2248            MatcherStorage::Row(row) => {
2249                row.max_window_size = max_window_size;
2250                row.lazy_depth = params.lazy_depth;
2251                let mut row_cfg = params.row.expect("Row level row carries a RowConfig");
2252                if hinted {
2253                    // Clamp the configured hash width by the hinted window
2254                    // (donor `ZSTD_adjustCParams` caps hashLog by windowLog) —
2255                    // `min`, not replace, so an explicit `hash_log` param
2256                    // override (`row_cfg.hash_bits`) survives the hinted path
2257                    // instead of being overwritten by the window value.
2258                    //
2259                    // Clamp BEFORE `configure` so the backend sees ONE width
2260                    // per frame. Configuring with the unclamped level width
2261                    // and then re-clamping made `row_hash_log` oscillate on
2262                    // every hinted frame, and each width change clears the
2263                    // row tables — `ensure_tables` then re-filled all three
2264                    // every frame in a reused compressor.
2265                    row_cfg.hash_bits = row_cfg
2266                        .hash_bits
2267                        .min(row_hash_bits_for_window(table_window_size));
2268                }
2269                row.configure(row_cfg);
2270                // Key the primed snapshot on the width the backend ACTUALLY
2271                // applied (`set_hash_bits` clamps the request): recording the
2272                // request — or the 0 default on the unhinted path — keys
2273                // identical table geometries apart and forces needless
2274                // dictionary re-primes.
2275                resolved_table_bits = row.hash_bits();
2276                row.reset();
2277            }
2278            MatcherStorage::HashChain(hc) => {
2279                hc.table.max_window_size = max_window_size;
2280                hc.hc.lazy_depth = params.lazy_depth;
2281                let mut hc_cfg = params.hc.expect("HashChain level row carries an HcConfig");
2282                // Cap the hash / chain table logs by the hinted window so a small
2283                // input doesn't allocate the full level's tables (the donor
2284                // `ZSTD_adjustCParams_internal` clamp: `hashLog <= windowLog + 1`,
2285                // and `cycleLog <= windowLog` — `cycleLog == chainLog` for the HC
2286                // finder, `chainLog - 1` for the BT pair table, so `chainLog <=
2287                // windowLog` (+1 for BT)). Ratio-neutral: a hinted window of
2288                // `2^wlog` bytes holds at most `2^wlog` positions, so the slots
2289                // beyond that are never populated — capping only sheds unused
2290                // allocation. Was the source of L10-lazy peak-alloc ~2.15x the
2291                // donor on a 1 MiB input. Only applied when hinted; an
2292                // unknown-size stream keeps the full level tables.
2293                if hinted {
2294                    let wlog = hc_hash_bits_for_window(table_window_size);
2295                    let uses_bt = matches!(
2296                        strategy_tag,
2297                        super::strategy::StrategyTag::Btlazy2
2298                            | super::strategy::StrategyTag::BtOpt
2299                            | super::strategy::StrategyTag::BtUltra
2300                            | super::strategy::StrategyTag::BtUltra2
2301                    );
2302                    hc_cfg.hash_log = hc_cfg.hash_log.min(wlog + 1);
2303                    hc_cfg.chain_log = hc_cfg.chain_log.min(if uses_bt { wlog + 1 } else { wlog });
2304                }
2305                hc.configure(hc_cfg, strategy_tag, params.window_log);
2306                let vec_pool = &mut self.vec_pool;
2307                hc.reset(|mut data| {
2308                    data.resize(data.capacity(), 0);
2309                    vec_pool.push(data);
2310                });
2311                // When the source size is known, pre-size the history mirror to
2312                // the expected total (dictionary + payload) so per-block growth
2313                // does not overshoot via Vec capacity doubling (donor sizes its
2314                // window buffer exactly). Dominates peak once the match-finder
2315                // tables are dictionary-tier-small. Unhinted streams skip this
2316                // and keep doubling growth.
2317                if let Some(src) = hint {
2318                    // `src` is a u64 hint and may be the u64::MAX "unknown
2319                    // size" sentinel, which truncates under `as usize` on
2320                    // 32-bit targets and overflows when the dict hint is
2321                    // added. Saturate the source size, then saturate the
2322                    // dict-hint addition; `reserve_history` applies the
2323                    // tighter window ceiling to the result.
2324                    let src_hint = usize::try_from(src).unwrap_or(usize::MAX);
2325                    let expected = src_hint.saturating_add(dict_hint.unwrap_or(0));
2326                    hc.table.reserve_history(expected);
2327                }
2328            }
2329        }
2330        // LDM wiring (#27): attach (or clear) the long-distance-match
2331        // producer on the optimal (BT) backend. LDM is the only
2332        // back-reference path that crosses the regular window, so it
2333        // only has a home on the `BtMatcher`; non-BT strategies drop the
2334        // producer. Built AFTER `hc.reset()` because `BtMatcher::reset`
2335        // clears an existing producer's table but does not null the
2336        // slot — installing here gives the new frame a fresh producer.
2337        #[cfg(feature = "hash")]
2338        if let MatcherStorage::HashChain(hc) = &mut self.storage {
2339            let producer = self
2340                .param_overrides
2341                .as_ref()
2342                .and_then(|ov| ov.ldm)
2343                .map(|ldm_ov| {
2344                    let strategy_ord = ldm_strategy_ordinal(params.strategy_tag, params.lazy_depth);
2345                    // Seed the caller-pinned knobs, then run the donor
2346                    // derivation over the seed so the remaining (zero)
2347                    // fields are filled with cross-field consistency
2348                    // (e.g. `hash_rate_log = window_log - hash_log`).
2349                    // Clobbering after `adjust_for` would break that and
2350                    // hand the producer an inconsistent set.
2351                    let seed = super::ldm::params::LdmParams {
2352                        window_log: params.window_log as u32,
2353                        hash_log: ldm_ov.hash_log.unwrap_or(0),
2354                        hash_rate_log: ldm_ov.hash_rate_log.unwrap_or(0),
2355                        min_match_length: ldm_ov.min_match.unwrap_or(0),
2356                        bucket_size_log: ldm_ov.bucket_size_log.unwrap_or(0),
2357                    };
2358                    super::ldm::LdmProducer::new(seed.derive(strategy_ord))
2359                });
2360            hc.set_ldm_producer(producer);
2361        }
2362        // Record the resolved matcher shape for the primed-snapshot key. Captured
2363        // here (post-resolution, after the test-only param override) so the key
2364        // reflects exactly the geometry the restored `storage` must match. The
2365        // Fast attach-vs-copy mode is part of the shape ONLY for the Simple
2366        // backend (it decides whether a separate dict table is built); the other
2367        // backends prime the dictionary the same way regardless, so including
2368        // the bit there would over-key identical resolved shapes. When it
2369        // applies it matches the decision `prime_with_dictionary` makes from the
2370        // same `reset_size_log`.
2371        let fast_attach = matches!(next_backend, super::strategy::BackendTag::Simple)
2372            && self
2373                .reset_size_log
2374                .is_none_or(|log| log <= FAST_ATTACH_DICT_CUTOFF_LOG);
2375        // The LDM override is part of the snapshot identity ONLY on the
2376        // optimal (BinaryTree) path: that is the only backend whose cloned
2377        // `storage` carries a `BtMatcher::ldm_producer`. On Fast / Dfast /
2378        // Row and lazy-HashChain resets the producer slot does not exist,
2379        // so folding the override there would over-key the snapshot and
2380        // force needless re-primes when LDM is toggled. Gated like
2381        // `fast_attach` (a key bit only participates where it changes the
2382        // cloned matcher shape).
2383        let active_ldm = if matches!(params.search, super::strategy::SearchMethod::BinaryTree) {
2384            self.param_overrides.and_then(|ov| ov.ldm)
2385        } else {
2386            None
2387        };
2388        self.reset_shape = Some((params, resolved_table_bits, fast_attach, active_ldm));
2389    }
2390
2391    fn prime_with_dictionary(&mut self, dict_content: &[u8], offset_hist: [u32; 3]) {
2392        match self.active_backend() {
2393            super::strategy::BackendTag::Simple => {
2394                // Routes through prime_offset_history so BOTH
2395                // offset_hist (wire encoder) and rep[0..2] (kernel)
2396                // are updated atomically. Without this, the two
2397                // tracks drift after dict priming — kernel emits
2398                // repcode matches against stale FAST_INITIAL_REP
2399                // while the wire encoder uses the primed history,
2400                // producing divergent wire encoding (Copilot review
2401                // #15 on #216).
2402                self.simple_mut().prime_offset_history(offset_hist);
2403            }
2404            super::strategy::BackendTag::Dfast => {
2405                self.dfast_matcher_mut().offset_hist = offset_hist
2406            }
2407            super::strategy::BackendTag::Row => self.row_matcher_mut().offset_hist = offset_hist,
2408            super::strategy::BackendTag::HashChain => {
2409                let matcher = self.hc_matcher_mut();
2410                matcher.table.offset_hist = offset_hist;
2411                matcher.table.mark_dictionary_primed();
2412            }
2413        }
2414
2415        if dict_content.is_empty() {
2416            return;
2417        }
2418
2419        // Dictionary bytes should stay addressable until produced frame output
2420        // itself exceeds the live window size. We bump `max_window_size`
2421        // by the dictionary length so the eviction band keeps the
2422        // primed bytes in `history`.
2423        //
2424        // Cap: `with_params`/`reset` enforce `window_log <= 30` so the
2425        // eviction band `2 * max_window_size` stays below `u32::MAX`
2426        // with headroom for one MAX_BLOCK_SIZE pending block — the
2427        // kernel asserts `data.len() <= u32::MAX`. A large enough
2428        // dictionary could otherwise push `max_window_size` past
2429        // that ceiling via the `saturating_add` below and silently
2430        // re-introduce the same overflow the `window_log` cap was
2431        // designed to prevent. Clamp the post-priming size so the
2432        // doubled-band-plus-block invariant survives.
2433        const MAX_PRIMED_WINDOW_SIZE: usize =
2434            (u32::MAX as usize - crate::common::MAX_BLOCK_SIZE as usize) / 2;
2435
2436        // `requested_dict_budget` is what the caller asked for;
2437        // `base_max_window_size` snapshots the pre-priming cap so we
2438        // can compute how much window the cap actually GRANTED below.
2439        // The cap may clip the requested growth, in which case the
2440        // bookkeeping (`dictionary_retained_budget` retire path) must
2441        // track only the granted portion — otherwise
2442        // `retire_dictionary_budget()` would later reclaim more than
2443        // was actually added and shrink the matcher below its real
2444        // base window (and `cap = 2 * max_window_size` would shrink
2445        // with it, risking under-allocation on subsequent commits).
2446        // The `granted_retained_budget` calculation further below is
2447        // the load-bearing piece — see its block-level comment for
2448        // the post-clip / post-uncommitted-tail math.
2449        let requested_dict_budget = dict_content.len();
2450        let base_max_window_size = match self.active_backend() {
2451            super::strategy::BackendTag::Simple => self.simple_mut().max_window_size,
2452            super::strategy::BackendTag::Dfast => self.dfast_matcher_mut().max_window_size,
2453            super::strategy::BackendTag::Row => self.row_matcher_mut().max_window_size,
2454            super::strategy::BackendTag::HashChain => self.hc_matcher_mut().table.max_window_size,
2455        };
2456        match self.active_backend() {
2457            super::strategy::BackendTag::Simple => {
2458                let matcher = self.simple_mut();
2459                matcher.max_window_size = matcher
2460                    .max_window_size
2461                    .saturating_add(requested_dict_budget)
2462                    .min(MAX_PRIMED_WINDOW_SIZE);
2463            }
2464            super::strategy::BackendTag::Dfast => {
2465                let matcher = self.dfast_matcher_mut();
2466                matcher.max_window_size = matcher
2467                    .max_window_size
2468                    .saturating_add(requested_dict_budget)
2469                    .min(MAX_PRIMED_WINDOW_SIZE);
2470            }
2471            super::strategy::BackendTag::Row => {
2472                let matcher = self.row_matcher_mut();
2473                matcher.max_window_size = matcher
2474                    .max_window_size
2475                    .saturating_add(requested_dict_budget)
2476                    .min(MAX_PRIMED_WINDOW_SIZE);
2477            }
2478            super::strategy::BackendTag::HashChain => {
2479                let matcher = self.hc_matcher_mut();
2480                matcher.table.max_window_size = matcher
2481                    .table
2482                    .max_window_size
2483                    .saturating_add(requested_dict_budget)
2484                    .min(MAX_PRIMED_WINDOW_SIZE);
2485            }
2486        }
2487
2488        let mut start = 0usize;
2489        let mut committed_dict_budget = 0usize;
2490        // insert_position needs 4 bytes of lookahead for hashing;
2491        // backfill_boundary_positions re-visits tail positions once the
2492        // next slice extends history, but cannot hash <4 byte fragments.
2493        let min_primed_tail = match self.active_backend() {
2494            super::strategy::BackendTag::Simple => MIN_MATCH_LEN,
2495            super::strategy::BackendTag::Dfast
2496            | super::strategy::BackendTag::Row
2497            | super::strategy::BackendTag::HashChain => 4,
2498        };
2499        while start < dict_content.len() {
2500            let end = (start + self.slice_size).min(dict_content.len());
2501            if end - start < min_primed_tail {
2502                break;
2503            }
2504            let mut space = self.get_next_space();
2505            space.clear();
2506            space.extend_from_slice(&dict_content[start..end]);
2507            self.commit_space(space);
2508            self.skip_matching_for_dictionary_priming();
2509            committed_dict_budget += end - start;
2510            start = end;
2511        }
2512
2513        // Derive `granted_retained_budget` directly from the two real
2514        // bounds — bytes actually committed and bytes the cap allows
2515        // — instead of doing a cap-clip pass followed by an
2516        // uncommitted-tail subtract. Previous shape double-discounted
2517        // when the cap clipped: clip lost `(requested - allowed)`,
2518        // then tail-subtract lost ANOTHER `(requested - committed)`,
2519        // leaving `max_window_size` shy of the dictionary that was
2520        // actually retained (e.g. cap=900, committed=998, uncommitted=2
2521        // landed at granted=898 instead of the correct 900).
2522        let capped_retained_budget = MAX_PRIMED_WINDOW_SIZE.saturating_sub(base_max_window_size);
2523        let granted_retained_budget = committed_dict_budget.min(capped_retained_budget);
2524        let final_max_window_size = base_max_window_size.saturating_add(granted_retained_budget);
2525        match self.active_backend() {
2526            super::strategy::BackendTag::Simple => {
2527                self.simple_mut().max_window_size = final_max_window_size;
2528            }
2529            super::strategy::BackendTag::Dfast => {
2530                self.dfast_matcher_mut().max_window_size = final_max_window_size;
2531            }
2532            super::strategy::BackendTag::Row => {
2533                self.row_matcher_mut().max_window_size = final_max_window_size;
2534            }
2535            super::strategy::BackendTag::HashChain => {
2536                self.hc_matcher_mut().table.max_window_size = final_max_window_size;
2537            }
2538        }
2539        if granted_retained_budget > 0 {
2540            self.dictionary_retained_budget = self
2541                .dictionary_retained_budget
2542                .saturating_add(granted_retained_budget);
2543        }
2544        if self.active_backend() == super::strategy::BackendTag::HashChain {
2545            let table = &mut self.hc_matcher_mut().table;
2546            table.set_dictionary_limit_from_primed_bytes(committed_dict_budget);
2547            // Build the dictMatchState chain for BT/optimal levels so the
2548            // collect dual-probes the dictionary with its own compare budget
2549            // (the dict bytes were just committed to the front of history).
2550            if table.uses_bt {
2551                table.prime_dms_bt(committed_dict_budget);
2552            }
2553        }
2554        // CDict-equivalent: now that every dict chunk is indexed, mark the
2555        // Fast-backend dict table primed so the next frame's re-prime reuses
2556        // it (skips the re-hash) while still re-committing the dict bytes to
2557        // history. No-op when the attach path built no table (copy mode or a
2558        // sub-8-byte dict) — `mark_dict_primed` self-guards on table presence.
2559        match self.active_backend() {
2560            super::strategy::BackendTag::Simple => self.simple_mut().mark_dict_primed(),
2561            super::strategy::BackendTag::Dfast => self.dfast_matcher_mut().mark_dict_primed(),
2562            super::strategy::BackendTag::Row => self.row_matcher_mut().mark_dict_primed(),
2563            _ => {}
2564        }
2565    }
2566
2567    fn restore_primed_dictionary(&mut self, level: super::CompressionLevel) -> bool {
2568        // Only the (storage, dictionary_retained_budget) pair is what
2569        // `prime_with_dictionary` writes; restoring them reproduces the
2570        // post-prime state exactly. Gated on the FULL resolved key (level + the
2571        // resolved `LevelParams` + the active backend's table width), not just
2572        // the level: `reset` resolves the hint into a window/table geometry, so a
2573        // same-level snapshot taken at a hint that resolved to a different shape
2574        // carries a `storage.max_window_size` / table dimensions that no longer
2575        // match this reset. Restoring it would let the encoder search past the
2576        // frame header's window (an undecodable match), so on a key mismatch we
2577        // refuse and the caller re-primes.
2578        let Some((params, table_bits, fast_attach, ldm)) = self.reset_shape else {
2579            return false;
2580        };
2581        let key = PrimedKey {
2582            level,
2583            params,
2584            table_bits,
2585            fast_attach,
2586            ldm,
2587        };
2588        let Some((snapshot, budget, captured_key)) = &self.primed else {
2589            return false;
2590        };
2591        if *captured_key != key {
2592            return false;
2593        }
2594        let budget = *budget;
2595        match (&mut self.storage, snapshot) {
2596            // Same-variant Fast restore: copy the snapshot into the retained
2597            // live storage. `clone_from` reuses the history / hash-table /
2598            // dict-table buffers, so this is the donor CDict table-copy
2599            // regime's cost (pure copies) instead of a full per-frame
2600            // allocation + copy + drop cycle.
2601            (MatcherStorage::Simple(live), MatcherStorage::Simple(snap)) => {
2602                live.clone_from(snap);
2603            }
2604            (live, snapshot_storage) => {
2605                let mut storage = snapshot_storage.clone();
2606                // A binary-tree snapshot is stored WITHOUT its live hash /
2607                // chain / hash3 tables (they hold no dictionary entries — the
2608                // dict lives in `dms` + history; see
2609                // `capture_primed_dictionary`). Re-allocate them zeroed to the
2610                // snapshot's geometry, exactly reproducing the post-prime
2611                // state (all `HC_EMPTY`). This is a full storage replace, so
2612                // no stale live-table entry from a prior frame can survive —
2613                // `ensure_tables` only allocates when the length mismatches,
2614                // so a full (HC / non-BT) snapshot whose tables are already
2615                // present is left untouched.
2616                if let MatcherStorage::HashChain(hc) = &mut storage {
2617                    hc.table.ensure_tables();
2618                }
2619                // The snapshot does not retain the LDM producer (it holds no
2620                // dict state; see `capture_primed_dictionary`). Carry over the
2621                // frame's freshly-reset producer — built this frame by `reset`
2622                // with the same params the snapshot key pins, and empty (no
2623                // input processed yet), so it is equivalent to the producer
2624                // the snapshot was captured with.
2625                #[cfg(feature = "hash")]
2626                {
2627                    let fresh_ldm = if let MatcherStorage::HashChain(hc) = live {
2628                        hc.take_ldm_producer()
2629                    } else {
2630                        None
2631                    };
2632                    if let MatcherStorage::HashChain(hc) = &mut storage {
2633                        hc.set_ldm_producer(fresh_ldm);
2634                    }
2635                }
2636                *live = storage;
2637            }
2638        }
2639        self.dictionary_retained_budget = budget;
2640        true
2641    }
2642
2643    fn capture_primed_dictionary(&mut self, level: super::CompressionLevel) {
2644        // No resolved shape means `reset` has not run for this frame — nothing
2645        // valid to key a snapshot on, so skip the capture.
2646        let Some((params, table_bits, fast_attach, ldm)) = self.reset_shape else {
2647            return;
2648        };
2649        let key = PrimedKey {
2650            level,
2651            params,
2652            table_bits,
2653            fast_attach,
2654            ldm,
2655        };
2656        // Donor CDict-equivalent retained state. On the binary-tree backend the
2657        // dictionary is decoupled into `dms` (the donor `dictMatchState`); the
2658        // live hash / chain / hash3 tables carry NO dict entries at capture
2659        // (`skip_matching_dict_bt` keeps the dict out of the live tree), so they
2660        // are pure zeros. Storing them in the snapshot wastes the full table
2661        // footprint (a second window-tier table set resident for the whole
2662        // compress). Instead, move the live tables OUT of the working storage,
2663        // clone only the dict-state (history + `dms` + window/offset/dict-limit),
2664        // then move the live tables back — the snapshot keeps just what donor's
2665        // CDict keeps, and `restore_primed_dictionary` re-allocates the zeroed
2666        // live tables. HC / lazy levels keep the dict IN the live chain (no
2667        // `dms`), so their snapshot must retain the full tables: full clone.
2668        let bt_decoupled = matches!(
2669            &self.storage,
2670            MatcherStorage::HashChain(hc) if hc.table.uses_bt
2671        );
2672        if bt_decoupled {
2673            let MatcherStorage::HashChain(hc) = &mut self.storage else {
2674                unreachable!("bt_decoupled implies HashChain storage");
2675            };
2676            let hash_table = core::mem::take(&mut hc.table.hash_table);
2677            let chain_table = core::mem::take(&mut hc.table.chain_table);
2678            let hash3_table = core::mem::take(&mut hc.table.hash3_table);
2679            // The LDM producer carries no dictionary state (LDM is not
2680            // dict-primed; its hash table is empty at capture), so it is not
2681            // retained either — `restore` reinstates the frame's freshly-reset
2682            // producer. Take it out so the clone does not duplicate its table.
2683            #[cfg(feature = "hash")]
2684            let ldm_producer = hc.take_ldm_producer();
2685            // Clone the dict-state-only storage (live tables now empty Vecs,
2686            // LDM producer detached).
2687            let snapshot = self.storage.clone();
2688            // Move the live tables (and LDM producer) back into the working storage.
2689            let MatcherStorage::HashChain(hc) = &mut self.storage else {
2690                unreachable!("storage variant is stable across the take/put");
2691            };
2692            hc.table.hash_table = hash_table;
2693            hc.table.chain_table = chain_table;
2694            hc.table.hash3_table = hash3_table;
2695            #[cfg(feature = "hash")]
2696            hc.set_ldm_producer(ldm_producer);
2697            self.primed = Some((snapshot, self.dictionary_retained_budget, key));
2698        } else {
2699            self.primed = Some((self.storage.clone(), self.dictionary_retained_budget, key));
2700        }
2701    }
2702
2703    fn invalidate_primed_dictionary(&mut self) {
2704        self.primed = None;
2705        // Drop the Fast-backend CDict-equivalent table cache too: it is keyed
2706        // to the dictionary being removed / replaced. Left in place, the next
2707        // same-params `reset` would retain it and the kernel would probe a
2708        // dict region whose bytes are no longer re-committed to history.
2709        match self.active_backend() {
2710            super::strategy::BackendTag::Simple => self.simple_mut().invalidate_dict_cache(),
2711            super::strategy::BackendTag::Dfast => self.dfast_matcher_mut().invalidate_dict_cache(),
2712            // Row keeps its attach index across frames (like Simple/Dfast),
2713            // so a dictionary swap must drop its cached dict rows too;
2714            // otherwise the next small/unknown-size frame reuses stale
2715            // attach state through `prime_dict_attach_current_block`.
2716            super::strategy::BackendTag::Row => self.row_matcher_mut().invalidate_dict_cache(),
2717            // The BT dms tree is keyed to the dict bytes; `prime_dms_bt`
2718            // skips the rebuild while its shape matches, so a swapped
2719            // dictionary of the same length would otherwise keep serving the
2720            // OLD dictionary's tree.
2721            super::strategy::BackendTag::HashChain => {
2722                self.hc_matcher_mut().table.dms.invalidate();
2723            }
2724        }
2725    }
2726
2727    fn seed_dictionary_entropy(
2728        &mut self,
2729        huff: Option<&crate::huff0::huff0_encoder::HuffmanTable>,
2730        ll: Option<&crate::fse::fse_encoder::FSETable>,
2731        ml: Option<&crate::fse::fse_encoder::FSETable>,
2732        of: Option<&crate::fse::fse_encoder::FSETable>,
2733    ) {
2734        if self.active_backend() == super::strategy::BackendTag::HashChain {
2735            self.hc_matcher_mut()
2736                .seed_dictionary_entropy(huff, ll, ml, of);
2737        }
2738    }
2739
2740    fn window_size(&self) -> u64 {
2741        self.reported_window_size as u64
2742    }
2743
2744    fn get_next_space(&mut self) -> Vec<u8> {
2745        if let Some(mut space) = self.vec_pool.pop() {
2746            if space.len() > self.slice_size {
2747                space.truncate(self.slice_size);
2748            }
2749            if space.len() < self.slice_size {
2750                space.resize(self.slice_size, 0);
2751            }
2752            return space;
2753        }
2754        alloc::vec![0; self.slice_size]
2755    }
2756
2757    fn get_last_space(&mut self) -> &[u8] {
2758        match &self.storage {
2759            MatcherStorage::Simple(m) => m.last_committed_space(),
2760            MatcherStorage::Dfast(m) => m.get_last_space(),
2761            MatcherStorage::Row(m) => m.get_last_space(),
2762            MatcherStorage::HashChain(m) => m.table.get_last_space(),
2763        }
2764    }
2765
2766    fn commit_space(&mut self, space: Vec<u8>) {
2767        let mut evicted_bytes = 0usize;
2768        // Split borrows manually so the `add_data` closures can write
2769        // into `vec_pool` while the backend itself holds an exclusive
2770        // borrow via `storage`. (Suffix-store recycling went away
2771        // with the legacy `MatchGenerator`; the FastKernelMatcher
2772        // arm below has no pool interaction.)
2773        let vec_pool = &mut self.vec_pool;
2774        match &mut self.storage {
2775            MatcherStorage::Simple(m) => {
2776                // FastKernelMatcher owns its history as a single
2777                // flat Vec<u8> and the hash table as a Vec<u32> —
2778                // neither recycles into the driver-side pools. The
2779                // eager pre-commit eviction inside
2780                // `FastKernelMatcher::accept_data` drops bytes when
2781                // accepting this block would push history past 2×
2782                // max_window_size; that delta is what feeds
2783                // `evicted_bytes` here via the `pre / post`
2784                // history-length comparison.
2785                let pre = m.history_len_for_eviction_accounting();
2786                m.accept_data(space);
2787                let post = m.history_len_for_eviction_accounting();
2788                // `accept_data` performs eager pre-commit window
2789                // eviction (so this `pre - post` delta correctly
2790                // feeds the dictionary-budget retire flow). See
2791                // `FastKernelMatcher::accept_data` for the
2792                // commit-time-visibility rationale (closes #216
2793                // CodeRabbit review #5 / Copilot review #1: without
2794                // eager eviction, the delta was always 0 and the
2795                // dict budget never retired, leaving max_window_size
2796                // inflated post-dict-prime → matcher could emit
2797                // offsets exceeding the frame header's window).
2798                evicted_bytes += pre.saturating_sub(post);
2799            }
2800            MatcherStorage::Dfast(m) => {
2801                // Dfast's `add_data` callback receives the INPUT
2802                // `Vec<u8>` for pool recycling (Dfast stores its
2803                // bytes in the contiguous `history` buffer, not in
2804                // per-block Vecs — there is no per-block buffer to
2805                // pop off and hand back). Counting `data.len()` as
2806                // evicted bytes would conflate "new bytes ingested"
2807                // with "old bytes evicted from window"; the two
2808                // happen to coincide when the previous window was
2809                // saturated and the new input fills it 1:1, but
2810                // diverge when the eviction pop-loop drops blocks
2811                // of a different size than the incoming input. The
2812                // `dictionary_retained_budget` retire decision
2813                // downstream then gets driven by inflated eviction
2814                // counts and shrinks `max_window_size` prematurely.
2815                //
2816                // Derive the real eviction delta from `window_size`
2817                // before/after the call. The pop loop inside
2818                // `add_data` decrements `window_size` by each
2819                // evicted block length and then the final
2820                // `extend_from_slice + push_back` adds `space_len`,
2821                // so `evicted = pre + space_len - post`.
2822                let pre = m.window_size;
2823                let space_len = space.len();
2824                m.add_data(space, |data| {
2825                    // Same per-block recycle as the HashChain arm: push
2826                    // the spent input buffer back as-is rather than
2827                    // zero-filling to capacity. `add_data` mirrors the
2828                    // bytes into `history` and calls this every block, so
2829                    // capacity-wide zeroing would be hot-path waste;
2830                    // `get_next_space` zeroes at most `slice_size` bytes
2831                    // when it later reuses the buffer.
2832                    vec_pool.push(data);
2833                });
2834                // Plain `+` (the `saturating_sub` floors at 0): `pre` + one
2835                // block are byte counts bounded by the window, no overflow.
2836                evicted_bytes += (pre + space_len).saturating_sub(m.window_size);
2837            }
2838            MatcherStorage::Row(m) => {
2839                // RowMatchGenerator::add_data recycles the *input* buffer
2840                // through this callback every commit (its bytes are mirrored
2841                // into `history`), not the evicted chunks. Derive the eviction
2842                // delta from `window_size` before/after — `evicted = pre +
2843                // space_len - post` — exactly like the Simple / HashChain arms.
2844                // Counting the callback argument as evicted would charge the
2845                // whole committed block as evicted and prematurely retire
2846                // dictionary budget on a window that evicts nothing.
2847                let pre = m.window_size;
2848                let space_len = space.len();
2849                m.add_data(space, |data| {
2850                    // Recycle the spent buffer as-is; `add_data` runs this for
2851                    // every committed block, so zero-filling to capacity here
2852                    // would be hot-path waste (`get_next_space` zeroes at most
2853                    // `slice_size` on reuse).
2854                    vec_pool.push(data);
2855                });
2856                // Plain `+` (the `saturating_sub` floors at 0): `pre` + one
2857                // block are byte counts bounded by the window, no overflow.
2858                evicted_bytes += (pre + space_len).saturating_sub(m.window_size);
2859            }
2860            MatcherStorage::HashChain(m) => {
2861                // MatchTable::add_data now recycles the *incoming* buffer
2862                // through `reuse_space` (its bytes are copied into the
2863                // contiguous `history` mirror), so the callback no longer
2864                // reports evicted chunks. Derive the eviction delta from
2865                // `window_size` before/after, exactly like the Simple arm:
2866                // `evicted = pre + space_len - post`.
2867                let pre = m.table.window_size;
2868                let space_len = space.len();
2869                m.table.add_data(space, |data| {
2870                    // Recycle the spent input buffer to the pool as-is.
2871                    // `add_data` runs this callback for every committed
2872                    // block (the bytes are mirrored into `history`), so
2873                    // growing the buffer to its full capacity here would
2874                    // zero the whole allocation on the hot path.
2875                    // `get_next_space` resizes a popped buffer to
2876                    // `slice_size` on demand, touching at most
2877                    // `slice_size` bytes — never the larger capacity the
2878                    // pool retains.
2879                    vec_pool.push(data);
2880                });
2881                // Plain `+` (the `saturating_sub` floors at 0): byte counts
2882                // bounded by the window, no overflow.
2883                evicted_bytes += (pre + space_len).saturating_sub(m.table.window_size);
2884            }
2885        }
2886        // Gate the second backend trim pass on actual budget
2887        // reclamation. Without it, every slice commit on the
2888        // no-dictionary / no-eviction path (the common case) would
2889        // run a backend `match` ladder + `trim_to_window` early-out
2890        // for no reason — `trim_after_budget_retire` only does
2891        // meaningful work when `retire_dictionary_budget` shrank
2892        // `max_window_size` enough to make the backend's
2893        // `window_size > max_window_size` invariant trigger
2894        // eviction.
2895        if self.retire_dictionary_budget(evicted_bytes) {
2896            self.trim_after_budget_retire();
2897        }
2898    }
2899
2900    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
2901        use super::strategy::{self, StrategyTag};
2902        // Borrowed one-shot Fast path: if the frame driver staged a
2903        // block range via `set_borrowed_block`, scan it in place against
2904        // the borrowed window instead of the owned committed block. Only
2905        // the Simple backend is instrumented (the gate guarantees it),
2906        // and the stage is consumed so the next block re-stages.
2907        if let Some((block_start, block_end)) = self.borrowed_pending.take() {
2908            match self.active_backend() {
2909                super::strategy::BackendTag::Simple => self.simple_mut().start_matching_borrowed(
2910                    block_start,
2911                    block_end,
2912                    &mut handle_sequence,
2913                ),
2914                super::strategy::BackendTag::Dfast => self
2915                    .dfast_matcher_mut()
2916                    .start_matching_borrowed(block_start, block_end, &mut handle_sequence),
2917                super::strategy::BackendTag::Row => {
2918                    // Same greedy/lazy parse split as the owned RowHash arm.
2919                    let greedy = self.parse == super::strategy::ParseMode::Greedy;
2920                    self.row_matcher_mut().start_matching_borrowed(
2921                        block_start,
2922                        block_end,
2923                        greedy,
2924                        &mut handle_sequence,
2925                    );
2926                }
2927                super::strategy::BackendTag::HashChain => match self.search {
2928                    super::strategy::SearchMethod::HashChain => self
2929                        .hc_matcher_mut()
2930                        .start_matching_lazy_borrowed(block_start, block_end, &mut handle_sequence),
2931                    super::strategy::SearchMethod::BinaryTree => {
2932                        // Run the SAME BT dispatch as the owned BinaryTree arm
2933                        // below — every BT body reads its range via
2934                        // current_block_range() and bytes via live_history()
2935                        // (borrowed-aware), so the staged block is scanned in
2936                        // place. The table was already staged by
2937                        // `set_borrowed_block` (the HashChain arm at the top of
2938                        // this file calls `table.stage_borrowed_block` with the
2939                        // same range, and `borrowed_pending` is set only there),
2940                        // so no re-stage is needed here.
2941                        // Only btlazy2 reaches the borrowed BinaryTree scan:
2942                        // `borrowed_supported()` keeps the optimal parsers
2943                        // (BtOpt/BtUltra/BtUltra2) on the owned path, and
2944                        // `set_borrowed_block` asserts that predicate before any
2945                        // range is staged, so an optimal strategy_tag can never
2946                        // arrive here.
2947                        match self.strategy_tag {
2948                            StrategyTag::Btlazy2 => self
2949                                .hc_matcher_mut()
2950                                .start_matching_btlazy2(&mut handle_sequence),
2951                            other => unreachable!(
2952                                "borrowed BinaryTree scan is only supported for Btlazy2, got {other:?}"
2953                            ),
2954                        }
2955                    }
2956                    other => {
2957                        unreachable!("HashChain backend with unexpected search {other:?}")
2958                    }
2959                },
2960            }
2961            return;
2962        }
2963        // Decoupled parse×search dispatch (fires once per block). The
2964        // search axis (`self.search`) picks the candidate-finding backend;
2965        // the parse axis (greedy vs lazy depth) is carried by the
2966        // backend's runtime `lazy_depth`, set per level at `reset()`.
2967        // The two are independent, so any parse can run on any search
2968        // backend. The `BinaryTree` arm still selects the opt `Strategy`
2969        // ZST off `strategy_tag` so `compress_block::<S>` keeps its
2970        // const-folded optimal-parser monomorphisation.
2971        use super::strategy::SearchMethod;
2972        match self.search {
2973            SearchMethod::Fast => {
2974                self.simple_mut().start_matching(&mut handle_sequence);
2975                self.recycle_simple_space();
2976            }
2977            SearchMethod::DoubleFast => {
2978                self.dfast_matcher_mut()
2979                    .start_matching(&mut handle_sequence);
2980            }
2981            SearchMethod::RowHash => {
2982                // Greedy parse (depth 0) = donor-greedy entry (default
2983                // `ip + 1` start, greedy repcode commit); lazy / lazy2 use
2984                // the `pick_lazy_match` lookahead entry (reads `lazy_depth`).
2985                // Both bare entries dispatch on `row_log` internally into the
2986                // const-`ROW_LOG` hot loop (donor per-rowLog variant table).
2987                let greedy = self.parse == super::strategy::ParseMode::Greedy;
2988                let row = self.row_matcher_mut();
2989                if greedy {
2990                    row.start_matching_greedy(&mut handle_sequence);
2991                } else {
2992                    row.start_matching(&mut handle_sequence);
2993                }
2994            }
2995            SearchMethod::HashChain => {
2996                // Greedy/lazy/lazy2 all flow through the lazy parser; it
2997                // reads `hc.lazy_depth` (0 = greedy commit).
2998                self.hc_matcher_mut()
2999                    .start_matching_lazy(&mut handle_sequence);
3000            }
3001            SearchMethod::BinaryTree => match self.strategy_tag {
3002                StrategyTag::Btlazy2 => self
3003                    .hc_matcher_mut()
3004                    .start_matching_btlazy2(&mut handle_sequence),
3005                StrategyTag::BtOpt => self.compress_block::<strategy::BtOpt>(&mut handle_sequence),
3006                StrategyTag::BtUltra => {
3007                    self.compress_block::<strategy::BtUltra>(&mut handle_sequence)
3008                }
3009                StrategyTag::BtUltra2 => {
3010                    self.compress_block::<strategy::BtUltra2>(&mut handle_sequence)
3011                }
3012                _ => unreachable!(
3013                    "SearchMethod::BinaryTree requires a BT strategy tag (Btlazy2/BtOpt/BtUltra/BtUltra2)"
3014                ),
3015            },
3016        }
3017    }
3018
3019    fn skip_matching(&mut self) {
3020        self.skip_matching_with_hint(None);
3021    }
3022
3023    fn skip_matching_with_hint(&mut self, incompressible_hint: Option<bool>) {
3024        // Borrowed one-shot Fast path: a staged block range routes to the
3025        // borrowed skip (records the range for `get_last_space`, primes
3026        // hashes on the dict-priming hint) with no owned-history append
3027        // and nothing to recycle. Stage is consumed.
3028        if let Some((block_start, block_end)) = self.borrowed_pending.take() {
3029            match self.active_backend() {
3030                super::strategy::BackendTag::Simple => self.simple_mut().skip_matching_borrowed(
3031                    block_start,
3032                    block_end,
3033                    incompressible_hint,
3034                ),
3035                super::strategy::BackendTag::Dfast => self
3036                    .dfast_matcher_mut()
3037                    .skip_matching_borrowed(block_start, block_end, incompressible_hint),
3038                super::strategy::BackendTag::Row => self.row_matcher_mut().skip_matching_borrowed(
3039                    block_start,
3040                    block_end,
3041                    incompressible_hint,
3042                ),
3043                super::strategy::BackendTag::HashChain => self
3044                    .hc_matcher_mut()
3045                    .skip_matching_borrowed(block_start, block_end, incompressible_hint),
3046            }
3047            return;
3048        }
3049        match self.active_backend() {
3050            super::strategy::BackendTag::Simple => {
3051                self.simple_mut()
3052                    .skip_matching_with_hint(incompressible_hint);
3053                self.recycle_simple_space();
3054            }
3055            super::strategy::BackendTag::Dfast => {
3056                self.dfast_matcher_mut().skip_matching(incompressible_hint)
3057            }
3058            super::strategy::BackendTag::Row => self
3059                .row_matcher_mut()
3060                .skip_matching_with_hint(incompressible_hint),
3061            super::strategy::BackendTag::HashChain => {
3062                self.hc_matcher_mut().skip_matching(incompressible_hint)
3063            }
3064        }
3065    }
3066}
3067
3068impl MatchGeneratorDriver {
3069    /// Monomorphised optimal-parser entry point. Only the `BinaryTree`
3070    /// search arm of [`Matcher::start_matching`] routes here, selecting
3071    /// the concrete opt `S: Strategy` (BtOpt / BtUltra / BtUltra2) off
3072    /// `strategy_tag`, so the optimiser keeps the cost-model predicates
3073    /// (`S::USE_BT` / `S::USE_HASH3` / `S::ACCURATE_PRICE` /
3074    /// `S::TWO_PASS_SEED`) const-folded per strategy. The non-opt search
3075    /// backends (Fast / DoubleFast / RowHash / HashChain) are dispatched
3076    /// directly off the search axis and never reach this method, so all
3077    /// strategies arriving here are HashChain-backed.
3078    fn compress_block<S: super::strategy::Strategy>(
3079        &mut self,
3080        handle_sequence: &mut impl for<'a> FnMut(Sequence<'a>),
3081    ) {
3082        debug_assert_eq!(S::BACKEND, super::strategy::BackendTag::HashChain);
3083        debug_assert!(
3084            S::USE_BT,
3085            "compress_block only handles the optimal (BT) path"
3086        );
3087        self.hc_matcher_mut()
3088            .start_matching_strategy::<S>(handle_sequence);
3089    }
3090}
3091
3092/// Stage D: backend storage discriminator.
3093///
3094/// HC (lazy / lazy2) modes carry no extra per-frame state beyond the
3095/// shared `MatchTable` and `HcMatcher` runtime knobs, so the
3096/// [`HcBackend::Hc`] variant is zero-sized — no BT scratch is
3097/// allocated. BT-flavoured modes (`btopt` / `btultra` / `btultra2`)
3098/// hold the full [`super::bt::BtMatcher`] inside the
3099/// [`HcBackend::Bt`] variant (cost model, optimal-parser scratch
3100/// arenas, LDM candidate buffer).
3101///
3102/// The discriminator lives next to `parse_mode` so `configure()` can
3103/// promote between the two on a level change without touching the
3104/// `MatchTable` storage.
3105#[derive(Clone)]
3106pub(crate) enum HcBackend {
3107    /// Lazy / lazy2 modes — no per-frame backend state.
3108    Hc,
3109    /// BT-driven modes — owns the optimal parser's per-frame scratch.
3110    /// Boxed so the enum stays pointer-sized: HC-only matchers pay
3111    /// just the `Box`-niche, not the 4 KiB `BtMatcher` payload.
3112    Bt(alloc::boxed::Box<super::bt::BtMatcher>),
3113}
3114
3115impl HcBackend {
3116    /// Heap bytes held by the backend. `Hc` is zero-sized; `Bt` boxes a
3117    /// `BtMatcher`, so count the boxed payload plus its own scratch heap.
3118    fn heap_size(&self) -> usize {
3119        match self {
3120            Self::Hc => 0,
3121            Self::Bt(bt) => core::mem::size_of::<super::bt::BtMatcher>() + bt.heap_size(),
3122        }
3123    }
3124
3125    /// Mutable accessor on the BT matcher; panics if the active
3126    /// backend is `Hc`. The HC-or-Bt branches in orchestrator code use
3127    /// `let HcBackend::Bt(bt) = &self.backend` directly for readonly
3128    /// access — this helper exists so macro bodies that already drive
3129    /// a mutable BT update through the optimal parser can write
3130    /// `$self.backend.bt_mut().X` without an outer `match` ladder.
3131    #[inline(always)]
3132    pub(crate) fn bt_mut(&mut self) -> &mut super::bt::BtMatcher {
3133        match self {
3134            Self::Bt(bt) => bt,
3135            Self::Hc => unreachable!("BT-only accessor called in HC mode"),
3136        }
3137    }
3138}
3139
/// Match generator for the hash-chain / binary-tree family: bundles the
/// shared table storage, the HC runtime knobs, the [`HcBackend`]
/// discriminator and a mirrored strategy tag.
3140#[derive(Clone)]
3141struct HcMatchGenerator {
3142    /// Shared match-finder storage (window, history, hash / chain /
3143    /// hash3 tables, dictionary-priming flags). Used identically by HC
3144    /// and BT modes; backend-specific table interpretation lives in the
3145    /// matcher methods on this struct.
3146    table: super::match_table::storage::MatchTable,
3147    /// HC runtime knobs (lazy_depth, search_depth, target_len). Always
3148    /// present — BT modes still consult `hc.search_depth` for repcode
3149    /// probing and chain candidate enumeration.
3150    hc: super::hc::HcMatcher,
3151    /// Backend discriminator. [`HcBackend::Hc`] is zero-sized for the
3152    /// lazy / lazy2 path so HC-only generators don't carry the BT
3153    /// optimal-parser scratch buffers. [`HcBackend::Bt`] holds the
3154    /// `BtMatcher` when an optimal mode is configured.
    /// NOTE(review): the btlazy2 parse body in this file also calls
    /// `backend.bt_mut()`, which is `unreachable!()` for `Hc`, so btlazy2
    /// appears to require the `Bt` variant as well — confirm in
    /// `configure()`.
3155    backend: HcBackend,
3156    /// Compile-time strategy tag mirrored from
3157    /// [`MatchGeneratorDriver::strategy_tag`] during `configure()`.
3158    /// The driver hot path never reads this — it dispatches to
3159    /// `compress_block::<S>` from its own tag — but the
3160    /// `#[cfg(test)] start_matching` helper consumes it so artificial
3161    /// test setups still pick the correct concrete `S` for the
3162    /// const-generic optimal parser (BtOpt vs BtUltra vs BtUltra2).
3163    /// Without this field the test path would have to collapse
3164    /// `BtOpt` and `BtUltra` onto the same monomorphisation since
3165    /// `table.uses_bt` / `table.is_btultra2` alone can't tell them
3166    /// apart.
    /// (The field itself is an ordinary runtime value; "compile-time"
    /// refers to the strategy type it selects.)
3167    strategy_tag: super::strategy::StrategyTag,
3168}
3169
3170// Plain-data types relocated to [`crate::encoding::opt::types`] and
3171// [`crate::encoding::opt::ldm`] by #111 Phase 1. The use statements at
3172// the top of this file bring them back into scope so the existing
3173// methods on `HcMatchGenerator` compile unchanged.
3174
3175/// `bt_insert_step_no_rebase` body parameterized over the per-CPU
3176/// `count_match_from_indices` symbol. Each kernel-specific wrapper invokes
3177/// the macro with its own `fastpath::<kernel>::count_match_from_indices`
3178/// path so the call resolves inside the wrapper's `#[target_feature]`
3179/// umbrella and inlines instead of paying the function-call ABI per BT walk
3180/// iteration. Used only by `HcMatchGenerator` BT walk wrappers below.
3181///
3182/// Crate-private: the macro body references private `encoding::*`
3183/// modules via `$crate::...`, so it is unusable downstream and is
3184/// re-exported only inside this crate via `pub(crate) use` below.
///
/// # Arguments
///
/// * `$table` – expression naming the match table. It is substituted
///   textually at every use (its `hash_table`, `chain_table`,
///   `history_abs_start`, `position_base`, `index_shift`, ... are all
///   reached through it), so pass a plain place expression.
/// * `$search_depth` – initial value of the compare budget; each accepted
///   candidate consumes one unit.
/// * `$abs_pos` – identifier holding the absolute position being inserted.
/// * `$current_abs_end` – identifier holding the absolute end of the
///   current block; the match length is capped at
///   `current_abs_end - abs_pos`.
/// * `$target_abs` – identifier forwarded to `window_low_abs_for_target`.
/// * `$cmf` – path of the match-length counter, invoked inside `unsafe` as
///   `$cmf(concat, idx, candidate_idx, tail_limit, seed_len)`.
///
/// # Result
///
/// The expansion is a block expression evaluating to
/// `max(speed_positions, match_end_abs - (abs_pos + 8))`, where
/// `speed_positions` is `min(best_len - 384, 192)` for `best_len > 384` and
/// `0` otherwise. It also contains two `return 1;` early exits (fewer than
/// 8 readable bytes at the position, or `relative_position` yields `None`).
/// Those return from the *enclosing function*, so the macro must be
/// expanded inside a function whose return type accepts that value.
/// NOTE(review): the result is presumably the number of positions the
/// caller may advance — confirm against the wrappers.
3185macro_rules! bt_insert_step_no_rebase_body {
3186    ($table:expr, $search_depth:expr, $abs_pos:ident, $current_abs_end:ident, $target_abs:ident, $cmf:path) => {{
3187        let idx = $abs_pos - $table.history_abs_start;
3188        // Borrowed-aware live region (owned: `history[history_start..]`;
3189        // borrowed: the in-place input `[0, block_end)`). Reborrow-then-raw-ptr
3190        // so the slice holds NO borrow and coexists with the `&mut $table`
3191        // binary-tree writes below. Owned is byte-identical (same bytes).
3192        let concat: &[u8] = unsafe {
3193            let lh = $table.live_history();
3194            core::slice::from_raw_parts(lh.as_ptr(), lh.len())
3195        };
        // Hashing reads from `idx`; bail out of the enclosing function when
        // fewer than 8 bytes remain in the live region.
3196        if idx + 8 > concat.len() {
3197            return 1;
3198        }
3199        debug_assert!(
3200            $abs_pos <= $current_abs_end,
3201            "BT walker called past current block end"
3202        );
        // Upper bound on any match length reported by `$cmf` below. Only the
        // debug assertion above guards this subtraction.
3203        let tail_limit = $current_abs_end - $abs_pos;
3204        let hash = $crate::encoding::match_table::storage::MatchTable::hash_position_at(
3205            concat,
3206            idx,
3207            $table.hash_log,
3208            $table.search_mls,
3209        );
3210        // Prefetch the hash bucket now. For the large L16+ hash table over
3211        // high-entropy input the bucket is L3/DRAM-cold, and unlike upstream's
3212        // monolithic ZSTD_btGetAllMatches (which overlaps this miss with its
3213        // inline rep/hash3 prologue) the read+write of `hash_table[hash]`
3214        // below is reached with nothing to hide it behind — it stalled a large
3215        // share of this function's cycles. Issuing the hint here lets the miss
3216        // overlap the address setup that follows.
3217        #[cfg(all(
3218            target_feature = "sse",
3219            any(target_arch = "x86", target_arch = "x86_64")
3220        ))]
3221        {
3222            #[cfg(target_arch = "x86")]
3223            use core::arch::x86::{_MM_HINT_T0, _mm_prefetch};
3224            #[cfg(target_arch = "x86_64")]
3225            use core::arch::x86_64::{_MM_HINT_T0, _mm_prefetch};
3226            // SAFETY: prefetch is a hint that never faults; `hash` indexes
3227            // `hash_table` directly below, so it is in bounds.
3228            unsafe {
3229                _mm_prefetch($table.hash_table.as_ptr().add(hash).cast(), _MM_HINT_T0);
3230            }
3231            // Prefetch the NEXT position's bucket too. The optimal-parser DP
3232            // advances one position per iteration, so this miss is issued a
3233            // full BT walk plus the next iteration's pre-collect work ahead of
3234            // the collect that will read it — far more lead than the same-call
3235            // hint above, enough to hide the full DRAM latency.
3236            if idx + 1 + 8 <= concat.len() {
3237                let hash_next =
3238                    $crate::encoding::match_table::storage::MatchTable::hash_position_at(
3239                        concat,
3240                        idx + 1,
3241                        $table.hash_log,
3242                        $table.search_mls,
3243                    );
3244                // SAFETY: the prefetch itself never faults. NOTE(review): the
3245                // pointer `.add` still needs an in-bounds offset; `hash_next`
                // comes from the same `hash_position_at` / `hash_log` as `hash`
                // above, so it is presumably in range of `hash_table` for the
                // same reason — confirm.
3246                unsafe {
3247                    _mm_prefetch(
3248                        $table.hash_table.as_ptr().add(hash_next).cast(),
3249                        _MM_HINT_T0,
3250                    );
3251                }
3252            }
3253        }
3254        let Some(relative_pos) = $table.relative_position($abs_pos) else {
3255            return 1;
3256        };
        // Table entries are stored as `relative_pos + 1`; the walk below
        // undoes this with `match_stored - 1` after ruling out `HC_EMPTY`.
3257        let stored = relative_pos + 1;
3258        let bt_mask = $table.bt_mask();
3259        // `abs_pos < bt_mask` legitimately happens for the first BT walk of
3260        // a fresh frame (bt_low effectively "no floor"). Saturating keeps
3261        // the floor at 0 so the `candidate_abs <= bt_low` check never
3262        // triggers early; raw subtraction would underflow into a huge
3263        // sentinel that ALWAYS triggers.
3264        let bt_low = $abs_pos.saturating_sub(bt_mask);
3265        // Hoist the BT pointer-pair base out of `self` once — see the
3266        // collect-matches body for the full rationale (per-step Vec reload +
3267        // bounds check through `&mut self` vs the donor's raw `U32*` walk).
3268        let chain_ptr = $table.chain_table.as_mut_ptr();
3269        debug_assert_eq!($table.chain_table.len(), 2 << $table.bt_log());
3270        let window_low = $table.window_low_abs_for_target($target_abs);
3271        // `abs_pos + 9` is safe in raw form: `MatchTable::add_data` caps
3272        // total input at `usize::MAX - STREAM_ABS_HEADROOM` (where
3273        // `STREAM_ABS_HEADROOM = HC_OPT_NUM + 16`), so every
3274        // frame-lifetime absolute cursor passed to the BT walker stays
3275        // below `usize::MAX - 9` regardless of stream length or
3276        // pointer width. The guard is hoisted to the data-ingest
3277        // boundary so this per-position site pays zero arithmetic
3278        // overhead in the hot loop.
3279        let mut match_end_abs = $abs_pos + 9;
3280        let mut best_len = 8usize;
3281        let mut compares_left = $search_depth;
3282        let mut common_length_smaller = 0usize;
3283        let mut common_length_larger = 0usize;
3284        let pair_idx = $table.bt_pair_index_for_abs($abs_pos);
3285        let mut smaller_slot = pair_idx;
3286        let mut larger_slot = pair_idx + 1;
        // Read the bucket's previous head first, then publish this position
        // as the new head — the read has to come before the overwrite.
3287        let mut match_stored = $table.hash_table[hash];
3288        $table.hash_table[hash] = stored;
3289
3290        while compares_left > 0 {
3291            if match_stored == $crate::encoding::match_table::storage::HC_EMPTY {
3292                break;
3293            }
3294            // Reject stale post-rebase slots whose pre-shift position is below
3295            // `index_shift` explicitly. A `wrapping_sub` maps such a slot to a
3296            // near-`usize::MAX` value that the `>= abs_pos` test only rejects
3297            // while `abs_pos` is far from the integer ceiling; on a
3298            // long-running rebased stream (reachable on 32-bit) `abs_pos` can
3299            // approach the ceiling and the wrapped value can land back inside
3300            // `[window_low, abs_pos)`. `checked_sub` ends the walk on the
3301            // underflow instead. `match_stored != HC_EMPTY` here, so the `- 1`
3302            // cannot underflow.
3303            let Some(candidate_abs) = ($table.position_base + (match_stored as usize - 1))
3304                .checked_sub($table.index_shift)
3305            else {
3306                break;
3307            };
3308            if candidate_abs < window_low || candidate_abs >= $abs_pos {
3309                break;
3310            }
3311            compares_left -= 1;
3312
3313            let next_pair_idx = $table.bt_pair_index_for_abs(candidate_abs);
3314            // SAFETY: `next_pair_idx (+1)` = `2*(candidate_abs & bt_mask) (+1)`
3315            // ≤ `chain_table.len()-1`; `chain_ptr` is the hoisted live base,
3316            // table not realloc'd during the walk.
3317            let next_smaller = unsafe { *chain_ptr.add(next_pair_idx) };
3318            let next_larger = unsafe { *chain_ptr.add(next_pair_idx + 1) };
            // Both children are loaded before either slot write below.
            // `seed_len` is the prefix length already established on both
            // sides of the walk; it is handed to `$cmf` as its last argument.
3319            let seed_len = common_length_smaller.min(common_length_larger);
3320            let candidate_idx = candidate_abs - $table.history_abs_start;
3321            // SAFETY: BT walk invariant — `candidate_idx + tail_limit ≤
3322            // concat.len()` since the candidate is within
3323            // `[history_abs_start, abs_pos)` and `tail_limit ≤
3324            // current_abs_end - abs_pos`.
3325            let match_len = unsafe { $cmf(concat, idx, candidate_idx, tail_limit, seed_len) };
3326
3327            if match_len > best_len {
3328                best_len = match_len;
3329                // `candidate_abs + match_len <= current_abs_end` by BT walk
3330                // invariant — `match_len <= tail_limit = current_abs_end -
3331                // abs_pos` and `candidate_abs < abs_pos`.
3332                let candidate_end = candidate_abs + match_len;
3333                if candidate_end > match_end_abs {
3334                    match_end_abs = candidate_end;
3335                }
3336            }
3337
            // The match ran to the block end: there is no differing byte left
            // to order the two suffixes by, so stop descending.
3338            if match_len >= tail_limit {
3339                break;
3340            }
3341
3342            let candidate_next = candidate_idx + match_len;
3343            let current_next = idx + match_len;
3344            // SAFETY: first-differing positions after a match_len-long prefix;
3345            // match_len < tail_limit (break above) + BT-walk bound
3346            // idx/candidate_idx + tail_limit <= concat.len() keep both in range.
3347            if unsafe {
3348                *concat.get_unchecked(candidate_next) < *concat.get_unchecked(current_next)
3349            } {
3350                // SAFETY: `smaller_slot` holds a valid pair index (init
3351                // `pair_idx`, updated to `next_pair_idx + 1`); the `usize::MAX`
3352                // sentinel is set only just before `break`, never written here.
3353                unsafe { *chain_ptr.add(smaller_slot) = match_stored };
3354                common_length_smaller = match_len;
3355                if candidate_abs <= bt_low {
3356                    smaller_slot = usize::MAX;
3357                    break;
3358                }
3359                smaller_slot = next_pair_idx + 1;
3360                match_stored = next_larger;
3361            } else {
3362                // SAFETY: as above for `larger_slot`.
3363                unsafe { *chain_ptr.add(larger_slot) = match_stored };
3364                common_length_larger = match_len;
3365                if candidate_abs <= bt_low {
3366                    larger_slot = usize::MAX;
3367                    break;
3368                }
3369                larger_slot = next_pair_idx;
3370                match_stored = next_smaller;
3371            }
3372        }
3373
        // Terminate whichever slots are still open with `HC_EMPTY`. A slot
        // set to the `usize::MAX` sentinel was already closed by the
        // `bt_low` cut-off above and must not be written.
3374        // SAFETY: both slots, when not the `usize::MAX` sentinel, hold valid
3375        // pair indices into the hoisted `chain_table` base.
3376        if smaller_slot != usize::MAX {
3377            unsafe {
3378                *chain_ptr.add(smaller_slot) = $crate::encoding::match_table::storage::HC_EMPTY
3379            };
3380        }
3381        if larger_slot != usize::MAX {
3382            unsafe {
3383                *chain_ptr.add(larger_slot) = $crate::encoding::match_table::storage::HC_EMPTY
3384            };
3385        }
3386
        // Long-match bonus: `best_len - 384`, capped at 192, once the best
        // match exceeds 384 bytes; otherwise 0.
3387        let speed_positions = if best_len > 384 {
3388            (best_len - 384).min(192)
3389        } else {
3390            0
3391        };
3392        // `match_end_abs` is initialized to `abs_pos + 9` and is only
3393        // reassigned inside the `candidate_end > match_end_abs` branch
3394        // above. So even though an individual `candidate_end =
3395        // candidate_abs + match_len` can land below `abs_pos` (the
3396        // candidate sits earlier in history and the match runs short),
3397        // the variable itself never drops below its initial value.
3398        // That gives `match_end_abs ≥ abs_pos + 9 > abs_pos + 8` as a
3399        // loop-wide invariant, so the raw subtraction below cannot
3400        // underflow.
3401        speed_positions.max(match_end_abs - ($abs_pos + 8))
3402    }};
3403}
3404pub(crate) use bt_insert_step_no_rebase_body;
3405
// NOTE(review): the comment below documents the `build_optimal_plan_impl`
// body macro, which is not the item that follows it here. It is kept as a
// plain `//` comment so rustdoc does not attach it to `btlazy2_offbase`;
// move it next to the macro it describes.
//
3406// `build_optimal_plan_impl` body parameterized over the per-CPU
3407// `collect_optimal_candidates_initialized_<kernel>` method name. Caller
3408// passes its `&mut self`, the seven DP entry-point arguments, and the
3409// kernel-specific collect method. Each per-kernel wrapper invokes this
3410// macro inside its own `#[target_feature]` umbrella so the per-position
3411// `$collect` call inlines and the entire DP loop runs as one straight-line
3412// hot path without an ABI barrier between the DP and the match-gathering
3413// pipeline.
3414//
3415// Body is ~730 lines but mechanically identical across kernels — the macro
3416// keeps a single source of truth. The two const generics
3417// (`ACCURATE_PRICE`, `FAVOR_SMALL_OFFSETS`) come from the wrapper's
3418// generic parameter list and are referenced as bare identifiers; macro
3419// hygiene resolves them at the expansion site.
/// Donor `offBase` used by the btlazy2 lazy gain heuristic.
///
/// An offset equal to one of the three active repeat offsets maps to the
/// cheap repcode value (1, 2 or 3); every other offset maps to
/// `offset + 3`. An equal-length repeat-offset match therefore always
/// out-gains an explicit-offset one (`zstd_lazy.c` `ZSTD_storeSeq` offBase
/// convention).
///
/// * `offset` – match distance; narrowed to `u32` before comparison.
/// * `reps` – the three active repeat offsets, most recent first.
/// * `ll0` – `true` at a zero-literal position, where the cheap codes are
///   rep1 / rep2 / (rep0 - 1) instead of rep0 / rep1 / rep2. A plain
///   rep0-distance match is then priced as an explicit offset.
#[inline]
fn btlazy2_offbase(offset: usize, reps: [u32; 3], ll0: bool) -> u32 {
    let [rep0, rep1, rep2] = reps;
    // Arms are tried top to bottom, so duplicate repeat offsets resolve to
    // the lowest code.
    match (ll0, offset as u32) {
        // Zero-literal position: shifted repcode set.
        (true, dist) if dist == rep1 => 1,
        (true, dist) if dist == rep2 => 2,
        // `rep0 > 1` keeps `rep0 - 1` a valid non-zero distance (and keeps
        // the subtraction from underflowing).
        (true, dist) if rep0 > 1 && dist == rep0 - 1 => 3,
        // Regular position: rep0 / rep1 / rep2.
        (false, dist) if dist == rep0 => 1,
        (false, dist) if dist == rep1 => 2,
        (false, dist) if dist == rep2 => 3,
        // Explicit offset. Offsets are < window (<= 2^27), so `+ 3` never
        // overflows u32.
        (_, dist) => dist + 3,
    }
}
3455
3456/// Donor lazy match gain (`matchLength * 4 - ZSTD_highbit32(offBase)`): the
3457/// selection metric that lets a shorter repeat-offset match beat a longer
3458/// explicit-offset one. `offBase >= 1`, so `highbit` is well-defined.
3459#[inline]
3460fn btlazy2_gain(match_len: usize, offset: usize, reps: [u32; 3], ll0: bool) -> i64 {
3461    let offbase = btlazy2_offbase(offset, reps, ll0);
3462    (match_len as i64) * 4 - (31 - offbase.leading_zeros()) as i64
3463}
3464
3465/// Per-kernel body of the `btlazy2` (levels 13-15) greedy/lazy parse over
3466/// the binary-tree match finder. Mirrors `build_optimal_plan_impl_body!`'s
3467/// kernel-dispatch discipline: the wrapper carries the `#[target_feature]`
3468/// umbrella and passes its tier-specific `collect_optimal_candidates_initialized_<kernel>`
3469/// as `$collect`, so the per-position BT collect (and its inlined cpl)
3470/// stays under one umbrella — the runtime `select_kernel()` dispatch happens
3471/// ONCE per block in the bare `start_matching_btlazy2`, never per position.
macro_rules! start_matching_btlazy2_body {
    // Arguments, as they are used in the expansion below:
    //   $self            - value whose `table`, `hc` and `backend` fields drive the parse
    //   $handle_sequence - callable handed every emitted `Sequence`
    //   $collect         - name of the `$self` method that fills the candidate list
    //   $cmf             - path of the match-length routine used by the repcode probe
    ($self:ident, $handle_sequence:ident, $collect:ident, $cmf:path $(,)?) => {{
        $self.table.ensure_tables();
        // `current_block_range` is borrowed-aware: owned mode reports the last
        // committed chunk, borrowed mode reports the staged block.
        let (block_abs_start, block_len) = $self.table.current_block_range();
        if block_len == 0 {
            return;
        }
        let block_ptr = $self.table.get_last_space().as_ptr();
        // SAFETY: the routine mutates the tables but never reallocates
        // `history`, so this tail view stays valid until the routine ends
        // (the other parsers rely on the same contract).
        let block: &[u8] = unsafe { core::slice::from_raw_parts(block_ptr, block_len) };
        // The whole contiguous live region (owned: dict + earlier blocks +
        // this block inside `history`; borrowed: `[0, block_end)` of the
        // in-place input). The explicit repcode probe needs it because a rep
        // offset may reach back before the current block, which `block`
        // cannot address.
        let window_abs_start = $self.table.history_abs_start;
        // SAFETY: `live_history()` is borrowed-aware; rebuilding the slice from
        // its raw pointer leaves no borrow held, so it can coexist with the
        // `&mut self` collector calls below. Same no-realloc validity contract
        // as `block`.
        let window: &[u8] = unsafe {
            let live = $self.table.live_history();
            core::slice::from_raw_parts(live.as_ptr(), live.len())
        };
        let block_abs_end = block_abs_start + block_len;
        $self
            .table
            .apply_limited_update_after_long_match(block_abs_start);
        $self
            .table
            .backfill_boundary_positions(block_abs_start, block_abs_end);

        let profile = HcOptimalCostProfile::const_for_strategy::<super::strategy::Btlazy2>();
        // Borrow the reusable candidate buffer; it is handed back at the end.
        let mut candidates = core::mem::take(&mut $self.backend.bt_mut().opt_candidates_scratch);

        let max_deferrals = $self.hc.lazy_depth as usize;
        let mut pos = 0usize;
        // First byte of the block not yet covered by an emitted sequence.
        let mut anchor = 0usize;

        // Picks the highest-gain match starting at a block-relative position
        // (donor `ZSTD_searchMax` plus the explicit repcode check). First the
        // collected candidate ladder is ranked by gain, then one repcode is
        // probed directly, because the ladder's strictly-increasing-length
        // filter drops short cheap reps. Evaluates to `(match_len, offset)`;
        // a `match_len` of 0 means nothing usable was found.
        macro_rules! pick_match {
            ($p:expr) => {{
                let at: usize = $p;
                let at_abs = block_abs_start + at;
                let pending_literals = at - anchor;
                // `ll0` (donor): no literals are pending before this position,
                // so the repcode set is shifted (see `btlazy2_offbase`).
                let ll0 = pending_literals == 0;
                candidates.clear();
                let query = HcCandidateQuery {
                    reps: $self.table.offset_hist,
                    lit_len: pending_literals,
                    // No LDM seed: L13-15 run at windowLog 22, below donor's
                    // LDM auto-enable threshold (windowLog >= 27).
                    ldm_candidate: None,
                };
                // SAFETY: expanded inside the wrapper's `#[target_feature]`
                // umbrella (for the scalar wrapper `$collect` is a safe fn).
                unsafe {
                    $self.$collect::<super::strategy::Btlazy2, true>(
                        at_abs,
                        block_abs_end,
                        profile,
                        query,
                        &mut candidates,
                    );
                }
                let reps = $self.table.offset_hist;
                // Bytes left in the block from `at`; caps every match length.
                let room = block_len - at;
                let mut pick = (0usize, 0usize);
                let mut pick_gain = i64::MIN;
                for cand in candidates.iter() {
                    let len = cand.match_len.min(room);
                    if len >= HC_OPT_MIN_MATCH_LEN {
                        let gain = btlazy2_gain(len, cand.offset, reps, ll0);
                        if gain > pick_gain {
                            pick_gain = gain;
                            pick = (len, cand.offset);
                        }
                    }
                }
                let at_idx = at_abs - window_abs_start;
                // Donor probes `rep[0 + ll0]` directly: rep0 normally, rep1 at
                // a zero-literal position where rep0 is not the cheapest code.
                let probe_rep = reps[usize::from(ll0)] as usize;
                if probe_rep != 0 && at_idx >= probe_rep {
                    // SAFETY: `at_idx - probe_rep < at_idx` and `at_idx + room
                    // <= window.len()`; same overshoot slack the collector
                    // relies on for this block.
                    let rep_len = unsafe { $cmf(window, at_idx, at_idx - probe_rep, room, 0) };
                    if rep_len >= HC_OPT_MIN_MATCH_LEN
                        && btlazy2_gain(rep_len, probe_rep, reps, ll0) > pick_gain
                    {
                        pick = (rep_len, probe_rep);
                    }
                }
                pick
            }};
        }

        while pos + HC_OPT_MIN_MATCH_LEN <= block_len {
            let (mut match_len, mut match_off) = pick_match!(pos);
            if match_len < HC_OPT_MIN_MATCH_LEN {
                pos += 1;
                continue;
            }
            // Lazy lookahead (donor depth 1/2): step one byte forward and
            // switch to the later match only when its gain beats the current
            // one plus the donor margin (deferring costs an extra literal:
            // `+4` on the first deferral, `+7` afterwards). `start` is where
            // the currently chosen match begins.
            let mut start = pos;
            let mut deferrals = 0usize;
            while deferrals < max_deferrals && start + 1 + HC_OPT_MIN_MATCH_LEN <= block_len {
                let next = start + 1;
                let (next_len, next_off) = pick_match!(next);
                if next_len < HC_OPT_MIN_MATCH_LEN {
                    break;
                }
                let reps = $self.table.offset_hist;
                let margin = match deferrals {
                    0 => 4,
                    _ => 7,
                };
                // The held match sits at `start` (ll0 iff no literal precedes
                // it); the lookahead match at `start + 1` always has a
                // pending literal.
                let keep_gain = btlazy2_gain(match_len, match_off, reps, start == anchor) + margin;
                let defer_gain = btlazy2_gain(next_len, next_off, reps, false);
                if defer_gain <= keep_gain {
                    break;
                }
                match_len = next_len;
                match_off = next_off;
                start = next;
                deferrals += 1;
            }
            // Commit the chosen match at `start`; `[anchor, start)` goes out as
            // literals. `match_len` was capped to `block_len - start` when it
            // was selected, so `start + match_len <= block_len`.
            let lit_len = start - anchor;
            let literals = &block[anchor..start];
            $handle_sequence(Sequence::Triple {
                literals,
                offset: match_off,
                match_len,
            });
            // Called only to advance the repcode history; the returned value
            // is not needed here.
            let _ = encode_offset_with_history(
                match_off as u32,
                lit_len as u32,
                &mut $self.table.offset_hist,
            );
            pos = start + match_len;
            anchor = pos;
        }

        // Whatever follows the last match is emitted as plain literals.
        if anchor < block_len {
            $handle_sequence(Sequence::Literals {
                literals: &block[anchor..],
            });
        }
        // Return the candidate buffer so the next call can reuse it.
        $self.backend.bt_mut().opt_candidates_scratch = candidates;
    }};
}
3645
3646macro_rules! build_optimal_plan_impl_body {
3647    (
3648        $self:expr,
3649        $strategy_ty:ty,
3650        $current:ident,
3651        $current_abs_start:ident,
3652        $current_len:ident,
3653        $initial_state:ident,
3654        $stats:ident,
3655        $out:ident,
3656        $collect:ident $(,)?
3657    ) => {{
3658        let current_abs_end = $current_abs_start + $current_len;
3659        let min_match_len = HC_OPT_MIN_MATCH_LEN;
3660        // `HC_OPT_NUM > 0` by const definition, so `HC_OPT_NUM - 1` is safe.
3661        let frontier_limit = $current_len.min(HC_OPT_NUM - 1);
3662        let initial_reps = $initial_state.reps;
3663        let initial_litlen = $initial_state.litlen;
3664        let ldm_block_offset = $initial_state.block_offset;
3665        let mut profile = $initial_state.profile;
3666        profile.sufficient_match_len = $self.hc.sufficient_match_len_for_pass(profile);
3667        // Const-fold from the strategy's associated `OPT_LEVEL`
3668        // (donor `optLevel`): BtOpt = 0, BtUltra / BtUltra2 = 2.
3669        // The two flags below are the only places the inner DP loop
3670        // used to consult `parse_mode`; lifting them into const
3671        // expressions drops one indirect read + one branch on every
3672        // candidate insertion and every traceback step.
3673        // `let` (not `const`) — nested `const` items inside a
3674        // generic fn cannot project through the outer fn's type
3675        // parameter, but a `let` binding from a const expression
3676        // does get folded by the optimiser per monomorphisation,
3677        // which is what we actually want here.
3678        debug_assert!(
3679            <$strategy_ty as super::strategy::Strategy>::USE_BT,
3680            "build_optimal_plan_impl_body called on non-BT strategy"
3681        );
3682        let abort_on_worse_match: bool =
3683            <$strategy_ty as super::strategy::Strategy>::OPT_LEVEL == 0;
3684        let opt_level: bool = <$strategy_ty as super::strategy::Strategy>::OPT_LEVEL >= 2;
3685        let mut nodes = core::mem::take(&mut $self.backend.bt_mut().opt_nodes_scratch);
3686        // `frontier_limit + 2 <= HC_OPT_NODE_LEN` — bounded by const.
3687        let frontier_buffer_size = frontier_limit + 2;
3688        if nodes.len() < HC_OPT_NODE_LEN {
3689            // First optimal-parse use (empty boxed slice) or an undersized
3690            // buffer: allocate the fixed upstream-zstd-sized frontier once. The DP
3691            // overwrites the active prefix before reading it.
3692            nodes = alloc::vec![HcOptimalNode::default(); HC_OPT_NODE_LEN].into_boxed_slice();
3693        }
3694        let mut candidates = core::mem::take(&mut $self.backend.bt_mut().opt_candidates_scratch);
3695        candidates.clear();
3696        if candidates.capacity() < MAX_HC_SEARCH_DEPTH {
3697            candidates.reserve_exact(MAX_HC_SEARCH_DEPTH - candidates.capacity());
3698        }
3699        let mut store = core::mem::take(&mut $self.backend.bt_mut().opt_store_scratch);
3700        store.clear();
3701        let mut price_arena = core::mem::take(&mut $self.backend.bt_mut().opt_price_arena);
3702        if price_arena.len() < HC_OPT_PRICE_ARENA_LEN {
3703            price_arena = alloc::vec![[0u32; 2]; HC_OPT_PRICE_ARENA_LEN].into_boxed_slice();
3704        }
3705        // Single arena → two disjoint fixed-stride regions of `[price,
3706        // generation]` pairs (LL cache, ML cache): one base pointer + fixed
3707        // offsets, mirroring upstream zstd's single opt workspace. Pairing
3708        // price+generation per code keeps the optimal parser's cache probe
3709        // on ONE line instead of two strided regions.
3710        // SAFETY: `price_arena` is exactly `HC_OPT_PRICE_ARENA_LEN =
3711        // 2 * HC_OPT_PRICE_STRIDE` pairs long (just ensured), so the two
3712        // STRIDE-wide regions are in bounds and disjoint. The slices alias
3713        // the heap buffer `price_arena` owns; that heap address is stable
3714        // across the later move of the `price_arena` box into the result
3715        // bundle (a `Box` move relocates only the pointer, not the heap
3716        // data), and the slices are never used after the bundle is
3717        // constructed. The fixed STRIDE (independent of `frontier_limit`)
3718        // keeps every code's cell at a constant offset so the monotonic
3719        // stamps stay valid across calls with different frontiers.
3720        let arena_base = price_arena.as_mut_ptr();
3721        let mut ll_cache: &mut [[u32; 2]] =
3722            unsafe { core::slice::from_raw_parts_mut(arena_base, HC_OPT_PRICE_STRIDE) };
3723        let mut ml_cache: &mut [[u32; 2]] = unsafe {
3724            core::slice::from_raw_parts_mut(arena_base.add(HC_OPT_PRICE_STRIDE), HC_OPT_PRICE_STRIDE)
3725        };
3726        $self.backend.bt_mut().opt_ll_price_stamp = $self
3727            .backend
3728            .bt_mut()
3729            .opt_ll_price_stamp
3730            .wrapping_add(1)
3731            .max(1);
3732        let ll_price_stamp = $self.backend.bt_mut().opt_ll_price_stamp;
3733        $self.backend.bt_mut().opt_lit_price_stamp = $self
3734            .backend
3735            .bt_mut()
3736            .opt_lit_price_stamp
3737            .wrapping_add(1)
3738            .max(1);
3739        let lit_price_stamp = $self.backend.bt_mut().opt_lit_price_stamp;
3740        $self.backend.bt_mut().opt_ml_price_stamp = $self
3741            .backend
3742            .bt_mut()
3743            .opt_ml_price_stamp
3744            .wrapping_add(1)
3745            .max(1);
3746        let ml_price_stamp = $self.backend.bt_mut().opt_ml_price_stamp;
3747        nodes[0] = HcOptimalNode {
3748            price: BtMatcher::cached_lit_length_price(
3749                profile,
3750                $stats,
3751                initial_litlen,
3752                &mut ll_cache,
3753                ll_price_stamp,
3754            ),
3755            litlen: initial_litlen as u32,
3756            reps: initial_reps,
3757            ..HcOptimalNode::default()
3758        };
3759        let sufficient_len = profile.sufficient_match_len;
3760        let ll0_price = BtMatcher::cached_lit_length_price(
3761            profile,
3762            $stats,
3763            0,
3764            &mut ll_cache,
3765            ll_price_stamp,
3766        );
3767        let ll1_price = BtMatcher::cached_lit_length_price(
3768            profile,
3769            $stats,
3770            1,
3771            &mut ll_cache,
3772            ll_price_stamp,
3773        );
3774        let mut pos = 1usize;
3775        let mut last_pos = 0usize;
3776        let mut forced_end: Option<usize> = None;
3777        let mut forced_end_state: Option<HcOptimalNode> = None;
3778        let mut seed_forced_shortest_path = false;
3779        let mut opt_ldm = HcOptLdmState {
3780            seq_store: HcRawSeqStore {
3781                pos: 0,
3782                pos_in_sequence: 0,
3783                size: $self.backend.bt_mut().ldm_sequences.len(),
3784            },
3785            ..HcOptLdmState::default()
3786        };
3787        let has_ldm = !$self.backend.bt_mut().ldm_sequences.is_empty();
3788        if has_ldm {
3789            // `ldm_sequences` are emitted in BLOCK-relative coordinates,
3790            // but this optimal-parser pass runs over a SEGMENT of the
3791            // block starting at block-offset `$block_offset` and uses
3792            // segment-relative positions throughout. Fast-forward the raw
3793            // seq-store cursor past the bytes covered by earlier segments
3794            // so the (segment-relative) LDM windows below land at the
3795            // correct positions. Idempotent: `ldm_skip_raw_seq_store_bytes`
3796            // recomputes from `pos = 0`, so re-running it per segment is
3797            // safe. Without this, every segment after the first re-applied
3798            // the block's leading LDM windows at the wrong offset, emitting
3799            // matches that copy the wrong bytes (undecodable frame).
3800            if ldm_block_offset > 0 {
3801                $self
3802                    .backend
3803                    .bt_mut()
3804                    .ldm_skip_raw_seq_store_bytes(&mut opt_ldm.seq_store, ldm_block_offset);
3805            }
3806            $self
3807                .backend
3808                .bt_mut()
3809                .ldm_get_next_match_and_update_seq_store(&mut opt_ldm, 0, $current_len);
3810        }
3811
3812        // Donor-like seed at rPos=0: initialize frontier with matches starting
3813        // at current position before entering the generic forward DP loop.
3814        if $current_len >= min_match_len {
3815            let seed_ldm = if has_ldm {
3816                $self.backend.bt_mut().ldm_process_match_candidate(
3817                    &mut opt_ldm,
3818                    0,
3819                    $current_len,
3820                    min_match_len,
3821                )
3822            } else {
3823                None
3824            };
3825            candidates.clear();
3826            // SAFETY: wrapper is in the same target_feature umbrella as the
3827            // `$collect` kernel variant; the runtime kernel detector already
3828            // gated entry into the wrapper.
3829            unsafe {
3830                $self.$collect::<$strategy_ty, true>(
3831                    $current_abs_start,
3832                    current_abs_end,
3833                    profile,
3834                    HcCandidateQuery {
3835                        reps: initial_reps,
3836                        lit_len: initial_litlen,
3837                        ldm_candidate: seed_ldm,
3838                    },
3839                    &mut candidates,
3840                )
3841            };
3842            if !candidates.is_empty() {
3843                // `min_match_len >= HC_FORMAT_MINMATCH (3)` by invariant.
3844                last_pos = (min_match_len - 1).min(frontier_limit);
3845                for p in 1..min_match_len.min(frontier_buffer_size) {
3846                    BtMatcher::reset_opt_node(&mut nodes[p]);
3847                    // `initial_litlen` is the litlen carried from prior
3848                    // optimal-plan segments — its real bound is the
3849                    // current block length (the frame compressor caps
3850                    // block scan at `HC_BLOCKSIZE_MAX`), not the segment
3851                    // `current_len`. `p < min_match_len` (small constant),
3852                    // so the sum stays well within `u32::MAX`. Use
3853                    // `checked_add` FIRST so the `usize` addition itself
3854                    // cannot overflow on i686 (where `usize` is 32-bit
3855                    // and a wrapping `+` would slip past `try_from`).
3856                    let seed_litlen = initial_litlen
3857                        .checked_add(p)
3858                        .and_then(|s| u32::try_from(s).ok())
3859                        .expect("optimal parser seed litlen out of u32 range");
3860                    nodes[p].litlen = seed_litlen;
3861                }
3862            }
3863
3864            if let Some(candidate) = candidates.last() {
3865                let longest_len = candidate.match_len.min($current_len);
3866                if longest_len > sufficient_len {
3867                    let off_base = BtMatcher::encode_offset_base_with_reps(
3868                        candidate.offset as u32,
3869                        initial_litlen,
3870                        initial_reps,
3871                    );
3872                    let off_price = profile
3873                        .offset_price_for::<ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>($stats, off_base);
3874                    let ml_price = BtMatcher::cached_match_length_price(
3875                        profile,
3876                        $stats,
3877                        longest_len,
3878                        &mut ml_cache,
3879                        ml_price_stamp,
3880                    );
3881                    let seq_cost = BtMatcher::add_prices(
3882                        ll0_price,
3883                        profile.match_price_from_parts(off_price, ml_price, $stats),
3884                    );
3885                    let forced_price = BtMatcher::add_prices(nodes[0].price, seq_cost);
3886                    let forced_state = HcOptimalNode {
3887                        price: forced_price,
3888                        off: candidate.offset as u32,
3889                        mlen: longest_len as u32,
3890                        litlen: 0,
3891                        reps: initial_reps,
3892                    };
3893                    if longest_len < frontier_buffer_size && forced_price < nodes[longest_len].price {
3894                        nodes[longest_len] = forced_state;
3895                    }
3896                    forced_end = Some(longest_len);
3897                    forced_end_state = Some(forced_state);
3898                    seed_forced_shortest_path = true;
3899                }
3900            }
3901            if !seed_forced_shortest_path {
3902                let mut prev_max_len = min_match_len - 1;
3903                for candidate in candidates.iter() {
3904                    let max_match_len = candidate.match_len.min(frontier_limit);
3905                    if max_match_len < min_match_len {
3906                        continue;
3907                    }
3908                    let start_len = (prev_max_len + 1).max(min_match_len);
3909                    if start_len > max_match_len {
3910                        prev_max_len = prev_max_len.max(max_match_len);
3911                        continue;
3912                    }
3913                    if max_match_len > last_pos {
3914                        BtMatcher::reset_opt_nodes(&mut nodes, last_pos + 1, max_match_len);
3915                    }
3916                    let off_base = BtMatcher::encode_offset_base_with_reps(
3917                        candidate.offset as u32,
3918                        initial_litlen,
3919                        initial_reps,
3920                    );
3921                    let off_price = profile
3922                        .offset_price_for::<ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>($stats, off_base);
3923                    debug_assert!(max_match_len < frontier_buffer_size);
3924                    let nodes0_price = nodes[0].price;
3925                    for match_len in (start_len..=max_match_len).rev() {
3926                        let ml_price = BtMatcher::cached_match_length_price(
3927                            profile,
3928                            $stats,
3929                            match_len,
3930                            &mut ml_cache,
3931                            ml_price_stamp,
3932                        );
3933                        let seq_cost = BtMatcher::add_prices(
3934                            ll0_price,
3935                            profile.match_price_from_parts(off_price, ml_price, $stats),
3936                        );
3937                        let next_cost = BtMatcher::add_prices(nodes0_price, seq_cost);
3938                        let node_price = unsafe { nodes.get_unchecked(match_len).price };
3939                        if match_len > last_pos || next_cost < node_price {
3940                            let slot = unsafe { nodes.get_unchecked_mut(match_len) };
3941                            *slot = HcOptimalNode {
3942                                price: next_cost,
3943                                off: candidate.offset as u32,
3944                                mlen: match_len as u32,
3945                                litlen: 0,
3946                                reps: initial_reps,
3947                            };
3948                            if match_len > last_pos {
3949                                last_pos = match_len;
3950                            }
3951                        } else if abort_on_worse_match {
3952                            break;
3953                        }
3954                    }
3955                    prev_max_len = prev_max_len.max(max_match_len);
3956                }
3957                if last_pos + 1 < frontier_buffer_size {
3958                    nodes[last_pos + 1].price = u32::MAX;
3959                }
3960            }
3961        }
3962        while !seed_forced_shortest_path && pos <= last_pos && pos <= frontier_limit {
3963            debug_assert!(pos + 1 < frontier_buffer_size);
3964            let prev_node = unsafe { *nodes.get_unchecked(pos - 1) };
3965            if prev_node.price != u32::MAX {
3966                let lit_len = prev_node.litlen as usize + 1;
3967                let lit_price = {
3968                    let bt = $self.backend.bt_mut();
3969                    BtMatcher::cached_literal_price(
3970                        profile,
3971                        $stats,
3972                        $current[pos - 1],
3973                        &mut bt.opt_lit_price_scratch,
3974                        &mut bt.opt_lit_price_generation,
3975                        lit_price_stamp,
3976                    )
3977                };
3978                let ll_delta = BtMatcher::cached_lit_length_delta_price(
3979                    profile,
3980                    $stats,
3981                    lit_len,
3982                    &mut ll_cache,
3983                    ll_price_stamp,
3984                );
3985                let lit_cost = BtMatcher::add_price_delta(prev_node.price, lit_price, ll_delta);
3986                let node_pos_price = unsafe { nodes.get_unchecked(pos).price };
3987                if lit_cost <= node_pos_price {
3988                    let prev_match = unsafe { *nodes.get_unchecked(pos) };
3989                    let slot = unsafe { nodes.get_unchecked_mut(pos) };
3990                    *slot = prev_node;
3991                    slot.litlen = lit_len as u32;
3992                    slot.price = lit_cost;
3993                    #[allow(clippy::collapsible_if)]
3994                    if opt_level
3995                        && prev_match.mlen > 0
3996                        && prev_match.litlen == 0
3997                        && pos < $current_len
3998                    {
3999                        if ll1_price < ll0_price {
4000                            let next_lit_price = {
4001                                let bt = $self.backend.bt_mut();
4002                                BtMatcher::cached_literal_price(
4003                                    profile,
4004                                    $stats,
4005                                    $current[pos],
4006                                    &mut bt.opt_lit_price_scratch,
4007                                    &mut bt.opt_lit_price_generation,
4008                                    lit_price_stamp,
4009                                )
4010                            };
4011                            let with1literal = BtMatcher::add_price_delta(
4012                                prev_match.price,
4013                                next_lit_price,
4014                                ll1_price as i32 - ll0_price as i32,
4015                            );
4016                            let ll_delta_next = BtMatcher::cached_lit_length_delta_price(
4017                                profile,
4018                                $stats,
4019                                lit_len + 1,
4020                                &mut ll_cache,
4021                                ll_price_stamp,
4022                            );
4023                            let with_more_literals =
4024                                BtMatcher::add_price_delta(lit_cost, next_lit_price, ll_delta_next);
4025                            let next = pos + 1;
4026                            let next_price = unsafe { nodes.get_unchecked(next).price };
4027                            if with1literal < with_more_literals && with1literal < next_price {
4028                                // Donor parity (zstd_opt.c:1232): `cur >= prevMatch.mlen`.
4029                                debug_assert!(pos >= prev_match.mlen as usize);
4030                                let prev_pos = pos - prev_match.mlen as usize;
4031                                {
4032                                    let prev_state = unsafe { *nodes.get_unchecked(prev_pos) };
4033                                    let (_, reps_after_match) = BtMatcher::encode_offset_with_reps(
4034                                        prev_match.off,
4035                                        prev_state.litlen as usize,
4036                                        prev_state.reps,
4037                                    );
4038                                    let slot = unsafe { nodes.get_unchecked_mut(next) };
4039                                    *slot = prev_match;
4040                                    slot.reps = reps_after_match;
4041                                    slot.litlen = 1;
4042                                    slot.price = with1literal;
4043                                    if next > last_pos {
4044                                        last_pos = next;
4045                                    }
4046                                }
4047                            }
4048                        }
4049                    }
4050                }
4051            }
4052
4053            // Memory-resident DP (donor parity): read opt[cur] fields on
4054            // demand instead of holding a 28-byte node copy live across the
4055            // per-position `$collect` call below. The held copy forced LLVM
4056            // to spill reps[3] + litlen around the (non-inlinable) call;
4057            // reading the fields fresh on each side keeps them out of the
4058            // cross-call live set. `nodes[pos]` is stable across `$collect`
4059            // (it only fills `candidates`), so post-call reads are identical.
4060            let base_cost = unsafe { nodes.get_unchecked(pos).price };
4061            if base_cost == u32::MAX {
4062                pos += 1;
4063                continue;
4064            }
4065            {
4066                let base_node = unsafe { *nodes.get_unchecked(pos) };
4067                if base_node.mlen > 0 && base_node.litlen == 0 {
4068                    // Donor parity (zstd_opt.c:1255): `cur >= opt[cur].mlen`.
4069                    debug_assert!(pos >= base_node.mlen as usize);
4070                    let prev_pos = pos - base_node.mlen as usize;
4071                    let prev_state = unsafe { *nodes.get_unchecked(prev_pos) };
4072                    let (_, reps_after_match) = BtMatcher::encode_offset_with_reps(
4073                        base_node.off,
4074                        prev_state.litlen as usize,
4075                        prev_state.reps,
4076                    );
4077                    unsafe { nodes.get_unchecked_mut(pos).reps = reps_after_match };
4078                }
4079            }
4080
4081            if pos + 8 > $current_len {
4082                pos += 1;
4083                continue;
4084            }
4085
4086            if pos == last_pos {
4087                break;
4088            }
4089
4090            let next_price = unsafe { nodes.get_unchecked(pos + 1).price };
4091            // `saturating_add` is REQUIRED here, not a masked bug: `base_cost`
4092            // is a node price that can be the `u32::MAX` "unreachable" sentinel,
4093            // and saturating keeps `base_cost + margin` pinned at MAX so the
4094            // comparison stays correct. Plain `+` would wrap the sentinel and
4095            // flip the abort decision (a ratio bug / debug overflow panic).
4096            if abort_on_worse_match
4097                && next_price <= base_cost.saturating_add(HC_BITCOST_MULTIPLIER / 2)
4098            {
4099                pos += 1;
4100                continue;
4101            }
4102
4103            let abs_pos = $current_abs_start + pos;
4104            let ldm_candidate = if has_ldm {
4105                $self.backend.bt_mut().ldm_process_match_candidate(
4106                    &mut opt_ldm,
4107                    pos,
4108                    $current_len - pos,
4109                    min_match_len,
4110                )
4111            } else {
4112                None
4113            };
4114            candidates.clear();
4115            // SAFETY: same umbrella as `$collect`. Query fields are read
4116            // fresh here (consumed into the call's argument) so they do not
4117            // stay live across the call; the post-call reads below are a
4118            // separate, fresh load of the same stable `nodes[pos]`.
4119            unsafe {
4120                $self.$collect::<$strategy_ty, true>(
4121                    abs_pos,
4122                    current_abs_end,
4123                    profile,
4124                    HcCandidateQuery {
4125                        reps: nodes.get_unchecked(pos).reps,
4126                        lit_len: nodes.get_unchecked(pos).litlen as usize,
4127                        ldm_candidate,
4128                    },
4129                    &mut candidates,
4130                )
4131            };
4132            // Post-call reads of opt[cur]: fresh, born after `$collect`, so
4133            // never part of the cross-call live set (see memory-resident note
4134            // above). `nodes[pos]` is untouched by `$collect`.
4135            let base_reps = unsafe { nodes.get_unchecked(pos).reps };
4136            let base_litlen = unsafe { nodes.get_unchecked(pos).litlen as usize };
4137            if let Some(candidate) = candidates.last() {
4138                let longest_len = candidate.match_len.min($current_len - pos);
4139                if longest_len > sufficient_len
4140                    || pos + longest_len >= HC_OPT_NUM
4141                    || pos + longest_len >= $current_len
4142                {
4143                    let lit_len = base_litlen;
4144                    let off_base = BtMatcher::encode_offset_base_with_reps(
4145                        candidate.offset as u32,
4146                        lit_len,
4147                        base_reps,
4148                    );
4149                    let off_price = profile
4150                        .offset_price_for::<ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>($stats, off_base);
4151                    let ml_price = BtMatcher::cached_match_length_price(
4152                        profile,
4153                        $stats,
4154                        longest_len,
4155                        &mut ml_cache,
4156                        ml_price_stamp,
4157                    );
4158                    let seq_cost = BtMatcher::add_prices(
4159                        ll0_price,
4160                        profile.match_price_from_parts(off_price, ml_price, $stats),
4161                    );
4162                    let forced_price = BtMatcher::add_prices(base_cost, seq_cost);
4163                    let end_pos = (pos + longest_len).min($current_len);
4164                    forced_end = Some(end_pos);
4165                    forced_end_state = Some(HcOptimalNode {
4166                        price: forced_price,
4167                        off: candidate.offset as u32,
4168                        mlen: longest_len as u32,
4169                        litlen: 0,
4170                        reps: base_reps,
4171                    });
4172                    break;
4173                }
4174            }
4175            let mut prev_max_len = min_match_len - 1;
4176            for candidate in candidates.iter() {
4177                // Outer loop guards `pos <= frontier_limit` (see the
4178                // `while ... pos <= frontier_limit` condition); the
4179                // subtraction below is therefore safe.
4180                debug_assert!(pos <= frontier_limit);
4181                let max_match_len = candidate
4182                    .match_len
4183                    .min($current_len - pos)
4184                    .min(frontier_limit - pos);
4185                let min_len = min_match_len;
4186                if max_match_len < min_len {
4187                    continue;
4188                }
4189                let start_len = (prev_max_len + 1).max(min_len);
4190                if start_len > max_match_len {
4191                    prev_max_len = prev_max_len.max(max_match_len);
4192                    continue;
4193                }
4194                let max_next = pos + max_match_len;
4195                if max_next > last_pos {
4196                    BtMatcher::reset_opt_nodes(&mut nodes, last_pos + 1, max_next);
4197                }
4198                let lit_len = base_litlen;
4199                let off_base = BtMatcher::encode_offset_base_with_reps(
4200                    candidate.offset as u32,
4201                    lit_len,
4202                    base_reps,
4203                );
4204                let off_price = profile
4205                    .offset_price_for::<ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>($stats, off_base);
4206                debug_assert!(pos + max_match_len < frontier_buffer_size);
4207                for match_len in (start_len..=max_match_len).rev() {
4208                    let next = pos + match_len;
4209                    let ml_price = BtMatcher::cached_match_length_price(
4210                        profile,
4211                        $stats,
4212                        match_len,
4213                        &mut ml_cache,
4214                        ml_price_stamp,
4215                    );
4216                    let seq_cost = BtMatcher::add_prices(
4217                        ll0_price,
4218                        profile.match_price_from_parts(off_price, ml_price, $stats),
4219                    );
4220                    let next_cost = BtMatcher::add_prices(base_cost, seq_cost);
4221                    let node_next_price = unsafe { nodes.get_unchecked(next).price };
4222                    let improved = next > last_pos || next_cost < node_next_price;
4223                    if improved {
4224                        let slot = unsafe { nodes.get_unchecked_mut(next) };
4225                        *slot = HcOptimalNode {
4226                            price: next_cost,
4227                            off: candidate.offset as u32,
4228                            mlen: match_len as u32,
4229                            litlen: 0,
4230                            reps: base_reps,
4231                        };
4232                        if next > last_pos {
4233                            last_pos = next;
4234                        }
4235                    } else if abort_on_worse_match {
4236                        break;
4237                    }
4238                }
4239                prev_max_len = prev_max_len.max(max_match_len);
4240            }
4241
4242            if last_pos + 1 < frontier_buffer_size {
4243                unsafe {
4244                    nodes.get_unchecked_mut(last_pos + 1).price = u32::MAX;
4245                }
4246            }
4247            pos += 1;
4248        }
4249
4250        if last_pos == 0 {
4251            if $current_len == 0 {
4252                let price = nodes[0].price;
4253                return $self.backend.bt_mut().finish_optimal_plan(
4254                    HcOptimalPlanBuffers {
4255                        nodes,
4256                        candidates,
4257                        store,
4258                        price_arena,
4259                    },
4260                    (price, initial_reps, initial_litlen, 0),
4261                );
4262            }
4263            let lit_price = {
4264                let bt = $self.backend.bt_mut();
4265                BtMatcher::cached_literal_price(
4266                    profile,
4267                    $stats,
4268                    $current[0],
4269                    &mut bt.opt_lit_price_scratch,
4270                    &mut bt.opt_lit_price_generation,
4271                    lit_price_stamp,
4272                )
4273            };
4274            // `initial_litlen` is carried across optimal-plan segments;
4275            // its real bound is the current block length, not
4276            // `current_len`. On i686 (32-bit `usize`) `+ 1` could
4277            // theoretically wrap if the invariant ever broke. Catch
4278            // that explicitly via `checked_add` rather than letting a
4279            // wrapping sum slip into the price lookup.
4280            let next_litlen = initial_litlen
4281                .checked_add(1)
4282                .expect("optimal parser next litlen out of usize range");
4283            let ll_delta = BtMatcher::cached_lit_length_delta_price(
4284                profile,
4285                $stats,
4286                next_litlen,
4287                &mut ll_cache,
4288                ll_price_stamp,
4289            );
4290            let price = BtMatcher::add_price_delta(nodes[0].price, lit_price, ll_delta);
4291            return $self.backend.bt_mut().finish_optimal_plan(
4292                HcOptimalPlanBuffers {
4293                    nodes,
4294                    candidates,
4295                    store,
4296                    price_arena,
4297                },
4298                (price, initial_reps, next_litlen, 1),
4299            );
4300        }
4301
4302        let target_pos = forced_end.unwrap_or(last_pos.min(frontier_limit));
4303        let last_stretch = if let Some(forced_state) = forced_end_state {
4304            forced_state
4305        } else {
4306            nodes[target_pos]
4307        };
4308        if last_stretch.price == u32::MAX {
4309            return $self.backend.bt_mut().finish_optimal_plan(
4310                HcOptimalPlanBuffers {
4311                    nodes,
4312                    candidates,
4313                    store,
4314                    price_arena,
4315                },
4316                (u32::MAX, initial_reps, initial_litlen, $current_len),
4317            );
4318        }
4319
4320        if last_stretch.mlen == 0 {
4321            return $self.backend.bt_mut().finish_optimal_plan(
4322                HcOptimalPlanBuffers {
4323                    nodes,
4324                    candidates,
4325                    store,
4326                    price_arena,
4327                },
4328                (
4329                    last_stretch.price,
4330                    last_stretch.reps,
4331                    last_stretch.litlen as usize,
4332                    target_pos.min($current_len),
4333                ),
4334            );
4335        }
4336
4337        let mut cur = target_pos.saturating_sub(last_stretch.mlen as usize);
4338        let end_reps = if last_stretch.litlen == 0 {
4339            let prev_state = nodes[cur];
4340            let (_, reps_after_match) = BtMatcher::encode_offset_with_reps(
4341                last_stretch.off,
4342                prev_state.litlen as usize,
4343                prev_state.reps,
4344            );
4345            reps_after_match
4346        } else {
4347            let tail_literals = last_stretch.litlen as usize;
4348            if cur < tail_literals {
4349                return $self.backend.bt_mut().finish_optimal_plan(
4350                    HcOptimalPlanBuffers {
4351                        nodes,
4352                        candidates,
4353                        store,
4354                        price_arena,
4355                    },
4356                    (
4357                        last_stretch.price,
4358                        last_stretch.reps,
4359                        tail_literals,
4360                        target_pos.min($current_len),
4361                    ),
4362                );
4363            }
4364            cur -= tail_literals;
4365            last_stretch.reps
4366        };
4367        let store_end = cur + 2;
4368        if store.len() <= store_end {
4369            store.resize(store_end + 1, HcOptimalNode::default());
4370        }
4371        let mut store_start;
4372        let mut stretch_pos = cur;
4373
4374        if last_stretch.litlen > 0 {
4375            store[store_end] = HcOptimalNode {
4376                litlen: last_stretch.litlen,
4377                mlen: 0,
4378                ..HcOptimalNode::default()
4379            };
4380            store_start = store_end.saturating_sub(1);
4381            store[store_start] = last_stretch;
4382        }
4383        store[store_end] = last_stretch;
4384        store_start = store_end;
4385
4386        loop {
4387            let next_stretch = nodes[stretch_pos];
4388            store[store_start].litlen = next_stretch.litlen;
4389            if next_stretch.mlen == 0 {
4390                break;
4391            }
4392            if store_start == 0 {
4393                break;
4394            }
4395            store_start -= 1;
4396            store[store_start] = next_stretch;
4397            // Parser invariant: every emitted stretch is bounded by the
4398            // current block, so `litlen + mlen <= current_len <=
4399            // HC_BLOCKSIZE_MAX (128 KiB)`. The `as usize` widening + raw
4400            // `+` is safe on 32-bit targets — two u32 values do NOT
4401            // automatically fit in `usize` on i686, the block bound is
4402            // what makes this addition safe.
4403            let litlen = next_stretch.litlen as usize;
4404            let mlen = next_stretch.mlen as usize;
4405            debug_assert!(litlen + mlen <= $current_len);
4406            let step = litlen + mlen;
4407            if step == 0 || stretch_pos < step {
4408                break;
4409            }
4410            stretch_pos -= step;
4411        }
4412
4413        let mut tail_literals = initial_litlen;
4414        let mut store_pos = store_start;
4415        while store_pos <= store_end {
4416            let stretch = store[store_pos];
4417            let llen = stretch.litlen as usize;
4418            let mlen = stretch.mlen as usize;
4419            if mlen == 0 {
4420                tail_literals = llen;
4421                store_pos += 1;
4422                continue;
4423            }
4424            $out.push(HcOptimalSequence {
4425                offset: stretch.off,
4426                match_len: mlen as u32,
4427                lit_len: llen as u32,
4428            });
4429            tail_literals = 0;
4430            store_pos += 1;
4431        }
4432        let result = (
4433            last_stretch.price,
4434            end_reps,
4435            if last_stretch.litlen > 0 {
4436                last_stretch.litlen as usize
4437            } else {
4438                tail_literals
4439            },
4440            target_pos.min($current_len),
4441        );
4442        $self.backend.bt_mut().finish_optimal_plan(
4443            HcOptimalPlanBuffers {
4444                nodes,
4445                candidates,
4446                store,
4447                price_arena,
4448            },
4449            result,
4450        )
4451    }};
4452}
4453
/// `collect_optimal_candidates_initialized` body parameterized over the per-CPU
/// kernel: the `$cpl` path is the kernel's `common_prefix_len_ptr` (used in
/// the HC chain walk fallback), and the four method-name substitutions
/// (`$bt_update`, `$bt_insert`, `$for_each_rep`, `$hash3`) route to the
/// kernel-specific wrappers of the inner helpers. With every helper under
/// the same `target_feature` umbrella, the entire per-position pipeline
/// (BT-tree fill + rep probing + hash3 probing + BT match collection /
/// HC chain walk) inlines without ABI barriers on the level22 hot path.
///
/// Macro arguments:
/// - `$self`: matcher expression; the body reads and mutates `$self.table`
///   and calls into `$self.hc`.
/// - `$strategy_ty`: a `super::strategy::Strategy` implementor; only its
///   `USE_HASH3` associated const is consulted here.
/// - `$abs_pos` / `$current_abs_end`: absolute position being probed and the
///   absolute end of the current block (`$abs_pos <= $current_abs_end` is
///   debug-asserted in the HC chain walk).
/// - `$profile`: supplies `sufficient_match_len` and `max_chain_depth`, and
///   is forwarded to `$bt_insert`.
/// - `$query`: supplies `reps`, `lit_len` and the optional `ldm_candidate`.
/// - `$out`: candidate buffer; cleared first, then filled exclusively through
///   `BtMatcher::push_candidate_ladder` (directly or via `$bt_insert`).
/// - `$bt_matchfinder`: boolean selecting the BT path (`$bt_update` +
///   `$bt_insert`) over the HC chain walk.
///
/// The body exits early with a bare `return;`, so it must be expanded inside
/// a function returning `()`.
macro_rules! collect_optimal_candidates_initialized_body {
    (
        $self:expr,
        $strategy_ty:ty,
        $abs_pos:ident,
        $current_abs_end:ident,
        $profile:ident,
        $query:ident,
        $out:ident,
        $bt_matchfinder:ident,
        $bt_update:ident,
        $bt_insert:ident,
        $for_each_rep:ident,
        $hash3:ident,
        $cpl:path $(,)?
    ) => {{
        // Per-strategy compile-time const: only BtUltra2 drives the
        // hash3 short-match table. All other monomorphisations drop
        // the entire hash3 lookup block at codegen time. The relaxed
        // implication enforces only the direction we depend on:
        // if the strategy declares hash3, the table must be live.
        // The reverse (`hash3_log != 0` without `USE_HASH3`) is OK —
        // a future caller may pre-allocate hash3 storage without
        // wiring the BtUltra2 path through.
        let use_hash3: bool = <$strategy_ty as super::strategy::Strategy>::USE_HASH3;
        debug_assert!(!$self.table.hash_table.is_empty());
        debug_assert!($self.table.hash3_log == 0 || !$self.table.hash3_table.is_empty());
        debug_assert!(
            !use_hash3 || $self.table.hash3_log != 0,
            "Strategy::USE_HASH3 = true but runtime hash3_log is 0 — call configure() first",
        );
        debug_assert!(!$self.table.chain_table.is_empty());
        let min_match_len = HC_OPT_MIN_MATCH_LEN;
        let reps = $query.reps;
        let lit_len = $query.lit_len;
        let ldm_candidate = $query.ldm_candidate;
        $out.clear();
        // Positions below `skip_insert_until_abs` are not probed at all:
        // no table update, no rep/hash3/BT/HC lookup. Only an LDM candidate
        // handed in by the caller is forwarded to `$out`.
        if $abs_pos < $self.table.skip_insert_until_abs {
            if let Some(ldm) = ldm_candidate {
                let mut best_len_for_skip = 0usize;
                let _ = super::bt::BtMatcher::push_candidate_ladder(
                    $out,
                    &mut best_len_for_skip,
                    ldm,
                    min_match_len,
                );
            }
            return;
        }
        if $bt_matchfinder {
            // SAFETY: caller is in the same target_feature umbrella as
            // `$bt_update`; the runtime kernel detector already gated entry.
            unsafe { $self.table.$bt_update($abs_pos, $current_abs_end) };
        }
        let current_idx = $abs_pos - $self.table.history_abs_start;
        // Fewer than 4 bytes of live history remain at this position: skip
        // rep/hash3/BT/HC probing and forward only the LDM candidate. Note
        // that `$bt_update` above has already run on the BT path.
        if current_idx + 4 > $self.table.live_history().len() {
            if let Some(ldm) = ldm_candidate {
                let mut best_len_for_skip = 0usize;
                let _ = super::bt::BtMatcher::push_candidate_ladder(
                    $out,
                    &mut best_len_for_skip,
                    ldm,
                    min_match_len,
                );
            }
            return;
        }
        // State shared across the probing stages below:
        // - `best_len_for_skip` is threaded through every
        //   `push_candidate_ladder` call (and into `$bt_insert`);
        // - `skip_further_match_search` suppresses hash3 / BT / HC probing
        //   once a repcode or hash3 hit is long enough or reaches block end;
        // - `rep_len_candidate_found` records whether any repcode candidate
        //   reached `min_match_len`.
        let mut best_len_for_skip = 0usize;
        let mut skip_further_match_search = false;
        let mut rep_len_candidate_found = false;
        // SAFETY: same umbrella; closure capture is monomorphized per call.
        unsafe {
            $self.hc.$for_each_rep(
                &$self.table,
                $abs_pos,
                lit_len,
                reps,
                $current_abs_end,
                min_match_len,
                |rep| {
                    if rep.match_len >= min_match_len {
                        rep_len_candidate_found = true;
                    }
                    let _ = super::bt::BtMatcher::push_candidate_ladder(
                        $out,
                        &mut best_len_for_skip,
                        rep,
                        min_match_len,
                    );
                    if rep.match_len > $profile.sufficient_match_len {
                        skip_further_match_search = true;
                    }
                    // `for_each_repcode_candidate_with_reps` caps
                    // `rep.match_len` at the per-call `tail_limit =
                    // current_abs_end - abs_pos`, so `abs_pos +
                    // rep.match_len <= current_abs_end`. The raw sum
                    // therefore stays in `usize` on every supported
                    // target.
                    if $abs_pos + rep.match_len >= $current_abs_end {
                        skip_further_match_search = true;
                    }
                },
            )
        };
        // Hash3 lookup runs only when the strategy enables it. The
        // `use_hash3` binding above is a per-monomorphisation const,
        // so non-BtUltra2 instances drop this entire block.
        if use_hash3 && !skip_further_match_search && best_len_for_skip < min_match_len {
            $self.table.update_hash3_until($abs_pos);
            // SAFETY: same umbrella for hash3_candidate.
            if let Some(h3) = unsafe {
                $self
                    .table
                    .$hash3($abs_pos, $current_abs_end, min_match_len)
            } {
                let _ = super::bt::BtMatcher::push_candidate_ladder(
                    $out,
                    &mut best_len_for_skip,
                    h3,
                    min_match_len,
                );
                // Only when no repcode candidate qualified: a hash3 hit that
                // exceeds `sufficient_match_len` or reaches the block end
                // moves the insert-skip boundary to the next position and
                // ends the search for this position.
                if !rep_len_candidate_found
                    && (h3.match_len > $profile.sufficient_match_len
                        || $abs_pos + h3.match_len >= $current_abs_end)
                {
                    $self.table.skip_insert_until_abs = $abs_pos + 1;
                    skip_further_match_search = true;
                }
            }
        }
        if !skip_further_match_search && $bt_matchfinder {
            // SAFETY: same umbrella for bt_insert_and_collect_matches.
            unsafe {
                $self.table.$bt_insert(
                    $abs_pos,
                    $current_abs_end,
                    $profile,
                    min_match_len,
                    &mut best_len_for_skip,
                    $out,
                )
            };
        } else if !skip_further_match_search {
            // HC chain-walk fallback (taken when `$bt_matchfinder` is
            // false): insert this position, then examine at most
            // `max_chain_depth` chain entries.
            $self.table.insert_position($abs_pos);
            let max_chain_depth = $profile.max_chain_depth.min($self.hc.search_depth);
            let concat = $self.table.live_history();
            // Raw `+ 9` is safe here — see `bt_insert_step_no_rebase_body!`
            // for the full discussion of the upstream `STREAM_ABS_HEADROOM`
            // cap in `MatchTable::add_data`.
            let mut match_end_abs = $abs_pos + 9;
            if max_chain_depth > 0 {
                for (visited, candidate_abs) in $self
                    .hc
                    .chain_candidates(&$self.table, $abs_pos)
                    .into_iter()
                    .enumerate()
                {
                    if visited >= max_chain_depth {
                        break;
                    }
                    // A `usize::MAX` entry ends the walk (presumably the
                    // "no further candidate" sentinel of `chain_candidates`
                    // — confirm against its definition).
                    if candidate_abs == usize::MAX {
                        break;
                    }
                    // Entries outside `[window_low, abs_pos)` are skipped
                    // but still count towards `max_chain_depth` through
                    // `visited`.
                    if candidate_abs < $self.table.window_low_abs_for_target($abs_pos)
                        || candidate_abs >= $abs_pos
                    {
                        continue;
                    }
                    let candidate_idx = candidate_abs - $self.table.history_abs_start;
                    debug_assert!(
                        $abs_pos <= $current_abs_end,
                        "HC chain walker called past current block end"
                    );
                    let tail_limit = $current_abs_end - $abs_pos;
                    let base = concat.as_ptr();
                    // SAFETY: history-relative indices; `tail_limit` bounds
                    // the scan within `concat`. `$cpl` is the kernel-specific
                    // common_prefix_len_ptr — call inlines because the
                    // surrounding wrapper carries the same target_feature.
                    let match_len =
                        unsafe { $cpl(base.add(candidate_idx), base.add(current_idx), tail_limit) };
                    if match_len < min_match_len {
                        continue;
                    }
                    let offset = $abs_pos - candidate_abs;
                    // `match_end_abs` only advances for candidates that
                    // `push_candidate_ladder` reports as accepted (`true`).
                    if super::bt::BtMatcher::push_candidate_ladder(
                        $out,
                        &mut best_len_for_skip,
                        MatchCandidate {
                            start: $abs_pos,
                            offset,
                            match_len,
                        },
                        min_match_len,
                    ) {
                        let candidate_end = candidate_abs + match_len;
                        if candidate_end > match_end_abs {
                            match_end_abs = candidate_end;
                        }
                    }
                    // Stop walking once a match exceeds `HC_OPT_NUM` or
                    // reaches the end of the current block.
                    if match_len > HC_OPT_NUM || $abs_pos + match_len >= $current_abs_end {
                        break;
                    }
                }
            }
            // `match_end_abs` initialized to `abs_pos + 9`; monotonic
            // updates only ever extend it, so `match_end_abs - 8 >= 1`.
            $self.table.skip_insert_until_abs =
                $self.table.skip_insert_until_abs.max(match_end_abs - 8);
        }
        // The LDM candidate, when present, is always offered last, after
        // whichever of the rep / hash3 / BT / HC stages ran.
        if let Some(ldm) = ldm_candidate {
            let _ = super::bt::BtMatcher::push_candidate_ladder(
                $out,
                &mut best_len_for_skip,
                ldm,
                min_match_len,
            );
        }
    }};
}
4682
/// `hash3_candidate` body parameterized over the per-CPU
/// `common_prefix_len_ptr` symbol. The hash3 probe checks one candidate per
/// position when invoked, so the per-call ABI savings compound across the
/// segment. Crate-private (see `bt_insert_step_no_rebase_body!`).
///
/// Macro arguments:
/// - `$table`: match-table expression providing `hash3_log`, `hash3_table`,
///   `history_abs_start`, `position_base`, `index_shift` and
///   `live_history()`.
/// - `$abs_pos` / `$current_abs_end`: absolute position being probed and the
///   absolute end of the region the match may extend into.
/// - `$min_match_len`: shortest match length worth reporting.
/// - `$cpl`: `unsafe fn(*const u8, *const u8, usize) -> usize`-shaped prefix
///   length kernel.
///
/// Evaluates to `Option<MatchCandidate>`. Early exits use `return None` and
/// `?`, so the macro must be expanded inside a function returning `Option`.
/// `None` is produced when hash3 is disabled (`hash3_log == 0`), when
/// `$abs_pos` precedes the live history, when fewer than 4 bytes of history
/// remain, when the table slot does not decode to a position inside
/// `[history_abs_start, abs_pos)`, when the offset reaches `HC3_MAX_OFFSET`,
/// or when the measured match is shorter than `$min_match_len`.
macro_rules! hash3_candidate_body {
    (
        $table:expr,
        $abs_pos:ident,
        $current_abs_end:ident,
        $min_match_len:ident,
        $cpl:path $(,)?
    ) => {{
        if $table.hash3_log == 0 {
            return None;
        }
        let idx = $abs_pos.checked_sub($table.history_abs_start)?;
        let concat = $table.live_history();
        if idx + 4 > concat.len() {
            return None;
        }
        let hash3 = $crate::encoding::match_table::storage::MatchTable::hash_position_at(
            concat,
            idx,
            $table.hash3_log,
            3,
        );
        // An out-of-range slot index is treated like an empty slot.
        let entry = $table
            .hash3_table
            .get(hash3)
            .copied()
            .unwrap_or($crate::encoding::match_table::storage::HC_EMPTY);
        let candidate_abs =
            $crate::encoding::match_table::storage::MatchTable::stored_abs_position_fast(
                entry,
                $table.position_base,
                $table.index_shift,
            )?;
        if candidate_abs < $table.history_abs_start || candidate_abs >= $abs_pos {
            return None;
        }
        let offset = $abs_pos - candidate_abs;
        if offset >= $crate::encoding::bt::HC3_MAX_OFFSET {
            return None;
        }
        let candidate_idx = candidate_abs - $table.history_abs_start;
        // Clamp the scan to the bytes that actually exist after `idx`, the
        // same way `for_each_repcode_candidate_body!` does. `idx + 4 <=
        // concat.len()` (checked above) keeps the subtraction in range, and
        // `candidate_idx < idx` means the candidate side has at least as
        // much room. Without the clamp a `$current_abs_end` beyond the live
        // history would let the unsafe scan read past `concat`.
        let tail_limit = $current_abs_end
            .saturating_sub($abs_pos)
            .min(concat.len() - idx);
        let base = concat.as_ptr();
        // SAFETY: `candidate_idx < idx < concat.len()` and
        // `tail_limit <= concat.len() - idx`, so both pointers stay inside
        // `concat` for the whole scan.
        let match_len = unsafe { $cpl(base.add(candidate_idx), base.add(idx), tail_limit) };
        (match_len >= $min_match_len).then_some($crate::encoding::opt::types::MatchCandidate {
            start: $abs_pos,
            offset,
            match_len,
        })
    }};
}
4740pub(crate) use hash3_candidate_body;
4741
/// `for_each_repcode_candidate_with_reps` body parameterized over the per-CPU
/// `common_prefix_len_ptr` symbol so the per-rep prefix probe inlines under
/// the wrapper's `target_feature` umbrella instead of crossing the ABI
/// boundary through the dispatcher. Three rep probes per encoded position →
/// thousands per segment, so the per-call barrier was non-trivial.
///
/// The callback `f` runs in the wrapper's umbrella context too, so closures
/// that capture mutable state still work (FnMut). Crate-private
/// (see `bt_insert_step_no_rebase_body!`).
///
/// Macro arguments:
/// - `$table`: match-table expression providing `history_abs_start` and
///   `live_history()`.
/// - `$abs_pos` / `$current_abs_end`: absolute position being probed and the
///   absolute end of the region a match may extend into.
/// - `$lit_len`: pending literal run length; `0` selects the rotated probe
///   order `reps[1]`, `reps[2]`, `reps[0] - 1`, anything else probes
///   `reps[0]`, `reps[1]`, `reps[2]`.
/// - `$reps`: the three repeat offsets.
/// - `$min_match_len`: shortest match length reported to `$f`.
/// - `$f`: callback invoked with a `MatchCandidate` per qualifying offset, in
///   probe order (the same offset may be reported twice).
/// - `$cpl`: `unsafe fn(*const u8, *const u8, usize) -> usize`-shaped prefix
///   length kernel.
///
/// The body exits early with a bare `return;` and names `MatchCandidate`
/// unqualified, so it must be expanded inside a function returning `()` with
/// `MatchCandidate` in scope.
macro_rules! for_each_repcode_candidate_body {
    (
        $table:expr,
        $abs_pos:ident,
        $lit_len:ident,
        $reps:ident,
        $current_abs_end:ident,
        $min_match_len:ident,
        $f:ident,
        $cpl:path $(,)?
    ) => {{
        let rep_offsets: [Option<usize>; 3] = if $lit_len == 0 {
            [
                Some($reps[1] as usize),
                Some($reps[2] as usize),
                // Lazy `then`, not `then_some`: the argument of `then_some`
                // is evaluated even when the condition is false, so a zero
                // `reps[0]` would underflow `reps[0] - 1` (a panic in
                // overflow-checked builds) although the value is discarded.
                ($reps[0] > 1).then(|| ($reps[0] - 1) as usize),
            ]
        } else {
            [
                Some($reps[0] as usize),
                Some($reps[1] as usize),
                Some($reps[2] as usize),
            ]
        };
        let concat = $table.live_history();
        let current_idx = $abs_pos - $table.history_abs_start;
        // The 4-byte gate below needs four readable bytes at `current_idx`.
        if current_idx + 4 > concat.len() {
            return;
        }
        let tail_limit = $current_abs_end.saturating_sub($abs_pos);
        let base = concat.as_ptr();
        let concat_len = concat.len();
        // Iterate by reference and copy out, so the element type is `usize`
        // regardless of how `array.into_iter()` resolves in the edition.
        for rep in rep_offsets.iter().flatten().copied() {
            // Zero offsets and offsets reaching before absolute position 0
            // cannot name a candidate.
            if rep == 0 || rep > $abs_pos {
                continue;
            }
            let candidate_pos = $abs_pos - rep;
            // Candidate must still be inside the live history.
            if candidate_pos < $table.history_abs_start {
                continue;
            }
            let candidate_idx = candidate_pos - $table.history_abs_start;
            // Donor `ZSTD_readMINMATCH` gate (zstd_opt.c:657-674): a
            // 4-byte (3-byte when min_match_len == 3) equality probe
            // before the full prefix scan. Equivalent filtering — a
            // mismatch here means `match_len < min_match_len`, which
            // the post-scan check rejects anyway — but it skips the
            // prefix-kernel call for the common no-match case (rep
            // offsets rarely hit on low-redundancy input).
            //
            // SAFETY: `current_idx + 4 <= concat_len` (early return
            // above) and `candidate_idx < current_idx` (rep >= 1), so
            // both 4-byte reads stay inside `concat`.
            let gate_matches = unsafe {
                let cand = base.add(candidate_idx).cast::<u32>().read_unaligned();
                let cur = base.add(current_idx).cast::<u32>().read_unaligned();
                if $min_match_len == 3 {
                    // Compare the three lowest-address bytes on any host:
                    // `to_le` places the first byte in the low 8 bits, so
                    // the 24-bit mask keeps bytes 0..3.
                    (cand.to_le() & 0x00FF_FFFF) == (cur.to_le() & 0x00FF_FFFF)
                } else {
                    cand == cur
                }
            };
            if !gate_matches {
                continue;
            }
            // SAFETY: `candidate_idx ≤ current_idx < concat_len` (since
            // candidate_pos ≤ abs_pos and we early-returned on
            // `current_idx + 4 > concat_len`). `max` clamps to the shorter
            // remaining run so neither pointer overruns `concat`.
            let max = (concat_len - candidate_idx)
                .min(concat_len - current_idx)
                .min(tail_limit);
            let match_len = unsafe { $cpl(base.add(candidate_idx), base.add(current_idx), max) };
            if match_len < $min_match_len {
                continue;
            }
            $f(MatchCandidate {
                start: $abs_pos,
                offset: rep,
                match_len,
            });
        }
    }};
}
4836pub(crate) use for_each_repcode_candidate_body;
4837
/// `bt_insert_and_collect_matches` body parameterized over the per-CPU
/// `count_match_from_indices` symbol. Same shape as
/// [`bt_insert_step_no_rebase_body`] — picks up the matching kernel through
/// `$cmf` so the per-iteration vector probe inlines under the wrapper's
/// `target_feature` umbrella. Returns nothing (matches the original method).
/// Crate-private (see `bt_insert_step_no_rebase_body!`).
///
/// The expansion inserts `$abs_pos` into the binary tree rooted at its hash
/// bucket while walking that tree, pushes every strictly-improving match into
/// `$out`, optionally probes the dictionary tree, and finally publishes
/// `skip_insert_until_abs` on `$table`.
///
/// Arguments:
/// * `$table` — the match table (hash table, BT pair table, window/dict
///   state); read and written.
/// * `$search_depth` — compare budget, further capped by
///   `$profile.max_chain_depth`.
/// * `$abs_pos` — absolute position being inserted and searched.
/// * `$current_abs_end` — absolute end of the current block; bounds every
///   match at `tail_limit = $current_abs_end - $abs_pos`.
/// * `$profile` — value exposing `max_chain_depth`.
/// * `$min_match_len` — shortest match worth reporting.
/// * `$best_len_for_skip` — dereferenced for the starting `best_len` and
///   forwarded to `BtMatcher::push_candidate_ladder`.
/// * `$out` — candidate sink forwarded to `push_candidate_ladder`.
/// * `$cmf` — match-length kernel, called as
///   `$cmf(concat, idx, candidate_idx, tail_limit, seed_len)`.
///
/// The expansion contains bare `return;` statements, so it must be the body
/// of a function returning `()`.
macro_rules! bt_insert_and_collect_matches_body {
    (
        $table:expr,
        $search_depth:expr,
        $abs_pos:ident,
        $current_abs_end:ident,
        $profile:ident,
        $min_match_len:ident,
        $best_len_for_skip:ident,
        $out:ident,
        $cmf:path $(,)?
    ) => {{
        let idx = $abs_pos - $table.history_abs_start;
        // Borrowed-aware live region (owned: `history[history_start..]`;
        // borrowed: the in-place input `[0, block_end)`). Reborrow-then-raw-ptr
        // so the slice holds NO borrow and coexists with the `&mut $table`
        // binary-tree writes below. Owned is byte-identical (same bytes).
        let concat: &[u8] = unsafe {
            let lh = $table.live_history();
            core::slice::from_raw_parts(lh.as_ptr(), lh.len())
        };
        // Too close to the end of the live region to hash / compare: nothing
        // to insert or collect.
        if idx + 8 > concat.len() {
            return;
        }
        debug_assert!(
            $abs_pos <= $current_abs_end,
            "BT collect called past current block end"
        );
        let tail_limit = $current_abs_end - $abs_pos;
        let hash = $crate::encoding::match_table::storage::MatchTable::hash_position_at(
            concat,
            idx,
            $table.hash_log,
            $table.search_mls,
        );
        // Prefetch the hash bucket now. For the large L16+ hash table over
        // high-entropy input the bucket is L3/DRAM-cold, and unlike upstream's
        // monolithic ZSTD_btGetAllMatches (which overlaps this miss with its
        // inline rep/hash3 prologue) the read+write of `hash_table[hash]`
        // below is reached with nothing to hide it behind — it stalled a large
        // share of this function's cycles. Issuing the hint here lets the miss
        // overlap the address setup that follows.
        #[cfg(all(
            target_feature = "sse",
            any(target_arch = "x86", target_arch = "x86_64")
        ))]
        {
            #[cfg(target_arch = "x86")]
            use core::arch::x86::{_MM_HINT_T0, _mm_prefetch};
            #[cfg(target_arch = "x86_64")]
            use core::arch::x86_64::{_MM_HINT_T0, _mm_prefetch};
            // SAFETY: `_mm_prefetch` is a hint that never faults. The address
            // is formed with `wrapping_add`, which (unlike `add`) has no
            // in-bounds precondition, so computing it is sound for any `hash`;
            // `hash` also indexes `hash_table` (bounds-checked) below.
            unsafe {
                _mm_prefetch(
                    $table.hash_table.as_ptr().wrapping_add(hash).cast(),
                    _MM_HINT_T0,
                );
            }
            // Prefetch the NEXT position's bucket too. The optimal-parser DP
            // advances one position per iteration, so this miss is issued a
            // full BT walk plus the next iteration's pre-collect work ahead of
            // the collect that will read it — far more lead than the same-call
            // hint above, enough to hide the full DRAM latency.
            if idx + 1 + 8 <= concat.len() {
                let hash_next =
                    $crate::encoding::match_table::storage::MatchTable::hash_position_at(
                        concat,
                        idx + 1,
                        $table.hash_log,
                        $table.search_mls,
                    );
                // SAFETY: `_mm_prefetch` never faults. `hash_next` is not
                // bounds-checked here, so the address is built with
                // `wrapping_add` rather than `add`: `add` requires the result
                // to stay inside the allocation, `wrapping_add` does not, which
                // keeps this sound even for an unexpected index.
                unsafe {
                    _mm_prefetch(
                        $table.hash_table.as_ptr().wrapping_add(hash_next).cast(),
                        _MM_HINT_T0,
                    );
                }
            }
        }
        let Some(relative_pos) = $table.relative_position($abs_pos) else {
            return;
        };
        // Table slots store `relative position + 1`; see the `HC_EMPTY`
        // discussion in the window-bound comment below.
        let stored = relative_pos + 1;
        let bt_mask = $table.bt_mask();
        // Hoist the BT pointer-pair table's base out of `self` once: every
        // access below is `chain_table[computed_index]` through `&mut self`,
        // which the optimizer cannot prove loop-invariant, so it reloads the
        // Vec's (ptr,len) from the struct AND bounds-checks on every tree
        // step (the donor walks a raw `U32* btable`, zstd_opt.c). The raw
        // base carries no borrow, so the `&self` helper calls in the loop
        // (`bt_pair_index_for_abs`, `window_low_abs_for_target`,
        // `relative_position`) coexist — they read other fields, never
        // `chain_table`. Indices are in bounds by the BT invariants:
        // `bt_pair_index_for_abs` returns `2*(abs & bt_mask) (+1)` ≤
        // `chain_table.len()-1`, and the slots only ever hold those values.
        let chain_ptr = $table.chain_table.as_mut_ptr();
        debug_assert_eq!($table.chain_table.len(), 2 << $table.bt_log());
        // See `bt_insert_step_no_rebase_body!`: saturating is needed for the
        // first BT walk of a fresh frame where `abs_pos < bt_mask`.
        let bt_low = $abs_pos.saturating_sub(bt_mask);
        let window_low = $table.window_low_abs_for_target($abs_pos);
        // Donor-style window bound in stored space so the BT-walk loop
        // condition rejects out-of-window / HC_EMPTY candidates WITHOUT
        // decoding them (mirrors upstream `while ... matchIndex >= matchLow`):
        // one range check on `match_stored` instead of decode-then-break,
        // dropping the wasted candidate_abs decode on every walk's terminating
        // step. candidate_abs(s) = (position_base + s - 1) - index_shift =
        // base + s (wrapping); in-window ⟺ candidate_abs - window_low <
        // abs_pos - window_low ⟺ s.wrapping_add(win_off) < win_range.
        // HC_EMPTY (s = 0) maps to base = (lowest representable abs) - 1 <
        // window_low, so it falls out of range and ends the walk.
        let win_off = $table
            .position_base
            .wrapping_sub(1)
            .wrapping_sub($table.index_shift)
            .wrapping_sub(window_low);
        let win_range = $abs_pos - window_low;
        // Raw `+ 9` is safe here — see `bt_insert_step_no_rebase_body!`
        // for the full discussion of the upstream `STREAM_ABS_HEADROOM`
        // cap in `MatchTable::add_data`.
        let mut match_end_abs = $abs_pos + 9;
        let mut compares_left = $profile.max_chain_depth.min($search_depth);
        let mut common_length_smaller = 0usize;
        let mut common_length_larger = 0usize;
        let pair_idx = $table.bt_pair_index_for_abs($abs_pos);
        let mut smaller_slot = pair_idx;
        let mut larger_slot = pair_idx + 1;
        // Read the previous bucket head (the walk's first candidate) and
        // install the current position as the new head.
        let mut match_stored = $table.hash_table[hash];
        $table.hash_table[hash] = stored;
        // Donor semantics: `bestLength` starts at `lengthToBeat - 1`; rep/hash3
        // probing may raise it; BT then only reports strictly longer matches.
        // `min_match_len >= HC_FORMAT_MINMATCH (3)` by configure invariant,
        // so `min_match_len - 1 >= 2` cannot underflow.
        debug_assert!(
            $min_match_len >= $crate::encoding::cost_model::HC_FORMAT_MINMATCH,
            "min_match_len must be at least HC_FORMAT_MINMATCH"
        );
        let mut best_len = (*$best_len_for_skip).max($min_match_len - 1);

        // Donor-form loop condition: the stored-space window range check
        // (`s.wrapping_add(win_off) < win_range`) rejects out-of-window and
        // HC_EMPTY candidates here, so the terminating step never enters the
        // body — no wasted candidate_abs decode, matching upstream's
        // `while ... matchIndex >= matchLow`.
        while compares_left > 0 && (match_stored as usize).wrapping_add(win_off) < win_range {
            compares_left -= 1;
            // The condition proved this candidate is in `[window_low,
            // abs_pos)`, so `match_stored >= 1` (HC_EMPTY is out of range) and
            // the `- 1` cannot underflow; candidate_abs == base + match_stored.
            let candidate_abs = ($table.position_base + (match_stored as usize - 1))
                .wrapping_sub($table.index_shift);

            let next_pair_idx = $table.bt_pair_index_for_abs(candidate_abs);
            // SAFETY: `next_pair_idx (+1)` = `2*(candidate_abs & bt_mask) (+1)`
            // ≤ `chain_table.len()-1`; `chain_ptr` is the hoisted live base,
            // table not realloc'd during the walk.
            let next_smaller = unsafe { *chain_ptr.add(next_pair_idx) };
            let next_larger = unsafe { *chain_ptr.add(next_pair_idx + 1) };
            // Bytes already known to be shared with every remaining candidate
            // on either side; the kernel resumes comparing from there.
            let seed_len = common_length_smaller.min(common_length_larger);
            let candidate_idx = candidate_abs - $table.history_abs_start;
            // SAFETY: BT walk invariant — `candidate_idx + tail_limit ≤
            // concat.len()`.
            let match_len = unsafe { $cmf(concat, idx, candidate_idx, tail_limit, seed_len) };

            if match_len > best_len {
                let offset = $abs_pos - candidate_abs;
                let accepted = $crate::encoding::bt::BtMatcher::push_candidate_ladder(
                    $out,
                    $best_len_for_skip,
                    $crate::encoding::opt::types::MatchCandidate {
                        start: $abs_pos,
                        offset,
                        match_len,
                    },
                    $min_match_len,
                );
                if accepted {
                    best_len = match_len;
                    // BT walker invariants: `candidate_abs < abs_pos`
                    // and `match_len <= tail_limit = current_abs_end -
                    // abs_pos`. So `candidate_abs + match_len <
                    // abs_pos + tail_limit = current_abs_end`, which
                    // fits in `usize` on every supported target (32-bit
                    // i686 included) — the addition stays within the
                    // current block.
                    let candidate_end = candidate_abs + match_len;
                    if candidate_end > match_end_abs {
                        match_end_abs = candidate_end;
                    }
                    if match_len >= tail_limit
                        || match_len > $crate::encoding::cost_model::HC_OPT_NUM
                    {
                        break;
                    }
                }
            }

            // The match ran to the block tail: there is no differing byte to
            // order the pair by, so stop the walk.
            if match_len >= tail_limit {
                break;
            }

            let candidate_next = candidate_idx + match_len;
            let current_next = idx + match_len;
            // SAFETY: first-differing positions after a match_len-long prefix;
            // match_len < tail_limit (break above) + BT-walk bound
            // idx/candidate_idx + tail_limit <= concat.len() keep both in range.
            if unsafe {
                *concat.get_unchecked(candidate_next) < *concat.get_unchecked(current_next)
            } {
                // SAFETY: `smaller_slot` holds a valid pair index (init
                // `pair_idx`, updated to `next_pair_idx + 1`); the `usize::MAX`
                // sentinel is set only just before `break`, never written here.
                unsafe { *chain_ptr.add(smaller_slot) = match_stored };
                common_length_smaller = match_len;
                if candidate_abs <= bt_low {
                    smaller_slot = usize::MAX;
                    break;
                }
                smaller_slot = next_pair_idx + 1;
                match_stored = next_larger;
            } else {
                // SAFETY: as above for `larger_slot`.
                unsafe { *chain_ptr.add(larger_slot) = match_stored };
                common_length_larger = match_len;
                if candidate_abs <= bt_low {
                    larger_slot = usize::MAX;
                    break;
                }
                larger_slot = next_pair_idx;
                match_stored = next_smaller;
            }
        }

        // Terminate whichever subtree links are still open.
        // SAFETY: both slots, when not the `usize::MAX` sentinel, hold valid
        // pair indices into the hoisted `chain_table` base.
        if smaller_slot != usize::MAX {
            unsafe {
                *chain_ptr.add(smaller_slot) = $crate::encoding::match_table::storage::HC_EMPTY
            };
        }
        if larger_slot != usize::MAX {
            unsafe {
                *chain_ptr.add(larger_slot) = $crate::encoding::match_table::storage::HC_EMPTY
            };
        }

        // Dict dual-probe (donor `ZSTD_dictMatchState`, zstd_opt.c:777-813):
        // after the live tree, descend the immutable dictionary BINARY TREE
        // (built in `prime_dms_bt`) with its OWN compare budget and push any
        // dict match longer than the live best into the ladder. The DUBT
        // descent reaches the longest dict match efficiently (a hash-chain
        // surfaced only the few same-bucket candidates and left most of the
        // dict savings unrealised at btlazy2 / btopt). Dict positions are
        // dictionary-relative concat indices in `[0, region)`, pinned at the
        // front of history, so a dict candidate at `dict_idx` sits at offset
        // `idx - dict_idx` (no donor `dmsIndexDelta`). The optimal parser
        // prices these (its DP lookahead values the repcode chain a dict match
        // seeds); the greedy/lazy parser commits the longest.
        if let Some(dms) = $table.dms.table() {
            let region = $table.dms.region_len();
            let dh = $crate::encoding::match_table::storage::MatchTable::hash_position_at(
                concat,
                idx,
                dms.hash_log,
                dms.mls,
            );
            let mut dcur = dms.hash_table[dh];
            // DUBT seed lengths: bytes already known common on each side, so
            // `$cmf` resumes from there (donor commonLengthSmaller/Larger).
            let mut common_smaller = 0usize;
            let mut common_larger = 0usize;
            let mut dms_compares = $profile.max_chain_depth.min($search_depth);
            while dms_compares > 0 && dcur != $crate::encoding::match_table::storage::HC_EMPTY {
                let dict_idx = (dcur - 1) as usize;
                // The dict tree holds only dict positions (`< region <= idx`).
                if dict_idx >= region || dict_idx >= idx {
                    break;
                }
                dms_compares -= 1;
                let pair = 2 * dict_idx;
                let seed = common_smaller.min(common_larger);
                // SAFETY: `dict_idx < idx` (checked just above) and `idx +
                // tail_limit <= concat.len()` — the same BT-walk invariant the
                // live walk's `$cmf` call relies on; the entry check alone only
                // proves `idx + 8 <= concat.len()`. `seed <= prior match_len
                // <= tail_limit`.
                let match_len = unsafe { $cmf(concat, idx, dict_idx, tail_limit, seed) };
                if match_len > best_len {
                    let offset = idx - dict_idx;
                    let accepted = $crate::encoding::bt::BtMatcher::push_candidate_ladder(
                        $out,
                        $best_len_for_skip,
                        $crate::encoding::opt::types::MatchCandidate {
                            start: $abs_pos,
                            offset,
                            match_len,
                        },
                        $min_match_len,
                    );
                    if accepted {
                        best_len = match_len;
                        // NOTE(review): the live walk extends `match_end_abs`
                        // from the candidate's end (`candidate_abs +
                        // match_len`), whereas this uses the current
                        // position's end (`$abs_pos + match_len`). Confirm the
                        // asymmetry is intentional.
                        let candidate_end = $abs_pos + match_len;
                        if candidate_end > match_end_abs {
                            match_end_abs = candidate_end;
                        }
                        if match_len > $crate::encoding::cost_model::HC_OPT_NUM {
                            break;
                        }
                    }
                }
                // Match reached the block tail: can't order the pair (donor
                // `ip+matchLength == iLimit`), and indexing `concat[idx +
                // match_len]` below would step past the searchable region.
                if match_len >= tail_limit {
                    break;
                }
                // Descend the DUBT (donor zstd_opt.c:806-811): dict candidate
                // smaller than input → its larger child is closer to `idx`.
                if concat[dict_idx + match_len] < concat[idx + match_len] {
                    common_smaller = match_len;
                    dcur = dms.chain_table[pair + 1];
                } else {
                    common_larger = match_len;
                    dcur = dms.chain_table[pair];
                }
            }
        }

        // `match_end_abs >= abs_pos + 9 >= 9` (initialized and monotonic),
        // so `match_end_abs - 8 >= 1` cannot underflow.
        $table.skip_insert_until_abs = match_end_abs - 8;
    }};
}
5175pub(crate) use bt_insert_and_collect_matches_body;
5176
5177impl HcMatchGenerator {
5178    /// Heap bytes this generator owns: the shared match table plus the BT
5179    /// backend's optimal-parser / LDM scratch (the HC knobs are inline).
5180    fn heap_size(&self) -> usize {
5181        self.table.heap_size() + self.backend.heap_size()
5182    }
5183
5184    fn should_run_btultra2_seed_pass<S: super::strategy::Strategy>(
5185        &self,
5186        current_len: usize,
5187    ) -> bool {
5188        // The in-block two-pass dynamic-stats seed (`initStats_ultra`)
5189        // is btultra2-only. `TWO_PASS_SEED` is `false` for every other
5190        // strategy — including btultra, which now shares the hash3
5191        // short-match probe but stays single-pass — so the seed call and
5192        // its body drop at codegen time for all non-btultra2 kernels.
5193        if !S::TWO_PASS_SEED {
5194            return false;
5195        }
5196        let HcBackend::Bt(bt) = &self.backend else {
5197            return false;
5198        };
5199        bt.opt_state.lit_length_sum == 0
5200            && bt.opt_state.dictionary_seed.is_none()
5201            && !self.table.dictionary_primed_for_frame
5202            && bt.ldm_sequences.is_empty()
5203            && self.table.window_size == current_len
5204            && self.table.history_abs_start == 0
5205            && self.table.chunk_lens.len() == 1
5206            && current_len > HC_PREDEF_THRESHOLD
5207    }
5208
5209    fn new(max_window_size: usize) -> Self {
5210        Self {
5211            table: super::match_table::storage::MatchTable::new(max_window_size),
5212            hc: super::hc::HcMatcher::new(2, HC_SEARCH_DEPTH, HC_TARGET_LEN),
5213            // Default to the zero-sized HC backend; `configure()` swaps
5214            // in a `BtMatcher` only when an optimal strategy lands.
5215            backend: HcBackend::Hc,
5216            // Lazy is the per-construct default — every production
5217            // caller calls `configure()` before the first encode and
5218            // overwrites this. Tests that drive `HcMatchGenerator`
5219            // without calling `configure()` end up in the
5220            // `start_matching_lazy` arm of the test dispatcher, which
5221            // matches the previous default behaviour.
5222            strategy_tag: super::strategy::StrategyTag::Lazy,
5223        }
5224    }
5225
5226    fn configure(&mut self, config: HcConfig, tag: super::strategy::StrategyTag, window_log: u8) {
5227        use super::strategy::StrategyTag;
5228        // Mirror the driver-resolved strategy tag so the
5229        // `#[cfg(test)] start_matching` dispatcher can route
5230        // BtOpt / BtUltra / BtUltra2 to distinct monomorphisations.
5231        self.strategy_tag = tag;
5232        let is_btultra2 = tag == StrategyTag::BtUltra2;
5233        let uses_bt = matches!(
5234            tag,
5235            StrategyTag::Btlazy2
5236                | StrategyTag::BtOpt
5237                | StrategyTag::BtUltra
5238                | StrategyTag::BtUltra2
5239        );
5240        // btultra and btultra2 both run the mls=3 hash3 short-match probe
5241        // (clevels.h minMatch 3). The `is_btultra2` flag below stays
5242        // exclusive to btultra2 because it tweaks the BT rebase boundary,
5243        // not match finding.
5244        let wants_hash3 = matches!(tag, StrategyTag::BtUltra | StrategyTag::BtUltra2);
5245        let next_hash3_log = if wants_hash3 {
5246            HC3_HASH_LOG.min(window_log as usize)
5247        } else {
5248            0
5249        };
5250        let resize = self.table.hash_log != config.hash_log
5251            || self.table.chain_log != config.chain_log
5252            || self.table.hash3_log != next_hash3_log;
5253        self.table.hash_log = config.hash_log;
5254        self.table.chain_log = config.chain_log;
5255        self.table.hash3_log = next_hash3_log;
5256        self.hc.search_depth = if uses_bt {
5257            config.search_depth
5258        } else {
5259            config.search_depth.min(MAX_HC_SEARCH_DEPTH)
5260        };
5261        self.hc.target_len = config.target_len;
5262        // Mirror strategy-derived flags + HC search depth onto MatchTable
5263        // so the BT walker and rebase machinery can read them directly
5264        // without dispatching back through HcMatchGenerator.
5265        self.table.search_depth = self.hc.search_depth;
5266        self.table.is_btultra2 = is_btultra2;
5267        self.table.uses_bt = uses_bt;
5268        // BT finder hash width, donor `mls = BOUNDED(4, cParams.minMatch, 6)`,
5269        // carried explicitly in the level config so a `target_length` override
5270        // cannot silently flip the finder between 5- and 4-byte hashing. Only
5271        // the BT body reads it; HC/lazy levels leave it at 4. clevels.h
5272        // (srcSize > 256 KiB tier): btlazy2 L13-15 + btopt L16 are minMatch=5,
5273        // btopt L17 is minMatch=4, btultra/btultra2 are minMatch=3 (4-byte main
5274        // hash + the hash3 short-match probe).
5275        self.table.search_mls = config.search_mls;
5276        // Stage D: promote the backend discriminator. HC modes drop the
5277        // BT scratch buffers entirely; switching back into a BT mode
5278        // allocates a fresh `BtMatcher` on demand.
5279        match (&self.backend, self.table.uses_bt) {
5280            (HcBackend::Hc, true) => {
5281                self.backend = HcBackend::Bt(alloc::boxed::Box::new(super::bt::BtMatcher::new()));
5282            }
5283            (HcBackend::Bt(_), false) => {
5284                self.backend = HcBackend::Hc;
5285            }
5286            _ => {}
5287        }
5288        if resize && !self.table.hash_table.is_empty() {
5289            // Force reallocation on next ensure_tables() call.
5290            self.table.hash_table.clear();
5291            self.table.hash3_table.clear();
5292            self.table.chain_table.clear();
5293        }
5294    }
5295
5296    fn seed_dictionary_entropy(
5297        &mut self,
5298        huff: Option<&crate::huff0::huff0_encoder::HuffmanTable>,
5299        ll: Option<&crate::fse::fse_encoder::FSETable>,
5300        ml: Option<&crate::fse::fse_encoder::FSETable>,
5301        of: Option<&crate::fse::fse_encoder::FSETable>,
5302    ) {
5303        if let HcBackend::Bt(bt) = &mut self.backend {
5304            bt.opt_state.seed_dictionary_entropy(huff, ll, ml, of);
5305        }
5306    }
5307
5308    /// Install (or clear) the long-distance-match producer (#27). Only
5309    /// the BT backend owns an `ldm_producer` slot; on the HC (lazy)
5310    /// backend the producer is dropped because there is no optimal-parser
5311    /// candidate buffer to seed. Call after [`Self::reset`].
5312    #[cfg(feature = "hash")]
5313    fn set_ldm_producer(&mut self, producer: Option<super::ldm::LdmProducer>) {
5314        if let HcBackend::Bt(bt) = &mut self.backend {
5315            bt.ldm_producer = producer;
5316        }
5317    }
5318
5319    /// Move the LDM producer out of the BT backend, leaving `None`. Used by the
5320    /// dictionary snapshot path: the producer carries no dictionary state (LDM
5321    /// is not dict-primed; its hash table is empty at capture), so it is not
5322    /// retained in the snapshot — the working frame's freshly-reset producer is
5323    /// reinstated on restore instead.
5324    #[cfg(feature = "hash")]
5325    fn take_ldm_producer(&mut self) -> Option<super::ldm::LdmProducer> {
5326        if let HcBackend::Bt(bt) = &mut self.backend {
5327            bt.ldm_producer.take()
5328        } else {
5329            None
5330        }
5331    }
5332
5333    fn reset(&mut self, reuse_space: impl FnMut(Vec<u8>)) {
5334        self.table.reset(reuse_space);
5335        if let HcBackend::Bt(bt) = &mut self.backend {
5336            bt.reset();
5337        }
5338    }
5339
5340    /// Backfill positions from the tail of the previous slice that couldn't be
5341    /// hashed at the time (insert_position needs 4 bytes of lookahead).
5342    fn skip_matching(&mut self, incompressible_hint: Option<bool>) {
5343        self.table.skip_matching(incompressible_hint);
5344    }
5345
5346    /// Runtime-dispatched entry kept only for in-crate tests. Production
5347    /// callers reach the inner loops through
5348    /// [`Self::start_matching_strategy`] / [`MatchGeneratorDriver::compress_block`]
5349    /// which pick the lazy / optimal arm from `S::USE_BT` at
5350    /// monomorphisation time.
5351    #[cfg(test)]
5352    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
5353        use super::strategy::{self, StrategyTag};
5354        // Dispatch on the mirrored `strategy_tag` so each test runs
5355        // under the same monomorphisation production would pick.
5356        // `BtOpt` / `BtUltra` / `BtUltra2` remain distinct here even
5357        // though `table.uses_bt` / `is_btultra2` alone can't separate
5358        // BtOpt from BtUltra.
5359        match self.strategy_tag {
5360            StrategyTag::Fast | StrategyTag::Dfast | StrategyTag::Greedy | StrategyTag::Lazy => {
5361                self.start_matching_lazy(&mut handle_sequence)
5362            }
5363            StrategyTag::Btlazy2 => self.start_matching_btlazy2(&mut handle_sequence),
5364            StrategyTag::BtOpt => {
5365                self.start_matching_optimal::<strategy::BtOpt>(&mut handle_sequence)
5366            }
5367            StrategyTag::BtUltra => {
5368                self.start_matching_optimal::<strategy::BtUltra>(&mut handle_sequence)
5369            }
5370            StrategyTag::BtUltra2 => {
5371                self.start_matching_optimal::<strategy::BtUltra2>(&mut handle_sequence)
5372            }
5373        }
5374    }
5375
5376    /// Strategy-aware entry point used by
5377    /// [`MatchGeneratorDriver::compress_block`]. Branches on
5378    /// `S::USE_BT` — a compile-time `const` — so each
5379    /// monomorphisation keeps exactly one arm: `Lazy` /
5380    /// `Fast` / `Dfast` / `Greedy` see only `start_matching_lazy`,
5381    /// `BtOpt` / `BtUltra` / `BtUltra2` see only
5382    /// `start_matching_optimal`. The inherent test-only
5383    /// [`HcMatchGenerator::start_matching`] reaches the same arms by
5384    /// runtime-matching on `self.strategy_tag` (the parse-mode field
5385    /// has been removed); production never invokes that path.
5386    pub(crate) fn start_matching_strategy<S: super::strategy::Strategy>(
5387        &mut self,
5388        handle_sequence: &mut impl for<'a> FnMut(Sequence<'a>),
5389    ) {
5390        debug_assert_eq!(
5391            self.table.uses_bt,
5392            S::USE_BT,
5393            "Strategy::USE_BT disagrees with runtime table.uses_bt at HC dispatch"
5394        );
5395        if S::USE_BT {
5396            self.start_matching_optimal::<S>(handle_sequence)
5397        } else {
5398            self.start_matching_lazy(handle_sequence)
5399        }
5400    }
5401
5402    pub(crate) fn start_matching_lazy(
5403        &mut self,
5404        mut handle_sequence: impl for<'a> FnMut(Sequence<'a>),
5405    ) {
5406        self.table.ensure_tables();
5407
5408        // `current_block_range()` is borrowed-aware: owned → last committed
5409        // chunk; borrowed → the staged in-place block range.
5410        let (current_abs_start, current_len) = self.table.current_block_range();
5411        if current_len == 0 {
5412            return;
5413        }
5414        // The current block is the tail of `history` (owned) or the staged
5415        // borrowed range (`get_last_space()` resolves both). Hoist it as a raw
5416        // slice: the routine mutates the hash/chain tables + `offset_hist` but
5417        // never reallocates `history`, so the slice stays valid and we avoid
5418        // re-borrowing `self.table` (which would conflict with the
5419        // `offset_hist` write).
5420        let current_ptr = self.table.get_last_space().as_ptr();
5421        let current: &[u8] = unsafe { core::slice::from_raw_parts(current_ptr, current_len) };
5422
5423        let current_abs_end = current_abs_start + current_len;
5424        self.table
5425            .backfill_boundary_positions(current_abs_start, current_abs_end);
5426
5427        let mut pos = 0usize;
5428        let mut literals_start = 0usize;
5429        while pos + HC_MIN_MATCH_LEN <= current_len {
5430            let abs_pos = current_abs_start + pos;
5431            let lit_len = pos - literals_start;
5432
5433            let best = self.hc.find_best_match(&self.table, abs_pos, lit_len);
5434            if let Some(candidate) = self.hc.pick_lazy_match(&self.table, abs_pos, lit_len, best) {
5435                self.table
5436                    .insert_match_span(abs_pos, candidate.start + candidate.match_len);
5437                let start = candidate.start - current_abs_start;
5438                let literals = &current[literals_start..start];
5439                handle_sequence(Sequence::Triple {
5440                    literals,
5441                    offset: candidate.offset,
5442                    match_len: candidate.match_len,
5443                });
5444                let _ = encode_offset_with_history(
5445                    candidate.offset as u32,
5446                    literals.len() as u32,
5447                    &mut self.table.offset_hist,
5448                );
5449                pos = start + candidate.match_len;
5450                literals_start = pos;
5451            } else {
5452                self.table.insert_position(abs_pos);
5453                pos += 1;
5454            }
5455        }
5456
5457        // Insert remaining hashable positions in the tail (the matching loop
5458        // stops at HC_MIN_MATCH_LEN but insert_position only needs 4 bytes).
5459        while pos + 4 <= current_len {
5460            self.table.insert_position(current_abs_start + pos);
5461            pos += 1;
5462        }
5463
5464        if literals_start < current_len {
5465            handle_sequence(Sequence::Literals {
5466                literals: &current[literals_start..],
5467            });
5468        }
5469    }
5470
    /// Register the borrowed input window for the no-copy one-shot path.
    ///
    /// Thin forwarder: hands `buffer` to `self.table.set_borrowed_window`
    /// and does nothing else.
    ///
    /// # Safety
    /// `buffer` must outlive the borrowed scans (see `MatchTable`). This
    /// wrapper adds no requirement of its own; the caller inherits the
    /// table-level contract unchanged.
    pub(crate) unsafe fn set_borrowed_window(&mut self, buffer: &[u8]) {
        // SAFETY: forwarded liveness contract — our caller made the same
        // promise the callee requires.
        unsafe { self.table.set_borrowed_window(buffer) };
    }
5478
    /// Forwards to `self.table.clear_borrowed_window()`.
    ///
    /// NOTE(review): presumably the counterpart of
    /// [`Self::set_borrowed_window`], dropping the registered window — confirm
    /// against `MatchTable`.
    pub(crate) fn clear_borrowed_window(&mut self) {
        self.table.clear_borrowed_window();
    }
5482
    /// Borrowed (no-copy) equivalent of [`Self::start_matching_lazy`]: stage
    /// the in-place block range, then run the same lazy chain parse. The
    /// parse reads its range via `current_block_range()` and its bytes via
    /// `get_last_space()` / `live_history()`, all borrowed-aware, so the block
    /// is scanned in place with the per-position window_low offset cap.
    ///
    /// `block_start` / `block_end` are passed straight to
    /// `stage_borrowed_block`; presumably they are offsets into the borrowed
    /// window with `block_end` exclusive — TODO confirm against `MatchTable`.
    /// `handle_sequence` is forwarded untouched to the lazy parse.
    pub(crate) fn start_matching_lazy_borrowed(
        &mut self,
        block_start: usize,
        block_end: usize,
        handle_sequence: impl for<'a> FnMut(Sequence<'a>),
    ) {
        // Staging must come first: the parse below picks its range up from
        // the table rather than from arguments.
        self.table.stage_borrowed_block(block_start, block_end);
        self.start_matching_lazy(handle_sequence);
    }
5497
    /// Borrowed (no-copy) equivalent of the lazy `skip_matching`: stage the
    /// in-place block, then seed positions without an owned-history append.
    ///
    /// `block_start` / `block_end` go to `stage_borrowed_block` as-is
    /// (presumably window offsets, end exclusive — TODO confirm), and
    /// `incompressible_hint` is handed unchanged to `self.table.skip_matching`.
    pub(crate) fn skip_matching_borrowed(
        &mut self,
        block_start: usize,
        block_end: usize,
        incompressible_hint: Option<bool>,
    ) {
        // Stage first so the table-level skip operates on this block.
        self.table.stage_borrowed_block(block_start, block_end);
        self.table.skip_matching(incompressible_hint);
    }
5509
    /// Donor `ZSTD_btlazy2` (levels 13-15): binary-tree match finder with a
    /// greedy/lazy parse. Bare dispatcher — resolves the runtime tier ONCE
    /// per block via `select_kernel()` and calls the matching
    /// `start_matching_btlazy2_<kernel>` wrapper, so the per-position BT
    /// collect runs under a single `#[target_feature]` umbrella (mirrors
    /// `build_optimal_plan_impl`). See `start_matching_btlazy2_body!` for the
    /// shared loop.
    ///
    /// Exactly one of the three `cfg` sections below survives compilation
    /// and becomes the function's tail expression. `handle_sequence` is lent
    /// to the chosen wrapper as `&mut`.
    fn start_matching_btlazy2(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
        // aarch64 (little-endian): a single tier, no runtime selection.
        #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
        unsafe {
            // SAFETY (assumed): the callee only requires `neon`, which this
            // file treats as the baseline tier on aarch64-little —
            // NOTE(review): confirm for every supported target.
            self.start_matching_btlazy2_neon(&mut handle_sequence)
        }
        // x86 / x86_64: pick the tier at runtime.
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            use crate::encoding::fastpath::{FastpathKernel, select_kernel};
            match select_kernel() {
                FastpathKernel::Avx2Bmi2 => unsafe {
                    // SAFETY (assumed): `select_kernel()` is expected to
                    // report this tier only when AVX2 and BMI2 are present —
                    // TODO confirm in `fastpath`.
                    self.start_matching_btlazy2_avx2_bmi2(&mut handle_sequence)
                },
                FastpathKernel::Sse42 => unsafe {
                    // SAFETY (assumed): as above, for SSE4.2.
                    self.start_matching_btlazy2_sse42(&mut handle_sequence)
                },
                FastpathKernel::Scalar => self.start_matching_btlazy2_scalar(&mut handle_sequence),
            }
        }
        // Every other architecture: scalar only.
        #[cfg(not(any(
            all(target_arch = "aarch64", target_endian = "little"),
            target_arch = "x86",
            target_arch = "x86_64"
        )))]
        {
            self.start_matching_btlazy2_scalar(&mut handle_sequence)
        }
    }
5544
    /// NEON instantiation of `start_matching_btlazy2_body!`: expands the
    /// shared btlazy2 loop with the NEON candidate collector and the NEON
    /// `count_match_from_indices`.
    ///
    /// # Safety
    /// Compiled with `#[target_feature(enable = "neon")]`; the caller must
    /// ensure the running CPU supports NEON.
    #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
    #[target_feature(enable = "neon")]
    unsafe fn start_matching_btlazy2_neon(
        &mut self,
        mut handle_sequence: impl for<'a> FnMut(Sequence<'a>),
    ) {
        start_matching_btlazy2_body!(
            self,
            handle_sequence,
            collect_optimal_candidates_initialized_neon,
            crate::encoding::fastpath::neon::count_match_from_indices
        )
    }
5558
    /// SSE4.2 instantiation of `start_matching_btlazy2_body!`: expands the
    /// shared btlazy2 loop with the SSE4.2 candidate collector and the SSE4.2
    /// `count_match_from_indices`.
    ///
    /// # Safety
    /// Compiled with `#[target_feature(enable = "sse4.2")]`; the caller must
    /// ensure the running CPU supports SSE4.2.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[target_feature(enable = "sse4.2")]
    unsafe fn start_matching_btlazy2_sse42(
        &mut self,
        mut handle_sequence: impl for<'a> FnMut(Sequence<'a>),
    ) {
        start_matching_btlazy2_body!(
            self,
            handle_sequence,
            collect_optimal_candidates_initialized_sse42,
            crate::encoding::fastpath::sse42::count_match_from_indices
        )
    }
5572
    /// AVX2+BMI2 instantiation of `start_matching_btlazy2_body!`: expands the
    /// shared btlazy2 loop with the AVX2/BMI2 candidate collector and the
    /// AVX2/BMI2 `count_match_from_indices`.
    ///
    /// # Safety
    /// Compiled with `#[target_feature(enable = "avx2,bmi2")]`; the caller
    /// must ensure the running CPU supports both AVX2 and BMI2.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[target_feature(enable = "avx2,bmi2")]
    unsafe fn start_matching_btlazy2_avx2_bmi2(
        &mut self,
        mut handle_sequence: impl for<'a> FnMut(Sequence<'a>),
    ) {
        start_matching_btlazy2_body!(
            self,
            handle_sequence,
            collect_optimal_candidates_initialized_avx2_bmi2,
            crate::encoding::fastpath::avx2_bmi2::count_match_from_indices
        )
    }
5586
    /// Scalar instantiation of `start_matching_btlazy2_body!`: expands the
    /// shared btlazy2 loop with the scalar candidate collector and the scalar
    /// `count_match_from_indices`. Safe to call on any CPU.
    // Scalar wrapper: no `#[target_feature]`; `$collect` (the scalar collect)
    // is a safe fn, so the body macro's `unsafe` block is inert here. Same cfg
    // as `collect_optimal_candidates_initialized_scalar` (absent on
    // aarch64-little, where NEON is the baseline tier).
    #[cfg(not(all(target_arch = "aarch64", target_endian = "little")))]
    // The shared macro body carries `unsafe` blocks for the SIMD variants;
    // silence the lint they trigger on this all-safe path.
    #[allow(unused_unsafe)]
    fn start_matching_btlazy2_scalar(
        &mut self,
        mut handle_sequence: impl for<'a> FnMut(Sequence<'a>),
    ) {
        start_matching_btlazy2_body!(
            self,
            handle_sequence,
            collect_optimal_candidates_initialized_scalar,
            crate::encoding::fastpath::scalar::count_match_from_indices
        )
    }
5604
    /// Optimal-parse entry point for strategy `S`, run over the current block.
    ///
    /// Steps, in order:
    /// 1. Resolve the current block through `current_block_range()`; return
    ///    immediately when it is empty.
    /// 2. Table upkeep: `apply_limited_update_after_long_match`,
    ///    `backfill_boundary_positions`, and a reset of `next_to_update3`.
    /// 3. Hand the live history to the BT backend's `prepare_ldm_candidates`.
    /// 4. Run the BtUltra2 seed pass when
    ///    `should_run_btultra2_seed_pass::<S>` says so.
    /// 5. Build the plan segment by segment with `build_optimal_plan::<S>`,
    ///    updating the price statistics after every segment.
    /// 6. Emit the finished plan to `handle_sequence` through
    ///    `emit_optimal_plan`, then return the scratch plan buffer and the
    ///    price state to the backend for reuse.
    fn start_matching_optimal<S: super::strategy::Strategy>(
        &mut self,
        mut handle_sequence: impl for<'a> FnMut(Sequence<'a>),
    ) {
        self.table.ensure_tables();
        // Borrowed-aware: owned → last committed chunk; borrowed → staged
        // in-place block range.
        let (current_abs_start, current_len) = self.table.current_block_range();
        if current_len == 0 {
            return;
        }
        let current_ptr = self.table.get_last_space().as_ptr();
        // `start_matching_optimal()` mutates tables/state but never mutates or
        // reallocates `self.table.history`, so this tail slice remains valid for
        // the duration of the routine and avoids cloning the full block.
        //
        // SAFETY (assumed): additionally relies on `get_last_space()` being at
        // least `current_len` bytes long — TODO confirm against `MatchTable`.
        let current = unsafe { core::slice::from_raw_parts(current_ptr, current_len) };

        let current_abs_end = current_abs_start + current_len;
        self.table
            .apply_limited_update_after_long_match(current_abs_start);
        // Snapshot the hash3 cursor after the limited update but before the
        // boundary backfill; it is written back to `next_to_update3` below.
        let hash3_start_cursor = self
            .table
            .skip_insert_until_abs
            .max(self.table.history_abs_start);
        self.table
            .backfill_boundary_positions(current_abs_start, current_abs_end);
        self.table.next_to_update3 = hash3_start_cursor;
        // Borrow split: `prepare_ldm_candidates` needs immutable
        // access to the live history (the post-`history_start`
        // slice of `self.table.history`) while it mutates the LDM
        // bucket table owned by `self.backend.bt_mut()`. Both live
        // in disjoint fields of `Self`, so we capture the slice +
        // its base before reaching for `bt_mut()`.
        //
        // The producer operates in absolute stream coordinates
        // throughout; `live_history[0]` corresponds to absolute
        // `history_abs_start` (donor `base + dictLimit`), and the
        // abs→slice translation happens inside the producer at
        // each `live_history[..]` access. Passing the full
        // `history` Vec would index into the dead prefix (the
        // bytes already retired past `history_start`).
        let live_history = self.table.live_history();
        let history_abs_start = self.table.history_abs_start;
        self.backend.bt_mut().prepare_ldm_candidates(
            live_history,
            history_abs_start,
            current_abs_start,
            current_len,
        );

        if self.should_run_btultra2_seed_pass::<S>(current_len) {
            self.run_btultra2_seed_pass(current, current_abs_start, current_len);
        }

        // Const-generic profile selection: every field is folded from
        // S's associated consts (MAX_CHAIN_DEPTH /
        // SUFFICIENT_MATCH_LEN / ACCURATE_PRICE / FAVOR_SMALL_OFFSETS),
        // so the optimiser produces the literal at codegen time
        // without a runtime match.
        let profile = HcOptimalCostProfile::const_for_strategy::<S>();
        // Move the price state and the plan scratch out of the backend so they
        // can be used while `self` is mutably borrowed by the planner; both
        // are put back at the end of the routine.
        let mut opt_state =
            core::mem::replace(&mut self.backend.bt_mut().opt_state, HcOptState::new());
        opt_state.rescale_freqs(current, profile);
        let mut best_plan = core::mem::take(&mut self.backend.bt_mut().opt_segment_plan_scratch);
        best_plan.clear();
        let mut plan_reps = self.table.offset_hist;
        let (mut cursor, mut plan_litlen) = self
            .table
            .donor_opt_start_cursor_and_litlen(current_abs_start);
        let mut plan_literals_cursor = 0usize;
        // Planning stops 8 bytes short of the block end; presumably this
        // mirrors the donor's match-loop limit — TODO confirm.
        let match_loop_limit = current_len.saturating_sub(8);
        while cursor < match_loop_limit {
            let remaining_len = current_len - cursor;
            let segment_abs_start = current_abs_start + cursor;
            // Everything appended to `best_plan` from here on belongs to this
            // segment.
            let segment_start = best_plan.len();
            let (_, end_reps, end_litlen, consumed_len) = self.build_optimal_plan::<S>(
                &current[cursor..],
                segment_abs_start,
                remaining_len,
                HcOptimalPlanState {
                    block_offset: cursor,
                    reps: plan_reps,
                    litlen: plan_litlen,
                    profile,
                },
                &opt_state,
                &mut best_plan,
            );
            // Feed only the freshly planned segment into the statistics.
            BtMatcher::update_plan_stats_segment(
                current,
                current_len,
                &best_plan[segment_start..],
                &mut plan_literals_cursor,
                &mut plan_reps,
                &mut opt_state,
                profile.accurate,
            );
            // The planner's end state is what the next segment starts from;
            // it overwrites whatever the stats update left in `plan_reps`.
            plan_reps = end_reps;
            plan_litlen = end_litlen;
            cursor += consumed_len;
        }

        self.table
            .emit_optimal_plan(current_len, &best_plan, &mut handle_sequence);
        // Return the (emptied) scratch buffer and the updated price state.
        best_plan.clear();
        self.backend.bt_mut().opt_segment_plan_scratch = best_plan;
        self.backend.bt_mut().opt_state = opt_state;
    }
5713
    /// Statistics-only first pass for BtUltra2.
    ///
    /// Runs the same segment loop as the real optimal parse over `current`,
    /// but discards each segment's plan right after feeding it to
    /// `update_plan_stats_segment`, so only the price statistics in
    /// `opt_state` survive. Afterwards the table cursors are reset so the
    /// real pass starts over from `current_abs_start`.
    ///
    /// * `current` — bytes of the block being compressed.
    /// * `current_abs_start` — absolute stream position of `current[0]`.
    /// * `current_len` — number of bytes of `current` to plan over.
    fn run_btultra2_seed_pass(
        &mut self,
        current: &[u8],
        current_abs_start: usize,
        current_len: usize,
    ) {
        // The seed pass is BtUltra2-exclusive by name (the visible call site
        // is gated by `should_run_btultra2_seed_pass`), so pin `S` to
        // `BtUltra2` for both the cost-profile lookup and the
        // `build_optimal_plan::<S>` call below.
        type S = super::strategy::BtUltra2;
        let seed_profile = HcOptimalCostProfile::const_for_strategy::<S>();
        // Borrow the backend's price state for the duration of the pass; it is
        // written back, updated, once the loop finishes.
        let mut opt_state =
            core::mem::replace(&mut self.backend.bt_mut().opt_state, HcOptState::new());
        opt_state.rescale_freqs(current, seed_profile);
        let mut seed_reps = self.table.offset_hist;
        let (mut cursor, mut seed_litlen) = self
            .table
            .donor_opt_start_cursor_and_litlen(current_abs_start);
        let mut seed_literals_cursor = 0usize;
        let mut seed_plan = core::mem::take(&mut self.backend.bt_mut().opt_seed_plan_scratch);
        seed_plan.clear();
        // Same 8-byte tail cutoff as the real pass.
        let match_loop_limit = current_len.saturating_sub(8);
        while cursor < match_loop_limit {
            let remaining_len = current_len - cursor;
            let segment_abs_start = current_abs_start + cursor;
            let segment_start = seed_plan.len();
            let (_, end_reps, end_litlen, consumed_len) = self.build_optimal_plan::<S>(
                &current[cursor..],
                segment_abs_start,
                remaining_len,
                HcOptimalPlanState {
                    block_offset: cursor,
                    reps: seed_reps,
                    litlen: seed_litlen,
                    profile: seed_profile,
                },
                &opt_state,
                &mut seed_plan,
            );
            BtMatcher::update_plan_stats_segment(
                current,
                current_len,
                &seed_plan[segment_start..],
                &mut seed_literals_cursor,
                &mut seed_reps,
                &mut opt_state,
                seed_profile.accurate,
            );
            // The plan itself is throwaway: drop this segment's entries now
            // that the statistics have absorbed them.
            seed_plan.truncate(segment_start);
            seed_reps = end_reps;
            seed_litlen = end_litlen;
            cursor += consumed_len;
        }
        // Hand the scratch buffer and the seeded statistics back.
        seed_plan.clear();
        self.backend.bt_mut().opt_seed_plan_scratch = seed_plan;
        self.backend.bt_mut().opt_state = opt_state;

        // Donor initStats_ultra keeps the collected entropy statistics but
        // invalidates the first-pass matchfinder history before the real pass.
        self.table.position_base = self.table.history_abs_start;
        self.table.index_shift = current_len;
        self.table.next_to_update3 = current_abs_start;
        self.table.skip_insert_until_abs = current_abs_start;
        // Donor `ZSTD_initStats_ultra()` invalidates the first scan by moving
        // `window.base` back by `srcSize`, making the real pass start at
        // `curr == srcSize` instead of 0. Position 0 is therefore a valid
        // table entry in the second pass even though raw C tables reserve
        // value 0 as empty during an unshifted first pass.
        self.table.allow_zero_relative_position = true;
    }
5785
    /// Plans one segment of the optimal parse for strategy `S`.
    ///
    /// Selects the `build_optimal_plan_impl` instantiation whose two const
    /// flags equal `S::ACCURATE_PRICE` / `S::FAVOR_SMALL_OFFSETS` and forwards
    /// every argument to it unchanged.
    ///
    /// * `current` — bytes to plan over; callers pass the block tail that
    ///   starts at the segment.
    /// * `current_abs_start` — absolute stream position of `current[0]`.
    /// * `current_len` — number of bytes available in `current`.
    /// * `initial_state` — offset history, literal run length, block offset
    ///   and cost profile the segment starts from.
    /// * `stats` — price statistics, read-only here.
    /// * `out` — plan buffer the implementation writes the segment into.
    ///
    /// Returns a 4-tuple. The visible callers ignore the first element and
    /// read the remaining three as (end offset history, end literal length,
    /// bytes consumed). NOTE(review): the meaning of the leading `u32` is not
    /// visible from here.
    fn build_optimal_plan<S: super::strategy::Strategy>(
        &mut self,
        current: &[u8],
        current_abs_start: usize,
        current_len: usize,
        initial_state: HcOptimalPlanState,
        stats: &HcOptState,
        out: &mut Vec<HcOptimalSequence>,
    ) -> (u32, [u32; 3], usize, usize) {
        // Debug-only guards: the strategy must be BT-based and the runtime
        // profile must agree with the associated consts dispatched on below.
        debug_assert!(S::USE_BT, "build_optimal_plan called on non-BT strategy");
        debug_assert_eq!(initial_state.profile.accurate, S::ACCURATE_PRICE);
        debug_assert_eq!(
            initial_state.profile.favor_small_offsets,
            S::FAVOR_SMALL_OFFSETS
        );
        // `S::ACCURATE_PRICE` / `S::FAVOR_SMALL_OFFSETS` cannot appear
        // as const-generic arguments yet (`generic_const_exprs` is
        // still unstable), so dispatch over a 4-arm match — but on the
        // strategy's ASSOCIATED CONSTS, not the runtime profile (the
        // `debug_assert_eq`s above pin the runtime profile to those
        // consts). A const scrutinee folds the three dead arms at
        // monomorphisation; matching the runtime profile instead kept
        // all four `#[inline(always)]` DP bodies (~16 KB each) alive in
        // EVERY `S` instantiation — ~360 KB of the wasm payload.
        match (S::ACCURATE_PRICE, S::FAVOR_SMALL_OFFSETS) {
            (true, false) => self.build_optimal_plan_impl::<S, true, false>(
                current,
                current_abs_start,
                current_len,
                initial_state,
                stats,
                out,
            ),
            (true, true) => self.build_optimal_plan_impl::<S, true, true>(
                current,
                current_abs_start,
                current_len,
                initial_state,
                stats,
                out,
            ),
            (false, false) => self.build_optimal_plan_impl::<S, false, false>(
                current,
                current_abs_start,
                current_len,
                initial_state,
                stats,
                out,
            ),
            (false, true) => self.build_optimal_plan_impl::<S, false, true>(
                current,
                current_abs_start,
                current_len,
                initial_state,
                stats,
                out,
            ),
        }
    }
5845
    /// Cross-platform DP entry. Picks the kernel-specific variant so the
    /// entire optimal-parser DP body (per-position match gathering, price
    /// updates, traceback) runs inside a single `target_feature` umbrella
    /// alongside the per-position `collect_optimal_candidates_initialized_
    /// <kernel>`. This eliminates the final ABI barrier on the hot per-
    /// position match-collection call — the level22 critical path is now
    /// one straight-line inline chain from DP body down through BT walk
    /// and match-length probes.
    ///
    /// Arguments and return value are passed through to the selected
    /// `build_optimal_plan_impl_<kernel>` unchanged. Exactly one of the three
    /// `cfg` sections below survives compilation and becomes the tail
    /// expression.
    #[inline(always)]
    fn build_optimal_plan_impl<
        S: super::strategy::Strategy,
        const ACCURATE_PRICE: bool,
        const FAVOR_SMALL_OFFSETS: bool,
    >(
        &mut self,
        current: &[u8],
        current_abs_start: usize,
        current_len: usize,
        initial_state: HcOptimalPlanState,
        stats: &HcOptState,
        out: &mut Vec<HcOptimalSequence>,
    ) -> (u32, [u32; 3], usize, usize) {
        // aarch64 (little-endian): a single tier, no runtime selection.
        #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
        unsafe {
            // SAFETY (assumed): the callee only requires `neon`, which this
            // file treats as the baseline tier on aarch64-little —
            // NOTE(review): confirm for every supported target.
            self.build_optimal_plan_impl_neon::<S, ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
                current,
                current_abs_start,
                current_len,
                initial_state,
                stats,
                out,
            )
        }
        // x86 / x86_64: pick the tier at runtime.
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            use crate::encoding::fastpath::{FastpathKernel, select_kernel};
            match select_kernel() {
                FastpathKernel::Avx2Bmi2 => unsafe {
                    // SAFETY (assumed): `select_kernel()` is expected to
                    // report this tier only when AVX2 and BMI2 are present —
                    // TODO confirm in `fastpath`.
                    self.build_optimal_plan_impl_avx2_bmi2::<S, ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
                        current,
                        current_abs_start,
                        current_len,
                        initial_state,
                        stats,
                        out,
                    )
                },
                FastpathKernel::Sse42 => unsafe {
                    // SAFETY (assumed): as above, for SSE4.2.
                    self.build_optimal_plan_impl_sse42::<S, ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
                        current,
                        current_abs_start,
                        current_len,
                        initial_state,
                        stats,
                        out,
                    )
                },
                FastpathKernel::Scalar => self
                    .build_optimal_plan_impl_scalar::<S, ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
                        current,
                        current_abs_start,
                        current_len,
                        initial_state,
                        stats,
                        out,
                    ),
            }
        }
        // Every other architecture: scalar only.
        #[cfg(not(any(
            all(target_arch = "aarch64", target_endian = "little"),
            target_arch = "x86",
            target_arch = "x86_64"
        )))]
        {
            self.build_optimal_plan_impl_scalar::<S, ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
                current,
                current_abs_start,
                current_len,
                initial_state,
                stats,
                out,
            )
        }
    }
5930
    /// NEON-umbrella DP body. Inlines
    /// `collect_optimal_candidates_initialized_neon` (and its entire
    /// per-position pipeline) directly into the DP loop.
    ///
    /// The whole body is the expansion of `build_optimal_plan_impl_body!`
    /// with the NEON collector plugged in.
    ///
    /// # Safety
    /// Compiled with `#[target_feature(enable = "neon")]`; the caller must
    /// ensure the running CPU supports NEON.
    #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
    #[target_feature(enable = "neon")]
    unsafe fn build_optimal_plan_impl_neon<
        S: super::strategy::Strategy,
        const ACCURATE_PRICE: bool,
        const FAVOR_SMALL_OFFSETS: bool,
    >(
        &mut self,
        current: &[u8],
        current_abs_start: usize,
        current_len: usize,
        initial_state: HcOptimalPlanState,
        stats: &HcOptState,
        out: &mut Vec<HcOptimalSequence>,
    ) -> (u32, [u32; 3], usize, usize) {
        build_optimal_plan_impl_body!(
            self,
            S,
            current,
            current_abs_start,
            current_len,
            initial_state,
            stats,
            out,
            collect_optimal_candidates_initialized_neon,
        )
    }
5961
    /// SSE4.2-umbrella DP body: the expansion of
    /// `build_optimal_plan_impl_body!` with
    /// `collect_optimal_candidates_initialized_sse42` plugged in as the
    /// per-position collector.
    ///
    /// # Safety
    /// Compiled with `#[target_feature(enable = "sse4.2")]`; the caller must
    /// ensure the running CPU supports SSE4.2.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[target_feature(enable = "sse4.2")]
    unsafe fn build_optimal_plan_impl_sse42<
        S: super::strategy::Strategy,
        const ACCURATE_PRICE: bool,
        const FAVOR_SMALL_OFFSETS: bool,
    >(
        &mut self,
        current: &[u8],
        current_abs_start: usize,
        current_len: usize,
        initial_state: HcOptimalPlanState,
        stats: &HcOptState,
        out: &mut Vec<HcOptimalSequence>,
    ) -> (u32, [u32; 3], usize, usize) {
        build_optimal_plan_impl_body!(
            self,
            S,
            current,
            current_abs_start,
            current_len,
            initial_state,
            stats,
            out,
            collect_optimal_candidates_initialized_sse42,
        )
    }
5989
    /// AVX2+BMI2-umbrella DP body: the expansion of
    /// `build_optimal_plan_impl_body!` with
    /// `collect_optimal_candidates_initialized_avx2_bmi2` plugged in as the
    /// per-position collector.
    ///
    /// # Safety
    /// Compiled with `#[target_feature(enable = "avx2,bmi2")]`; the caller
    /// must ensure the running CPU supports both AVX2 and BMI2.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[target_feature(enable = "avx2,bmi2")]
    unsafe fn build_optimal_plan_impl_avx2_bmi2<
        S: super::strategy::Strategy,
        const ACCURATE_PRICE: bool,
        const FAVOR_SMALL_OFFSETS: bool,
    >(
        &mut self,
        current: &[u8],
        current_abs_start: usize,
        current_len: usize,
        initial_state: HcOptimalPlanState,
        stats: &HcOptState,
        out: &mut Vec<HcOptimalSequence>,
    ) -> (u32, [u32; 3], usize, usize) {
        build_optimal_plan_impl_body!(
            self,
            S,
            current,
            current_abs_start,
            current_len,
            initial_state,
            stats,
            out,
            collect_optimal_candidates_initialized_avx2_bmi2,
        )
    }
6017
    /// Scalar DP body: the expansion of `build_optimal_plan_impl_body!` with
    /// `collect_optimal_candidates_initialized_scalar` plugged in as the
    /// per-position collector. Carries no `#[target_feature]`, so it is safe
    /// to call on any CPU. Not compiled on aarch64-little.
    #[cfg(not(all(target_arch = "aarch64", target_endian = "little")))]
    // Body macros wrap callees in `unsafe { }` for the NEON/AVX/SSE
    // variants where callees are `unsafe fn`. The scalar wrappers route
    // through safe fns, so those blocks are redundant on this path.
    #[allow(unused_unsafe)]
    fn build_optimal_plan_impl_scalar<
        S: super::strategy::Strategy,
        const ACCURATE_PRICE: bool,
        const FAVOR_SMALL_OFFSETS: bool,
    >(
        &mut self,
        current: &[u8],
        current_abs_start: usize,
        current_len: usize,
        initial_state: HcOptimalPlanState,
        stats: &HcOptState,
        out: &mut Vec<HcOptimalSequence>,
    ) -> (u32, [u32; 3], usize, usize) {
        build_optimal_plan_impl_body!(
            self,
            S,
            current,
            current_abs_start,
            current_len,
            initial_state,
            stats,
            out,
            collect_optimal_candidates_initialized_scalar,
        )
    }
6048
6049    #[cfg(test)]
6050    fn collect_optimal_candidates(
6051        &mut self,
6052        abs_pos: usize,
6053        current_abs_end: usize,
6054        profile: HcOptimalCostProfile,
6055        query: HcCandidateQuery,
6056        out: &mut Vec<MatchCandidate>,
6057    ) {
6058        use super::strategy::{self, StrategyTag};
6059        self.table.ensure_tables();
6060        // Dispatch purely from `self.strategy_tag` (set by
6061        // `configure()`). Tests must configure the matcher the same
6062        // way production does — wiring up `table.hash3_log` directly
6063        // without setting a matching `strategy_tag` is no longer
6064        // allowed.
6065        match self.strategy_tag {
6066            StrategyTag::BtUltra2 => self
6067                .collect_optimal_candidates_initialized::<strategy::BtUltra2, true>(
6068                    abs_pos,
6069                    current_abs_end,
6070                    profile,
6071                    query,
6072                    out,
6073                ),
6074            StrategyTag::BtUltra => self
6075                .collect_optimal_candidates_initialized::<strategy::BtUltra, true>(
6076                    abs_pos,
6077                    current_abs_end,
6078                    profile,
6079                    query,
6080                    out,
6081                ),
6082            StrategyTag::Btlazy2 => self
6083                .collect_optimal_candidates_initialized::<strategy::Btlazy2, true>(
6084                    abs_pos,
6085                    current_abs_end,
6086                    profile,
6087                    query,
6088                    out,
6089                ),
6090            StrategyTag::BtOpt => self
6091                .collect_optimal_candidates_initialized::<strategy::BtOpt, true>(
6092                    abs_pos,
6093                    current_abs_end,
6094                    profile,
6095                    query,
6096                    out,
6097                ),
6098            StrategyTag::Fast | StrategyTag::Dfast | StrategyTag::Greedy | StrategyTag::Lazy => {
6099                self.collect_optimal_candidates_initialized::<strategy::Lazy, false>(
6100                    abs_pos,
6101                    current_abs_end,
6102                    profile,
6103                    query,
6104                    out,
6105                )
6106            }
6107        }
6108    }
6109
    /// Cross-platform entry. Picks the kernel-specific variant so the per-
    /// position pipeline (BT-tree fill, rep probing, hash3 probing, BT
    /// collect / HC chain walk) runs inside a single `target_feature`
    /// umbrella — all inner SIMD probes inline without ABI barriers.
    ///
    /// The on-encode hot path bypasses this dispatcher: `build_optimal_plan_impl_<kernel>`
    /// calls the matching `_<kernel>` variant directly. This entry is kept
    /// for the cfg(test)-only `collect_optimal_candidates` shim and any
    /// future caller that isn't already inside a kernel umbrella.
    ///
    /// All arguments are forwarded unchanged to the selected
    /// `collect_optimal_candidates_initialized_<kernel>`. Exactly one of the
    /// three `cfg` sections below survives compilation.
    // Only reachable from the test shim today, hence the lint allowance.
    #[allow(dead_code)]
    #[inline(always)]
    fn collect_optimal_candidates_initialized<
        S: super::strategy::Strategy,
        const USE_BT_MATCHFINDER: bool,
    >(
        &mut self,
        abs_pos: usize,
        current_abs_end: usize,
        profile: HcOptimalCostProfile,
        query: HcCandidateQuery,
        out: &mut Vec<MatchCandidate>,
    ) {
        // aarch64 (little-endian): a single tier, no runtime selection.
        #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
        unsafe {
            // SAFETY (assumed): the callee only requires `neon`, which this
            // file treats as the baseline tier on aarch64-little —
            // NOTE(review): confirm for every supported target.
            self.collect_optimal_candidates_initialized_neon::<S, USE_BT_MATCHFINDER>(
                abs_pos,
                current_abs_end,
                profile,
                query,
                out,
            )
        }
        // x86 / x86_64: pick the tier at runtime.
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            use crate::encoding::fastpath::{FastpathKernel, select_kernel};
            match select_kernel() {
                FastpathKernel::Avx2Bmi2 => unsafe {
                    // SAFETY (assumed): `select_kernel()` is expected to
                    // report this tier only when AVX2 and BMI2 are present —
                    // TODO confirm in `fastpath`.
                    self.collect_optimal_candidates_initialized_avx2_bmi2::<S, USE_BT_MATCHFINDER>(
                        abs_pos,
                        current_abs_end,
                        profile,
                        query,
                        out,
                    )
                },
                FastpathKernel::Sse42 => unsafe {
                    // SAFETY (assumed): as above, for SSE4.2.
                    self.collect_optimal_candidates_initialized_sse42::<S, USE_BT_MATCHFINDER>(
                        abs_pos,
                        current_abs_end,
                        profile,
                        query,
                        out,
                    )
                },
                FastpathKernel::Scalar => self
                    .collect_optimal_candidates_initialized_scalar::<S, USE_BT_MATCHFINDER>(
                        abs_pos,
                        current_abs_end,
                        profile,
                        query,
                        out,
                    ),
            }
        }
        // Every other architecture: scalar only.
        #[cfg(not(any(
            all(target_arch = "aarch64", target_endian = "little"),
            target_arch = "x86",
            target_arch = "x86_64"
        )))]
        {
            self.collect_optimal_candidates_initialized_scalar::<S, USE_BT_MATCHFINDER>(
                abs_pos,
                current_abs_end,
                profile,
                query,
                out,
            )
        }
    }
6189
    /// NEON-umbrella variant. Every inner helper (`bt_update_tree_until_neon`,
    /// `for_each_repcode_candidate_with_reps_neon`, `hash3_candidate_neon`,
    /// `bt_insert_and_collect_matches_neon`, `fastpath::neon::
    /// common_prefix_len_ptr`) shares the NEON umbrella so the per-position
    /// pipeline executes as a single straight-line inline sequence.
    ///
    /// The body is the expansion of
    /// `collect_optimal_candidates_initialized_body!` with those NEON helpers
    /// plugged in.
    ///
    /// # Safety
    /// Compiled with `#[target_feature(enable = "neon")]`; the caller must
    /// ensure the running CPU supports NEON.
    #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
    #[target_feature(enable = "neon")]
    unsafe fn collect_optimal_candidates_initialized_neon<
        S: super::strategy::Strategy,
        const USE_BT_MATCHFINDER: bool,
    >(
        &mut self,
        abs_pos: usize,
        current_abs_end: usize,
        profile: HcOptimalCostProfile,
        query: HcCandidateQuery,
        out: &mut Vec<MatchCandidate>,
    ) {
        collect_optimal_candidates_initialized_body!(
            self,
            S,
            abs_pos,
            current_abs_end,
            profile,
            query,
            out,
            USE_BT_MATCHFINDER,
            bt_update_tree_until_neon,
            bt_insert_and_collect_matches_neon,
            for_each_repcode_candidate_with_reps_neon,
            hash3_candidate_neon,
            crate::encoding::fastpath::neon::common_prefix_len_ptr,
        )
    }
6224
    /// SSE4.2 variant of the optimal-candidate collector.
    ///
    /// The whole body is the expansion of
    /// `collect_optimal_candidates_initialized_body!`, instantiated with the
    /// `*_sse42` helpers and `fastpath::sse42::common_prefix_len_ptr`. The
    /// wrapper forwards its own arguments (`abs_pos`, `current_abs_end`,
    /// `profile`, `query`, `out`) unchanged.
    ///
    /// Only compiled for `x86` / `x86_64` targets.
    ///
    /// # Safety
    ///
    /// This function is compiled with `#[target_feature(enable = "sse4.2")]`,
    /// so the caller must guarantee that the running CPU supports SSE4.2.
    /// NOTE(review): the macro body is not visible from this wrapper and may
    /// impose further preconditions on `abs_pos` / `current_abs_end` — confirm
    /// against the macro definition.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[target_feature(enable = "sse4.2")]
    unsafe fn collect_optimal_candidates_initialized_sse42<
        S: super::strategy::Strategy,
        const USE_BT_MATCHFINDER: bool,
    >(
        &mut self,
        abs_pos: usize,
        current_abs_end: usize,
        profile: HcOptimalCostProfile,
        query: HcCandidateQuery,
        out: &mut Vec<MatchCandidate>,
    ) {
        // Instantiate the shared body with the SSE4.2-specific helper set.
        collect_optimal_candidates_initialized_body!(
            self,
            S,
            abs_pos,
            current_abs_end,
            profile,
            query,
            out,
            USE_BT_MATCHFINDER,
            bt_update_tree_until_sse42,
            bt_insert_and_collect_matches_sse42,
            for_each_repcode_candidate_with_reps_sse42,
            hash3_candidate_sse42,
            crate::encoding::fastpath::sse42::common_prefix_len_ptr,
        )
    }
6254
    /// AVX2 + BMI2 variant of the optimal-candidate collector.
    ///
    /// The whole body is the expansion of
    /// `collect_optimal_candidates_initialized_body!`, instantiated with the
    /// `*_avx2_bmi2` helpers and `fastpath::avx2_bmi2::common_prefix_len_ptr`.
    /// The wrapper forwards its own arguments (`abs_pos`, `current_abs_end`,
    /// `profile`, `query`, `out`) unchanged.
    ///
    /// Only compiled for `x86` / `x86_64` targets.
    ///
    /// # Safety
    ///
    /// This function is compiled with
    /// `#[target_feature(enable = "avx2,bmi2")]`, so the caller must guarantee
    /// that the running CPU supports both AVX2 and BMI2.
    /// NOTE(review): the macro body is not visible from this wrapper and may
    /// impose further preconditions on `abs_pos` / `current_abs_end` — confirm
    /// against the macro definition.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[target_feature(enable = "avx2,bmi2")]
    unsafe fn collect_optimal_candidates_initialized_avx2_bmi2<
        S: super::strategy::Strategy,
        const USE_BT_MATCHFINDER: bool,
    >(
        &mut self,
        abs_pos: usize,
        current_abs_end: usize,
        profile: HcOptimalCostProfile,
        query: HcCandidateQuery,
        out: &mut Vec<MatchCandidate>,
    ) {
        // Instantiate the shared body with the AVX2/BMI2-specific helper set.
        collect_optimal_candidates_initialized_body!(
            self,
            S,
            abs_pos,
            current_abs_end,
            profile,
            query,
            out,
            USE_BT_MATCHFINDER,
            bt_update_tree_until_avx2_bmi2,
            bt_insert_and_collect_matches_avx2_bmi2,
            for_each_repcode_candidate_with_reps_avx2_bmi2,
            hash3_candidate_avx2_bmi2,
            crate::encoding::fastpath::avx2_bmi2::common_prefix_len_ptr,
        )
    }
6284
    /// Scalar (no `#[target_feature]`) variant of the optimal-candidate
    /// collector.
    ///
    /// The whole body is the expansion of
    /// `collect_optimal_candidates_initialized_body!`, instantiated with the
    /// `*_scalar` helpers and `fastpath::scalar::common_prefix_len_ptr`. The
    /// wrapper forwards its own arguments (`abs_pos`, `current_abs_end`,
    /// `profile`, `query`, `out`) unchanged. Unlike the SIMD variants this is
    /// a safe `fn`.
    ///
    /// Compiled on every target except little-endian `aarch64`.
    #[cfg(not(all(target_arch = "aarch64", target_endian = "little")))]
    // Macro emits `unsafe { }` wrappers for NEON/AVX/SSE variants; scalar
    // callees are safe so the blocks are redundant here only.
    #[allow(unused_unsafe)]
    fn collect_optimal_candidates_initialized_scalar<
        S: super::strategy::Strategy,
        const USE_BT_MATCHFINDER: bool,
    >(
        &mut self,
        abs_pos: usize,
        current_abs_end: usize,
        profile: HcOptimalCostProfile,
        query: HcCandidateQuery,
        out: &mut Vec<MatchCandidate>,
    ) {
        // Instantiate the shared body with the portable scalar helper set.
        collect_optimal_candidates_initialized_body!(
            self,
            S,
            abs_pos,
            current_abs_end,
            profile,
            query,
            out,
            USE_BT_MATCHFINDER,
            bt_update_tree_until_scalar,
            bt_insert_and_collect_matches_scalar,
            for_each_repcode_candidate_with_reps_scalar,
            hash3_candidate_scalar,
            crate::encoding::fastpath::scalar::common_prefix_len_ptr,
        )
    }
6316}
6317
/// Legacy round-trip test for the removed `MatchGenerator` / `SuffixStore`
/// pair, kept for reference only.
///
/// `#[cfg(any())]` has no predicates and is therefore always false, so this
/// function is never compiled. It feeds several chunks to the matcher,
/// replays every emitted sequence into `reconstructed`, and finally checks
/// that the replayed bytes equal the concatenation of all chunks.
#[cfg(any())] // disabled: tested legacy MatchGenerator/SuffixStore behavior removed in phase 1b
#[test]
fn matches() {
    let mut matcher = MatchGenerator::new(1000);
    // `original_data` accumulates every chunk fed to the matcher;
    // `reconstructed` accumulates what a decoder would rebuild.
    let mut original_data = Vec::new();
    let mut reconstructed = Vec::new();

    // Decoder-side replay of one sequence, with sanity checks on its shape.
    let replay_sequence = |seq: Sequence<'_>, reconstructed: &mut Vec<u8>| match seq {
        Sequence::Literals { literals } => {
            assert!(!literals.is_empty());
            reconstructed.extend_from_slice(literals);
        }
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            assert!(offset > 0);
            assert!(match_len >= MIN_MATCH_LEN);
            reconstructed.extend_from_slice(literals);
            assert!(offset <= reconstructed.len());
            let start = reconstructed.len() - offset;
            // Byte-by-byte copy so a match may overlap the bytes it produces.
            for i in 0..match_len {
                let byte = reconstructed[start + i];
                reconstructed.push(byte);
            }
        }
    };

    // Chunk 1: ten zero bytes.
    matcher.add_data(
        alloc::vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));

    // The chunk must be exhausted after the expected number of sequences.
    assert!(!matcher.next_sequence(|_| {}));

    // Chunk 2: a period-6 pattern repeated three times, then five zeros.
    matcher.add_data(
        alloc::vec![
            1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
        ],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[
        1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
    ]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Chunk 3: prefix shared with chunk 2, new bytes, then zeros again.
    matcher.add_data(
        alloc::vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Chunk 4: five zeros.
    matcher.add_data(
        alloc::vec![0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Chunk 5: bytes that also appear inside chunk 3.
    matcher.add_data(
        alloc::vec![7, 8, 9, 10, 11],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[7, 8, 9, 10, 11]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Chunk 6: added but skipped via `skip_matching`, so no sequences are
    // emitted and the bytes are appended to `reconstructed` by hand.
    matcher.add_data(
        alloc::vec![1, 3, 5, 7, 9],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    matcher.skip_matching();
    original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
    reconstructed.extend_from_slice(&[1, 3, 5, 7, 9]);
    assert!(!matcher.next_sequence(|_| {}));

    // Chunk 7: same bytes as the skipped chunk, this time matched normally.
    matcher.add_data(
        alloc::vec![1, 3, 5, 7, 9],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[1, 3, 5, 7, 9]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Chunk 8: two zeros, then a 5-byte run repeated twice, then two new bytes.
    matcher.add_data(
        alloc::vec![0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Final check: the replayed stream equals everything that was fed in.
    assert_eq!(reconstructed, original_data);
}
6438
/// Feeds two identical 128 KiB blocks of a 10-byte repeating pattern through
/// `DfastMatchGenerator` and checks that replaying the emitted sequences
/// rebuilds each block; the second block is replayed on top of the first
/// block's reconstructed history.
#[test]
fn dfast_matches_roundtrip_multi_block_pattern() {
    const BLOCK_LEN: usize = 128 * 1024;

    /// Applies one sequence to `decoded` the way a decoder would.
    fn apply(decoded: &mut Vec<u8>, seq: Sequence<'_>) {
        let (literals, copy) = match seq {
            Sequence::Literals { literals } => (literals, None),
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => (literals, Some((offset, match_len))),
        };
        decoded.extend_from_slice(literals);
        if let Some((offset, match_len)) = copy {
            // Byte-wise copy: the source range may overlap the output.
            let mut src = decoded.len() - offset;
            for _ in 0..match_len {
                let byte = decoded[src];
                decoded.push(byte);
                src += 1;
            }
        }
    }

    let pattern: [u8; 10] = [9, 21, 44, 184, 19, 96, 171, 109, 141, 251];
    let first_block: Vec<u8> = (0..BLOCK_LEN).map(|i| pattern[i % pattern.len()]).collect();
    let second_block: Vec<u8> = (0..BLOCK_LEN).map(|i| pattern[i % pattern.len()]).collect();

    let mut matcher = DfastMatchGenerator::new(1 << 22);
    let mut history = Vec::new();

    // Block 1 must round-trip on its own.
    matcher.add_data(first_block.clone(), |_| {});
    matcher.start_matching(|seq| apply(&mut history, seq));
    assert_eq!(history, first_block);

    // Block 2 is decoded behind block 1; only the newly produced tail is
    // compared against the second block.
    matcher.add_data(second_block.clone(), |_| {});
    let prefix_len = history.len();
    matcher.start_matching(|seq| apply(&mut history, seq));

    assert_eq!(&history[prefix_len..], second_block.as_slice());
}
6473
/// Regression for the `DFAST_MIN_MATCH_LEN: 6 -> 5` drop. The fixture
/// is built so the longest available match is EXACTLY 5 bytes — a
/// matcher that still effectively requires a 6-byte floor would emit
/// only literals here and the assertion would catch the silent
/// 5-byte miss.
///
/// Fixture layout (55 B):
///   byte  0       `'Z'`      — lead byte, keeps the match source off
///                              position 0
///   bytes 1..6    `"ABCDE"`  — match source
///   bytes 6..29   `'!'` × 23 — filler that does NOT start with 'A'
///   bytes 29..34  `"ABCDE"`  — match site (repeats the source)
///   byte  34      `'F'`      — terminator: differs from byte 6 (`'!'`),
///                              so the forward extension at the match
///                              site stops at exactly length 5.
///   bytes 35..55  `"GHIJKLMNOPQRSTUVWXYZ"` — trailing filler
///
/// A 5-byte match at offset 28 (site 29 − source 1) must be emitted; a
/// 6-byte+ match at the same offset must NOT.
#[test]
fn dfast_accepts_exact_five_byte_match() {
    // Layout the input so that:
    //   byte  0      = 'Z'            (lead byte — keeps the match SOURCE off
    //                                  position 0, which the greedy loop never
    //                                  inserts: like the donor it starts the
    //                                  cursor at ip+1 and hashes only visited
    //                                  positions)
    //   bytes 1..6   = "ABCDE"        (the match source — position 1 IS visited)
    //   bytes 6..29  = 23 filler bytes that do NOT start with 'A'
    //   bytes 29..34 = "ABCDE"        (the 5-byte match site)
    //   byte  34     = 'F'            (differs from byte 6 = '!')
    // The longest available copy at position 29 is exactly 5 bytes:
    // the byte at position 34 ('F') differs from the byte at position 6
    // ('!'), so the forward extension stops at length 5.
    let mut data = Vec::new();
    data.push(b'Z'); // 0
    data.extend_from_slice(b"ABCDE"); // 1..6
    data.extend_from_slice(b"!!!!!!!!!!!!!!!!!!!!!!!"); // 6..29 (23 bytes)
    data.extend_from_slice(b"ABCDE"); // 29..34
    data.push(b'F'); // 34: forces forward extension to stop at length 5
    // Trailing filler so the match site (29) sits at least HASH_READ_SIZE (8)
    // bytes before the block end. The greedy double-fast — like the donor —
    // stops probing at `ilimit = iend - HASH_READ_SIZE`, so a match in the
    // final 8 bytes is never searched (donor parity, not a regression).
    data.extend_from_slice(b"GHIJKLMNOPQRSTUVWXYZ"); // 35..55
    assert_eq!(data.len(), 55);

    let mut matcher = DfastMatchGenerator::new(1 << 22);
    matcher.add_data(data.clone(), |_| {});

    // Only matches at offset 28 are classified; anything else is ignored.
    let mut saw_five_byte_match = false;
    let mut saw_longer_match = false;
    matcher.start_matching(|seq| {
        if let Sequence::Triple {
            offset, match_len, ..
        } = seq
        {
            if offset == 28 && match_len == 5 {
                saw_five_byte_match = true;
            } else if offset == 28 && match_len > 5 {
                saw_longer_match = true;
            }
        }
    });

    assert!(
        saw_five_byte_match,
        "dfast must accept the exact-5-byte match — a 6-byte floor would skip it"
    );
    assert!(
        !saw_longer_match,
        "fixture pinned to length 5 — byte 34 ('F') must terminate the extension"
    );
}
6545
/// Resets a driver to `CompressionLevel::Default`, checks that the Dfast
/// backend and a 2 MiB window are selected, runs a skipped slice followed by
/// a matched slice with the same payload, and finally verifies that a reset
/// to `CompressionLevel::Fastest` shrinks the window to 512 KiB.
#[test]
fn driver_switches_backends_and_initializes_dfast_via_reset() {
    use super::strategy::BackendTag;

    const PAYLOAD: &[u8; 12] = b"abcabcabcabc";

    /// Copies `bytes` into the driver's next slice and commits it.
    fn commit_payload(driver: &mut MatchGeneratorDriver, bytes: &[u8]) {
        let mut space = driver.get_next_space();
        space[..bytes.len()].copy_from_slice(bytes);
        space.truncate(bytes.len());
        driver.commit_space(space);
    }

    let mut driver = MatchGeneratorDriver::new(32, 2);

    driver.reset(CompressionLevel::Default);
    assert_eq!(driver.active_backend(), BackendTag::Dfast);
    assert_eq!(driver.window_size(), (1u64 << 21));

    // First slice: committed, inspected, then skipped without matching.
    commit_payload(&mut driver, PAYLOAD);
    assert_eq!(driver.get_last_space(), PAYLOAD);
    driver.skip_matching_with_hint(None);

    // Second slice: same bytes, this time matched against the history.
    commit_payload(&mut driver, PAYLOAD);

    // The skipped slice is already "decoded", so seed the output with it.
    let mut reconstructed = PAYLOAD.to_vec();
    driver.start_matching(|seq| {
        let (literals, copy) = match seq {
            Sequence::Literals { literals } => (literals, None),
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => (literals, Some((offset, match_len))),
        };
        reconstructed.extend_from_slice(literals);
        if let Some((offset, match_len)) = copy {
            let start = reconstructed.len() - offset;
            for src in start..start + match_len {
                let byte = reconstructed[src];
                reconstructed.push(byte);
            }
        }
    });
    assert_eq!(reconstructed, b"abcabcabcabcabcabcabcabc");

    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.window_size(), (1u64 << 19));
}
6587
/// Level 5 must select the Row backend and run it with the greedy parse.
#[test]
fn driver_level5_selects_row_backend() {
    use super::strategy::{BackendTag, ParseMode};

    let mut driver = MatchGeneratorDriver::new(32, 2);
    driver.reset(CompressionLevel::Level(5));

    let backend = driver.active_backend();
    assert_eq!(backend, BackendTag::Row);

    // Greedy-specific routing assertion: `MatchGeneratorDriver::start_matching`
    // dispatches the Row backend into `start_matching_greedy` iff
    // `self.parse == ParseMode::Greedy`, so the selector itself is asserted —
    // a round-trip alone would also pass on the lazy parser.
    assert_eq!(
        driver.parse,
        ParseMode::Greedy,
        "L5 must route to start_matching_greedy (parse == Greedy)",
    );

    // Secondary corroboration: `lazy_depth` mirrors the parse mode. Checking
    // `parse` directly above still catches a regression should the two ever
    // drift apart.
    let lazy_depth = driver.row_matcher().lazy_depth;
    assert_eq!(
        lazy_depth, 0,
        "row matcher lazy_depth must mirror the greedy parse mode",
    );
}
6611
/// Level 4 maps to `StrategyTag::Dfast` (the greedy double-fast, donor
/// `ZSTD_dfast` — "greedy" names the parse discipline, not the Row/Greedy
/// strategy used at Level 5).
///
/// A round-trip alone does not pin match quality, because a lazy parser
/// would reconstruct the input just as well. This test therefore also
/// requires that a small repeating pattern yields at least one
/// `Sequence::Triple`, so a regression that degrades to literals-only output
/// (e.g. a `min_match` or rep-probe guard regression) is caught.
#[test]
fn driver_level4_greedy_round_trip_single_slice() {
    let input = b"abcdefgh_abcdefgh_abcdefgh_abcdefgh";
    let input_len = input.len();

    let mut driver = MatchGeneratorDriver::new(64, 2);
    driver.reset(CompressionLevel::Level(4));

    // Stage the whole input as a single slice.
    let mut slice = driver.get_next_space();
    slice[..input_len].copy_from_slice(input);
    slice.truncate(input_len);
    driver.commit_space(slice);

    let mut reconstructed: Vec<u8> = Vec::new();
    let mut saw_triple = false;
    driver.start_matching(|seq| {
        let (literals, copy) = match seq {
            Sequence::Literals { literals } => (literals, None),
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => (literals, Some((offset, match_len))),
        };
        reconstructed.extend_from_slice(literals);
        if let Some((offset, match_len)) = copy {
            saw_triple = true;
            let start = reconstructed.len() - offset;
            for src in start..start + match_len {
                let byte = reconstructed[src];
                reconstructed.push(byte);
            }
        }
    });

    assert_eq!(
        reconstructed,
        input.to_vec(),
        "L4 greedy parse failed to reconstruct repeating-pattern input",
    );
    assert!(
        saw_triple,
        "L4 greedy parse on a repeating pattern must emit at least one match (Triple)",
    );
}
6657
/// Verifies that the L4 greedy parse carries repcode / hash-table state
/// across slice boundaries: the second slice repeats the first byte for
/// byte, so the parse must emit a match reaching back into the previous
/// slice's history.
#[test]
fn driver_level4_greedy_round_trip_cross_slice() {
    /// Copies `bytes` into the driver's next slice and commits it.
    fn stage(driver: &mut MatchGeneratorDriver, bytes: &[u8]) {
        let mut space = driver.get_next_space();
        space[..bytes.len()].copy_from_slice(bytes);
        space.truncate(bytes.len());
        driver.commit_space(space);
    }

    /// Replays one sequence into `out`; returns the offset of a `Triple`.
    fn replay(out: &mut Vec<u8>, seq: Sequence<'_>) -> Option<usize> {
        match seq {
            Sequence::Literals { literals } => {
                out.extend_from_slice(literals);
                None
            }
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => {
                out.extend_from_slice(literals);
                let start = out.len() - offset;
                for src in start..start + match_len {
                    let byte = out[src];
                    out.push(byte);
                }
                Some(offset)
            }
        }
    }

    let mut driver = MatchGeneratorDriver::new(32, 4);
    driver.reset(CompressionLevel::Level(4));
    let chunk = b"the quick brown fox jumps over!!";
    assert_eq!(chunk.len(), 32);

    // Slice 1: must round-trip by itself.
    stage(&mut driver, chunk);
    let mut first_recon: Vec<u8> = Vec::new();
    driver.start_matching(|seq| {
        replay(&mut first_recon, seq);
    });
    assert_eq!(
        first_recon,
        chunk.to_vec(),
        "first slice failed to round-trip"
    );

    // Slice 2: identical bytes, decoded behind the first slice's output.
    stage(&mut driver, chunk);
    let mut full = first_recon.clone();
    let mut saw_cross_slice_match = false;
    driver.start_matching(|seq| {
        // A match whose offset reaches >= the current slice's literal
        // run plus the second slice's index means we matched into the
        // first slice — exactly the cross-slice behavior under test.
        if let Some(offset) = replay(&mut full, seq) {
            if offset >= chunk.len() {
                saw_cross_slice_match = true;
            }
        }
    });

    let mut expected = chunk.to_vec();
    expected.extend_from_slice(chunk);
    assert_eq!(
        full, expected,
        "cross-slice L4 greedy parse failed to reconstruct"
    );
    assert!(
        saw_cross_slice_match,
        "L4 greedy parse must match across slice boundaries (history is shared)",
    );
}
6735
/// Test-only extensions of `MatchGeneratorDriver` for staging a synthetic
/// parse×search pairing ahead of a `reset()`.
#[cfg(test)]
impl MatchGeneratorDriver {
    /// Test-only: stage a parse×search recipe override applied on the
    /// next `reset()`. Routes a level through a non-default (parse,
    /// search) pair so the decoupling can be exercised end-to-end.
    pub(crate) fn set_config_override(
        &mut self,
        search: super::strategy::SearchMethod,
        parse: super::strategy::ParseMode,
    ) {
        let staged = (search, parse);
        self.config_override = Some(staged);
    }

    /// Test-only: reset `level` routed onto the lazy HashChain pairing.
    /// The lazy band runs on the Row backend in production, so HC-specific
    /// behaviour (live-chain dict prime, eviction budget accounting, seed
    /// pass gates) is exercised through this override-backed reset.
    pub(crate) fn reset_on_hc_lazy(&mut self, level: CompressionLevel) {
        use super::strategy::{ParseMode, SearchMethod};

        // Stage HashChain + Lazy2 first, then reset with the requested level.
        self.set_config_override(SearchMethod::HashChain, ParseMode::Lazy2);
        self.reset(level);
    }
}
6764
/// Runs a complete compress parse over `data` at `level` and rebuilds the
/// bytes from the emitted sequences.
///
/// When `over` is `Some((search, parse))`, that pairing is staged via
/// `set_config_override` before the driver is reset. The input is fed one
/// driver slice at a time, with a `start_matching` pass after every commit.
/// The returned buffer must equal `data` for a correct parse.
#[cfg(test)]
fn drive_roundtrip_with_override(
    level: CompressionLevel,
    over: Option<(super::strategy::SearchMethod, super::strategy::ParseMode)>,
    data: &[u8],
) -> Vec<u8> {
    let mut driver = MatchGeneratorDriver::new(1 << 17, 8);
    if let Some((search, parse)) = over {
        driver.set_config_override(search, parse);
    }
    driver.reset(level);

    let mut out: Vec<u8> = Vec::with_capacity(data.len());
    let mut pending = data;
    while !pending.is_empty() {
        // Fill the next slice with as much of the remaining input as fits.
        let mut space = driver.get_next_space();
        let take = pending.len().min(space.len());
        let (head, tail) = pending.split_at(take);
        space[..take].copy_from_slice(head);
        space.truncate(take);
        driver.commit_space(space);
        pending = tail;

        driver.start_matching(|seq| {
            let (literals, copy) = match seq {
                Sequence::Literals { literals } => (literals, None),
                Sequence::Triple {
                    literals,
                    offset,
                    match_len,
                } => (literals, Some((offset, match_len))),
            };
            out.extend_from_slice(literals);
            if let Some((offset, match_len)) = copy {
                // Byte-wise copy: the source range may overlap the output.
                let start = out.len() - offset;
                for src in start..start + match_len {
                    let byte = out[src];
                    out.push(byte);
                }
            }
        });
    }
    out
}
6808
/// Phase 1 capability proof: parse and search are decoupled, so a level
/// can run any parse mode on any non-opt search backend. Greedy-on-
/// HashChain and Lazy2-on-RowHash are pairings the legacy `strategy_tag`
/// could not express; every pairing below must reconstruct the input
/// exactly.
#[test]
fn parse_search_matrix_decoupled_roundtrips() {
    use super::strategy::{ParseMode, SearchMethod};

    // Mixed repetitive + literal payload that exercises matches and reps.
    let data: Vec<u8> = (0..4000u32)
        .flat_map(|i| {
            let mut record = b"the quick brown fox ".to_vec();
            record.extend_from_slice(&i.to_le_bytes());
            record
        })
        .collect();

    let pairings = [
        // Greedy parse on the HashChain search backend (legacy: Greedy was
        // welded to RowHash).
        (
            CompressionLevel::Level(5),
            SearchMethod::HashChain,
            ParseMode::Greedy,
            "greedy-on-hashchain diverged",
        ),
        // Lazy2 parse on the RowHash search backend (legacy: Lazy was welded
        // to HashChain).
        (
            CompressionLevel::Level(8),
            SearchMethod::RowHash,
            ParseMode::Lazy2,
            "lazy2-on-rowhash diverged",
        ),
        // Lazy on RowHash too (depth 1).
        (
            CompressionLevel::Level(6),
            SearchMethod::RowHash,
            ParseMode::Lazy,
            "lazy-on-rowhash diverged",
        ),
    ];

    for (level, search, parse, failure) in pairings {
        let got = drive_roundtrip_with_override(level, Some((search, parse)), &data);
        assert_eq!(got, data, "{failure}");
    }
}
6849
/// The row `mls` knob (C-like `minMatch`) is respected: every accepted
/// match (regular row + repcode, on the lazy parse) is at least `mls`
/// bytes, and the stream still round-trips for the whole 4..=7 range. The
/// default (5) reproduces the historical `ROW_MIN_MATCH_LEN` behaviour.
#[test]
fn row_mls_knob_gates_matches_and_roundtrips() {
    // 4000 records of "abcdefgh" followed by a little-endian counter.
    let mut data: Vec<u8> = Vec::new();
    for i in 0..4000u32 {
        data.extend_from_slice(b"abcdefgh");
        data.extend_from_slice(&i.to_le_bytes());
    }

    for mls in [4usize, 5, 6, 7] {
        let mut cfg = ROW_CONFIG;
        cfg.mls = mls;

        let mut matcher = RowMatchGenerator::new(1 << 22);
        matcher.configure(cfg);
        matcher.add_data(data.clone(), |_| {});

        let mut out: Vec<u8> = Vec::with_capacity(data.len());
        // Shortest match length seen so far; `None` until a match is emitted.
        let mut shortest: Option<usize> = None;
        matcher.start_matching(|seq| {
            let (literals, copy) = match seq {
                Sequence::Literals { literals } => (literals, None),
                Sequence::Triple {
                    literals,
                    offset,
                    match_len,
                } => (literals, Some((offset, match_len))),
            };
            out.extend_from_slice(literals);
            if let Some((offset, match_len)) = copy {
                shortest = Some(shortest.map_or(match_len, |prev| prev.min(match_len)));
                let start = out.len() - offset;
                for src in start..start + match_len {
                    let byte = out[src];
                    out.push(byte);
                }
            }
        });

        assert_eq!(out, data, "mls={mls} round-trip diverged");
        if let Some(shortest_match) = shortest {
            assert!(
                shortest_match >= mls,
                "mls={mls}: emitted a {shortest_match}-byte match below the floor",
            );
        }
    }
}
6899
/// `LevelParams::parse()` derives the parse mode from the `search` axis, not
/// the strategy tag, so the decoupling holds even for a `Bt*`-tagged level
/// overridden to a non-BT search backend. Pre-fix the method matched on
/// `strategy_tag` and returned `Optimal` for any `Bt*` tag regardless of
/// `search`/`lazy_depth`.
#[test]
fn parse_mode_follows_search_axis_not_strategy_tag() {
    use super::strategy::{ParseMode, SearchMethod};

    // LEVEL_TABLE[15] is level 16: BtOpt tag, BinaryTree search.
    let mut params = LEVEL_TABLE[15];
    assert_eq!(
        params.parse(),
        ParseMode::Optimal,
        "BinaryTree search → Optimal"
    );

    // Override the Bt-tagged level's search to a non-BT backend: parse must
    // follow the search axis (derive from lazy_depth), not stay Optimal.
    params.search = SearchMethod::RowHash;
    let expectations = [
        (0, ParseMode::Greedy, "RowHash + depth 0 → Greedy"),
        (2, ParseMode::Lazy2, "RowHash + depth 2 → Lazy2"),
    ];
    for (depth, expected, failure) in expectations {
        params.lazy_depth = depth;
        assert_eq!(params.parse(), expected, "{failure}");
    }
}
6919
/// The test-only `config_override` is one-shot: the first `reset()` consumes
/// it, so a reused driver does not silently keep the synthetic pairing armed
/// across later resets. Pre-fix `reset()` copied the override and left it
/// set.
#[test]
fn config_override_is_consumed_by_reset() {
    let mut driver = MatchGeneratorDriver::new(1 << 17, 8);

    // Arm the override and confirm it is staged.
    driver.set_config_override(
        super::strategy::SearchMethod::RowHash,
        super::strategy::ParseMode::Lazy2,
    );
    let armed_before_reset = driver.config_override.is_some();
    assert!(armed_before_reset);

    // A single reset must clear it again.
    driver.reset(CompressionLevel::Level(5));
    let cleared_after_reset = driver.config_override.is_none();
    assert!(
        cleared_after_reset,
        "override must be consumed after one reset",
    );
}
6936
/// Helper: round-trips `data` through the Level 4 parse and asserts that the
/// reconstructed bytes equal the input. Returns `(triple_count, max_offset)`
/// so callers can probe the parse shape (matches emitted, largest offset).
///
/// Level 4 maps to the greedy Dfast (double-fast) backend — "greedy" here is
/// the parse discipline (no lazy lookahead, donor `ZSTD_dfast`), NOT the
/// Row/Greedy strategy (which is Level 5). This roundtrip is intentional
/// Dfast L4 coverage; the Row backend is exercised by the `Level(5)` fixtures
/// elsewhere in this file.
#[cfg(test)]
fn l4_greedy_round_trip(slice_size: usize, max_slices: usize, data: &[u8]) -> (usize, usize) {
    let mut driver = MatchGeneratorDriver::new(slice_size, max_slices);
    driver.reset(CompressionLevel::Level(4));

    let mut rebuilt: Vec<u8> = Vec::with_capacity(data.len());
    let mut triples = 0usize;
    let mut widest_offset = 0usize;

    // `start_matching` consumes the current pending slice; multi-slice
    // payloads require commit + drive per slice so earlier slices'
    // bytes actually round-trip out before they're displaced from the
    // window.
    let mut pending = data;
    while !pending.is_empty() {
        let mut space = driver.get_next_space();
        let take = pending.len().min(space.len());
        let (head, tail) = pending.split_at(take);
        space[..take].copy_from_slice(head);
        space.truncate(take);
        driver.commit_space(space);
        pending = tail;

        driver.start_matching(|seq| {
            let (literals, copy) = match seq {
                Sequence::Literals { literals } => (literals, None),
                Sequence::Triple {
                    literals,
                    offset,
                    match_len,
                } => (literals, Some((offset, match_len))),
            };
            if let Some((offset, _)) = copy {
                triples += 1;
                widest_offset = widest_offset.max(offset);
            }
            rebuilt.extend_from_slice(literals);
            if let Some((offset, match_len)) = copy {
                // Byte-wise copy: the source range may overlap the output.
                let start = rebuilt.len() - offset;
                for src in start..start + match_len {
                    let byte = rebuilt[src];
                    rebuilt.push(byte);
                }
            }
        });
    }

    // Empty payload still needs one commit/drive round so the empty-
    // input path of `start_matching_greedy` (the `current_len == 0`
    // early-return guard) gets exercised.
    if data.is_empty() {
        let mut space = driver.get_next_space();
        space.truncate(0);
        driver.commit_space(space);
        driver.start_matching(|seq| match seq {
            Sequence::Literals { literals } => rebuilt.extend_from_slice(literals),
            Sequence::Triple { .. } => panic!("empty input must not emit any matches"),
        });
    }

    assert_eq!(rebuilt, data, "L4 greedy round-trip diverged");
    (triples, widest_offset)
}
7001
/// Regression test for the tail rep-only case flagged in review: the
/// earlier outer-loop guard `pos + ROW_MIN_MATCH_LEN <= current_len`
/// (6) left the final 5-byte position unreachable. Because the rep
/// probe at `abs_pos + 1` needs only 4 bytes of lookahead past the
/// probe point, the guard was loosened to
/// `pos + GREEDY_MIN_LOOKAHEAD <= current_len` (5). The two slices
/// are driven separately and the assertion looks only at triples
/// emitted **during the second slice's parse pass**, so re-tightening
/// the guard or breaking the cross-slice repcode lookup cannot hide
/// behind matches found in the first slice.
#[test]
fn driver_level5_greedy_tail_rep_only_reachable() {
    // The period-4 first slice leaves rep1 = 4 in `offset_hist` by the
    // time its tail is parsed. The second slice is exactly 5 bytes
    // ( = `GREEDY_MIN_LOOKAHEAD`), so the outer loop runs **once** at
    // `pos = 0`. The regular `row_candidate` wants 6 bytes starting at
    // `abs_pos`, which overruns the live history, leaving the
    // `abs_pos + 1` rep probe as the only viable hit. The payload is
    // shaped so that probe sees a 4-byte match at offset 4
    // (`second[1..5] == first[13..16] ++ second[0] == "BCDA"`), after
    // which `extend_backwards_shared` pulls `second[0]` into the match
    // (one byte back to the implicit anchor, and no further since the
    // anchor is the current `abs_pos`).
    let first: &[u8] = b"ABCDABCDABCDABCD"; // 16 bytes — strict period 4
    let second: &[u8] = b"ABCDA"; // 5 bytes — exact GREEDY_MIN_LOOKAHEAD
    let mut driver = MatchGeneratorDriver::new(16, 2);
    driver.reset(CompressionLevel::Level(5));

    // One counter per slice; only the second one is asserted on.
    let mut triples_per_slice = [0usize; 2];
    for (slice_idx, payload) in [first, second].iter().copied().enumerate() {
        let mut space = driver.get_next_space();
        space[..payload.len()].copy_from_slice(payload);
        space.truncate(payload.len());
        driver.commit_space(space);

        driver.start_matching(|seq| {
            if let Sequence::Triple { .. } = seq {
                triples_per_slice[slice_idx] += 1;
            }
        });
    }

    let second_slice_triples = triples_per_slice[1];
    assert!(
        second_slice_triples >= 1,
        "tail rep-only position must produce a match in the second slice \
         (got {second_slice_triples} triples)",
    );
}
7055
/// A committed zero-length slice at `Level(4)` must drive
/// `start_matching` without panicking and without invoking the
/// sequence callback at all.
#[test]
fn driver_level4_greedy_empty_input_emits_nothing() {
    // Targets the `current_len == 0` early-return guard at the top of
    // `start_matching_greedy`.
    let mut driver = MatchGeneratorDriver::new(64, 2);
    driver.reset(CompressionLevel::Level(4));

    // The matcher needs SOMETHING committed before `start_matching`
    // (otherwise it panics on the `window.back()` unwrap — a separate
    // path already covered by the reset tests), so hand it an empty
    // space.
    let mut empty_space = driver.get_next_space();
    empty_space.truncate(0);
    driver.commit_space(empty_space);

    let mut callback_hits = 0usize;
    driver.start_matching(|_| callback_hits += 1);
    assert!(
        callback_hits == 0,
        "empty slice must not emit any sequences",
    );
}
7074
/// A payload shorter than `GREEDY_MIN_LOOKAHEAD = 5` must round-trip
/// with zero matches.
#[test]
fn driver_level4_greedy_sub_min_lookahead_input() {
    // With only 4 bytes the outer loop body never runs; the tail
    // literal path still has to hand the bytes back as a single
    // `Sequence::Literals`, which the round-trip helper verifies.
    let (triples, _max_offset) = l4_greedy_round_trip(64, 2, b"abcd");
    assert_eq!(
        triples, 0,
        "sub-min-lookahead input must not emit any matches (got {triples})",
    );
}
7087
/// Round-trips 256 pseudo-random bytes through the Level-4 helper.
#[test]
fn driver_level4_greedy_incompressible_input() {
    // LCG output with no exploitable structure — every position is a
    // "miss" for both the rep probe and the row candidate. This runs
    // the miss branch and the `SKIP_STRENGTH = 10` skip-step grow
    // (irrelevant at this size, but the path still executes).
    let mut lcg: u32 = 0xDEAD_BEEF;
    let data: Vec<u8> = (0..256)
        .map(|_| {
            lcg = lcg.wrapping_mul(1_103_515_245).wrapping_add(12345);
            (lcg >> 16) as u8
        })
        .collect();

    // No structural assertion: the helper itself checks the bit-exact
    // round-trip, so passing means no divergence and no panic /
    // debug_assert fired.
    let _ = l4_greedy_round_trip(64, 8, &data);
}
7104
/// Stress smoke test: round-trips 2 KiB of unstructured bytes through
/// the Level-4 helper.
#[test]
fn driver_level4_greedy_long_literal_run_skip_step_growth() {
    // 2 KiB pushes the literal-run length beyond the
    // `SKIP_STRENGTH = 10` threshold (~1 KiB), so the miss branch and
    // the per-miss step-grow path in `start_matching_greedy` both run.
    //
    // Only the bit-exact round-trip and the absence of panics /
    // `debug_assert!` failures are checked. The `SKIP_STRENGTH`
    // constant and the per-iteration step count are deliberately NOT
    // pinned: the round-trip would pass just as well with
    // `SKIP_STRENGTH = 6` or `= 14`, since both yield valid sequences.
    // Pinning the exact growth would mean plumbing step / iteration
    // metadata out of the parse — invasive for a constant that has not
    // been re-tuned in months. What this test buys is detection of
    // panics or correctness regressions on long incompressible runs.
    let mut lcg: u32 = 0xC0FF_EE00;
    let data: Vec<u8> = (0..2048)
        .map(|_| {
            lcg = lcg.wrapping_mul(0x9E37_79B9).wrapping_add(0xCAFEBABE);
            (lcg >> 24) as u8
        })
        .collect();

    let _ = l4_greedy_round_trip(512, 8, &data);
}
7129
/// 128 zero bytes must round-trip with at least one match and with no
/// match offset other than 1.
#[test]
fn driver_level4_greedy_all_zeros_heavy_rep1() {
    // In an all-zero payload every position past the first satisfies
    // `byte[pos] == byte[pos - 1]`, so the rep1 probe at `abs_pos + 1`
    // hits right away and the parse collapses into one long match —
    // the `cheap rep at +1, full-match length` path.
    let zeros = [0u8; 128];
    let (triples, max_offset) = l4_greedy_round_trip(64, 8, &zeros);

    assert!(
        triples >= 1,
        "all-zeros input must produce at least one rep1 match",
    );
    // Every byte equals its predecessor, so the dominant match should
    // sit on rep1 (offset 1); anything larger would mean the rep1
    // probe was bypassed.
    assert_eq!(
        max_offset, 1,
        "all-zeros L4 greedy parse should commit at offset 1 (got {max_offset})",
    );
}
7150
/// Steady-state rep-cascade coverage for the greedy parse: with a
/// strictly periodic payload the main-loop rep probe at `abs_pos + 1`
/// fires on every iteration once the period has been locked into
/// `offset_hist[0]`, producing a long chain of triples that share one
/// offset.
#[test]
fn driver_level4_greedy_periodic_pattern_rep_cascade() {
    let unit: &[u8] = b"alpha_beta_gamma";
    assert_eq!(unit.len(), 16);

    // 32 back-to-back copies of the 16-byte unit (512 bytes total).
    let data: Vec<u8> = unit.repeat(32);

    let (triples, max_offset) = l4_greedy_round_trip(64, 16, &data);
    assert!(
        triples >= 1,
        "periodic 16-byte payload must emit matches (got {triples})",
    );
    assert!(
        max_offset >= 16,
        "periodic 16-byte payload must produce at least one offset >= 16 \
         (got max_offset = {max_offset})",
    );
}
7175
/// For a spread of compression levels, `reset` must leave
/// `strategy_tag` at the expected variant and that tag's backend must
/// agree with `active_backend()`.
#[test]
fn driver_reset_keeps_strategy_tag_in_sync_with_active_backend() {
    use super::strategy::StrategyTag;

    let cases = [
        (CompressionLevel::Level(1), StrategyTag::Fast),
        (CompressionLevel::Level(2), StrategyTag::Fast),
        (CompressionLevel::Level(3), StrategyTag::Dfast),
        (CompressionLevel::Level(4), StrategyTag::Dfast),
        (CompressionLevel::Level(5), StrategyTag::Greedy),
        (CompressionLevel::Level(7), StrategyTag::Lazy),
        (CompressionLevel::Level(12), StrategyTag::Lazy),
        (CompressionLevel::Level(13), StrategyTag::Btlazy2),
        (CompressionLevel::Level(14), StrategyTag::Btlazy2),
        (CompressionLevel::Level(15), StrategyTag::Btlazy2),
        (CompressionLevel::Level(16), StrategyTag::BtOpt),
        (CompressionLevel::Level(18), StrategyTag::BtUltra),
        (CompressionLevel::Level(22), StrategyTag::BtUltra2),
        (CompressionLevel::Fastest, StrategyTag::Fast),
        (CompressionLevel::Default, StrategyTag::Dfast),
        (CompressionLevel::Better, StrategyTag::Lazy),
        // `Best` sits on level 13 (the first dominant point of the deep band).
        (CompressionLevel::Best, StrategyTag::Btlazy2),
    ];

    for (level, expected) in cases {
        // Fresh driver per case so no state leaks between levels.
        let mut driver = MatchGeneratorDriver::new(32, 2);
        driver.reset(level);
        assert_eq!(
            driver.strategy_tag, expected,
            "strategy_tag wrong for {level:?}"
        );
        assert_eq!(
            driver.strategy_tag.backend(),
            driver.active_backend(),
            "strategy_tag backend disagrees with active_backend for {level:?}"
        );
    }
}
7213
/// Levels 16 and 17 must resolve to the `HashChain` backend and to the
/// `BtOpt` strategy tag.
#[test]
fn level_16_17_map_to_btopt_strategy() {
    use super::strategy::{BackendTag, StrategyTag};

    for level in [16, 17] {
        let resolved = resolve_level_params(CompressionLevel::Level(level), None);
        assert_eq!(resolved.backend(), BackendTag::HashChain);
        assert_eq!(StrategyTag::for_level(level as u8), StrategyTag::BtOpt);
    }
}
7224
/// Level 18 must map to `BtUltra` and level 19 to `BtUltra2`, both on
/// the `HashChain` backend.
#[test]
fn level_18_maps_to_btultra_level_19_to_btultra2_strategy() {
    use super::strategy::{BackendTag, StrategyTag};

    // Donor `clevels.h` (srcSize > 256 KiB tier) has level 18 =
    // `ZSTD_btultra` and level 19 = `ZSTD_btultra2`. Level 19 used to
    // be mapped to plain btultra, which under-searched (searchLog 6 vs
    // 7) and lost ~3.7% ratio on the repo corpus.
    let expectations = [(18, StrategyTag::BtUltra), (19, StrategyTag::BtUltra2)];
    for (level, strategy) in expectations {
        let resolved = resolve_level_params(CompressionLevel::Level(level), None);
        assert_eq!(resolved.backend(), BackendTag::HashChain);
        assert_eq!(StrategyTag::for_level(level as u8), strategy);
    }
}
7239
/// Levels 20, 21 and 22 must all resolve to the `HashChain` backend
/// and the `BtUltra2` strategy tag.
#[test]
fn level_20_22_map_to_btultra2_strategy() {
    use super::strategy::{BackendTag, StrategyTag};

    for lvl in [20, 21, 22] {
        let backend = resolve_level_params(CompressionLevel::Level(lvl), None).backend();
        assert_eq!(backend, BackendTag::HashChain);

        let strategy = StrategyTag::for_level(lvl as u8);
        assert_eq!(strategy, StrategyTag::BtUltra2);
    }
}
7249
/// Pins the parameters resolved for level 22 when no source-size hint
/// is given: window log 27, hash log 25, chain log 27, search depth
/// `1 << 9`, target length 999.
#[test]
fn level22_uses_donor_target_length_and_large_input_tables() {
    let resolved = resolve_level_params(CompressionLevel::Level(22), None);
    assert_eq!(resolved.window_log, 27);

    // Level 22 is expected to carry a hash-chain config; `unwrap`
    // fails the test if it does not.
    let chain_cfg = resolved.hc.unwrap();
    assert_eq!(chain_cfg.hash_log, 25);
    assert_eq!(chain_cfg.chain_log, 27);
    assert_eq!(chain_cfg.search_depth, 1 << 9);
    assert_eq!(chain_cfg.target_len, 999);
}
7260
/// Pins the resolved `(window_log, hash_log, chain_log, search_depth,
/// target_len)` tuple for each of the levels 16 through 21.
#[test]
fn bt_levels_16_to_21_pin_clevels_params() {
    // These tuples are pinned so the clevels.h alignment cannot drift
    // silently. Levels 16-20 mirror upstream `clevels.h` (srcSize >
    // 256 KiB tier, search_depth = 1 << searchLog). Level 21 keeps a
    // deeper search_depth on purpose (512 vs upstream's 128): it beats
    // C on ratio there, so the deeper walk is a deliberate
    // ratio-positive divergence.
    //
    // Columns: (level, window_log, hash_log, chain_log, search_depth, target_len)
    const PINNED: [(u8, u8, usize, usize, usize, usize); 6] = [
        (16, 22, 22, 22, 32, 48),
        (17, 23, 22, 23, 32, 64),
        (18, 23, 22, 23, 64, 64),
        (19, 23, 22, 24, 128, 256),
        (20, 25, 23, 25, 128, 256),
        (21, 26, 24, 24, 512, 256),
    ];

    for (level, window_log, hash_log, chain_log, search_depth, target_len) in PINNED {
        let resolved = resolve_level_params(CompressionLevel::Level(level as i32), None);
        assert_eq!(resolved.window_log, window_log, "level {level} window_log");

        let chain_cfg = resolved.hc.unwrap();
        assert_eq!(chain_cfg.hash_log, hash_log, "level {level} hash_log");
        assert_eq!(chain_cfg.chain_log, chain_log, "level {level} chain_log");
        assert_eq!(chain_cfg.search_depth, search_depth, "level {level} search_depth");
        assert_eq!(chain_cfg.target_len, target_len, "level {level} target_len");
    }
}
7288
/// Pins the level-22 parameters resolved for three source-size hints
/// (16 KiB, 128 KiB, 256 KiB). Every tier must keep target length 999.
#[test]
fn level22_source_size_hint_uses_donor_btultra2_tiers() {
    // Columns: (source-size hint, window_log, hash_log, chain_log, search_depth)
    let tiers = [
        (16 * 1024, 14, 15, 15, 1 << 10),
        (128 * 1024, 17, 17, 18, 1 << 11),
        (256 * 1024, 18, 19, 19, 1 << 13),
    ];

    for (size_hint, window_log, hash_log, chain_log, search_depth) in tiers {
        let resolved = resolve_level_params(CompressionLevel::Level(22), Some(size_hint));
        assert_eq!(resolved.window_log, window_log);

        let chain_cfg = resolved.hc.unwrap();
        assert_eq!(chain_cfg.hash_log, hash_log);
        assert_eq!(chain_cfg.chain_log, chain_log);
        assert_eq!(chain_cfg.search_depth, search_depth);
        assert_eq!(chain_cfg.target_len, 999);
    }
}
7315
/// Cross-checks the level-22 parameters resolved for a 15 027-byte
/// source-size hint against what the C library's `ZSTD_getCParams`
/// reports for the same level and size.
#[test]
fn level22_small_source_size_hint_matches_donor_cparams() {
    use zstd::zstd_safe::zstd_sys;

    let source_size = 15_027u64;

    let ours = resolve_level_params(CompressionLevel::Level(22), Some(source_size));
    // SAFETY: NOTE(review) — presumably sound because the call passes
    // only scalar arguments by value (level, source size, dict size 0);
    // confirm against the zstd API contract.
    let donor = unsafe { zstd_sys::ZSTD_getCParams(22, source_size, 0) };

    let chain_cfg = ours.hc.unwrap();
    assert_eq!(ours.window_log as u32, donor.windowLog);
    assert_eq!(chain_cfg.chain_log as u32, donor.chainLog);
    assert_eq!(chain_cfg.hash_log as u32, donor.hashLog);
    // The donor reports a log2 search budget; ours stores the depth itself.
    assert_eq!(chain_cfg.search_depth as u32, 1u32 << donor.searchLog);
    assert_eq!(HC_OPT_MIN_MATCH_LEN as u32, donor.minMatch);
    assert_eq!(chain_cfg.target_len as u32, donor.targetLength);
}
7332
/// Configuring with the 16 KiB level-22 config and window log 14 must
/// yield `hash3_log == 14`; reconfiguring with the full level-22 config
/// and window log 27 must yield `HC3_HASH_LOG`.
#[test]
fn level22_small_source_uses_window_bounded_hash3_log() {
    use super::strategy::StrategyTag;

    let mut matcher = HcMatchGenerator::new(1 << 14);

    // Small-source tier.
    matcher.configure(BTULTRA2_HC_CONFIG_L22_16K, StrategyTag::BtUltra2, 14);
    assert_eq!(matcher.table.hash3_log, 14);

    // Large-window tier on the same generator instance.
    matcher.configure(BTULTRA2_HC_CONFIG_L22, StrategyTag::BtUltra2, 27);
    assert_eq!(matcher.table.hash3_log, HC3_HASH_LOG);
}
7350
/// After matching one 32 KiB block under the BtUltra2 configuration,
/// the optimal-parser state must hold non-zero literal-length and
/// offset-code sums.
#[test]
fn btultra2_seed_pass_initializes_opt_state() {
    use super::strategy::StrategyTag;

    let mut matcher = HcMatchGenerator::new(1 << 20);
    matcher.configure(BTULTRA2_HC_CONFIG, StrategyTag::BtUltra2, 26);

    // 32 KiB ramp with period 251.
    let block: Vec<u8> = (0..32 * 1024).map(|i| (i % 251) as u8).collect();
    matcher.table.add_data(block, |_| {});
    matcher.start_matching(|_| {});

    let opt_state = &matcher.backend.bt_mut().opt_state;
    assert!(
        opt_state.lit_length_sum > 0,
        "btultra2 first block should seed non-zero sequence statistics"
    );
    assert!(
        opt_state.off_code_sum > 0,
        "btultra2 first block should seed offset-code statistics"
    );
}
7371
/// The BtUltra2 cost profile must have `favor_small_offsets` off and
/// `accurate` on.
#[test]
fn btultra2_profile_disables_small_offset_handicap() {
    use super::strategy::BtUltra2;

    // Before Phase 3 this test built the profile twice (`pass2=false`
    // and `pass2=true`) because `for_mode` distinguished them.
    // `const_for_strategy::<BtUltra2>()` yields a single profile — the
    // donor `opt2` pricing — so one binding is enough to capture the
    // invariant being checked.
    let opt2_profile = HcOptimalCostProfile::const_for_strategy::<BtUltra2>();

    let handicaps_small_offsets = opt2_profile.favor_small_offsets;
    assert!(
        !handicaps_small_offsets,
        "btultra2 should match donor opt2 offset pricing"
    );
    assert!(
        opt2_profile.accurate,
        "btultra2 should use donor opt2 accurate pricing"
    );
}
7389
/// The BtUltra cost profile must report a chain-depth budget of 64.
#[test]
fn btultra_profile_keeps_donor_search_depth_budget() {
    use super::strategy::BtUltra;

    let depth_budget = HcOptimalCostProfile::const_for_strategy::<BtUltra>().max_chain_depth;
    assert_eq!(
        depth_budget, 64,
        "btultra chain-depth budget must match clevels.h level 18 searchLog 6 (1 << 6 = 64)"
    );
}
7398
/// The BtOpt cost profile must report a chain-depth budget of 32.
#[test]
fn btopt_profile_keeps_donor_search_depth_budget() {
    use super::strategy::BtOpt;

    let depth_budget = HcOptimalCostProfile::const_for_strategy::<BtOpt>().max_chain_depth;
    assert_eq!(
        depth_budget, 32,
        "btopt should not cap chain depth below donor btopt search budget"
    );
}
7407
/// With `target_len` forced to 13 on a BtUltra2-configured generator,
/// `sufficient_match_len_for_pass` must return 13.
#[test]
fn sufficient_match_len_is_clamped_by_target_len() {
    use super::strategy::{BtUltra2, StrategyTag};

    let mut matcher = HcMatchGenerator::new(1 << 20);
    matcher.configure(BTULTRA2_HC_CONFIG, StrategyTag::BtUltra2, 26);
    // Override whatever `configure` chose so the expected value is known.
    matcher.hc.target_len = 13;

    let opt2_profile = HcOptimalCostProfile::const_for_strategy::<BtUltra2>();
    let sufficient = matcher.hc.sufficient_match_len_for_pass(opt2_profile);
    assert_eq!(sufficient, 13);
}
7420
/// With `target_len` set to 57, `sufficient_match_len_for_pass` must
/// return 57 for the BtOpt, BtUltra and BtUltra2 profiles alike.
#[test]
fn opt_modes_use_target_len_as_sufficient_len() {
    use super::strategy;

    let mut matcher = HcMatchGenerator::new(1 << 20);
    matcher.hc.target_len = 57;

    for opt_profile in [
        HcOptimalCostProfile::const_for_strategy::<strategy::BtOpt>(),
        HcOptimalCostProfile::const_for_strategy::<strategy::BtUltra>(),
        HcOptimalCostProfile::const_for_strategy::<strategy::BtUltra2>(),
    ] {
        let sufficient = matcher.hc.sufficient_match_len_for_pass(opt_profile);
        assert_eq!(sufficient, 57);
    }
}
7435
/// With an enormous `target_len` (`usize::MAX / 2`),
/// `sufficient_match_len_for_pass` must return `HC_OPT_NUM - 1`.
#[test]
fn sufficient_match_len_is_capped_by_opt_num() {
    use super::strategy::BtUltra2;

    let mut matcher = HcMatchGenerator::new(1 << 20);
    matcher.hc.target_len = usize::MAX / 2;

    let opt2_profile = HcOptimalCostProfile::const_for_strategy::<BtUltra2>();
    let sufficient = matcher.hc.sufficient_match_len_for_pass(opt2_profile);
    assert_eq!(sufficient, HC_OPT_NUM - 1);
}
7443
/// Seeding dictionary entropy (Huffman table plus the default LL/ML/OF
/// FSE tables) and then rescaling must leave the optimal-parser
/// frequencies different from the fallback bootstrap values.
#[test]
#[allow(clippy::borrow_deref_ref)]
fn dictionary_entropy_seed_initializes_opt_state_from_tables() {
    use super::strategy::{BtUltra2, StrategyTag};

    let mut matcher = HcMatchGenerator::new(1 << 20);
    matcher.configure(BTULTRA2_HC_CONFIG, StrategyTag::BtUltra2, 26);

    let ll = crate::fse::fse_encoder::default_ll_table();
    let ml = crate::fse::fse_encoder::default_ml_table();
    let of = crate::fse::fse_encoder::default_of_table();
    let huff = crate::huff0::huff0_encoder::HuffmanTable::build_from_data(
        b"aaabbbbccccddddeeeeefffffgggg",
    );
    matcher.seed_dictionary_entropy(Some(&huff), Some(&*ll), Some(&*ml), Some(&*of));

    let opt_state = &mut matcher.backend.bt_mut().opt_state;
    opt_state.rescale_freqs(
        b"abcd",
        HcOptimalCostProfile::const_for_strategy::<BtUltra2>(),
    );

    // Fallback LL bootstrap table: 4, 2, then all ones.
    let mut base_ll_freqs = [1u32; HC_MAX_LL + 1];
    base_ll_freqs[0] = 4;
    base_ll_freqs[1] = 2;

    assert_ne!(
        opt_state.lit_length_freq, base_ll_freqs,
        "dictionary entropy should override fallback LL bootstrap frequencies"
    );
    let ml_is_non_uniform = opt_state.match_length_freq.iter().any(|&freq| freq != 1);
    assert!(
        ml_is_non_uniform,
        "dictionary entropy should seed non-uniform ML frequencies"
    );
    assert_ne!(
        opt_state.off_code_freq[0], 6,
        "dictionary entropy should override fallback OF bootstrap frequencies"
    );
}
7492
/// Same checks as the full dictionary-entropy seed test, but with no
/// Huffman table supplied: the FSE tables alone must still move the
/// LL / ML / OF frequencies off their fallback bootstrap values.
#[test]
#[allow(clippy::borrow_deref_ref)]
fn dictionary_fse_seed_applies_without_huffman_seed() {
    use super::strategy::{BtUltra2, StrategyTag};

    let mut matcher = HcMatchGenerator::new(1 << 20);
    matcher.configure(BTULTRA2_HC_CONFIG, StrategyTag::BtUltra2, 26);

    let ll = crate::fse::fse_encoder::default_ll_table();
    let ml = crate::fse::fse_encoder::default_ml_table();
    let of = crate::fse::fse_encoder::default_of_table();
    // `None` in the first slot: no Huffman seed.
    matcher.seed_dictionary_entropy(None, Some(&*ll), Some(&*ml), Some(&*of));

    let opt_state = &mut matcher.backend.bt_mut().opt_state;
    opt_state.rescale_freqs(
        b"abcd",
        HcOptimalCostProfile::const_for_strategy::<BtUltra2>(),
    );

    // Fallback LL bootstrap table: 4, 2, then all ones.
    let mut base_ll_freqs = [1u32; HC_MAX_LL + 1];
    base_ll_freqs[0] = 4;
    base_ll_freqs[1] = 2;

    assert_ne!(
        opt_state.lit_length_freq, base_ll_freqs,
        "FSE seed should still override LL bootstrap frequencies without huffman seed"
    );
    let ml_is_non_uniform = opt_state.match_length_freq.iter().any(|&freq| freq != 1);
    assert!(
        ml_is_non_uniform,
        "FSE seed should still seed non-uniform ML frequencies"
    );
    assert_ne!(
        opt_state.off_code_freq[0], 6,
        "FSE seed should still override OF bootstrap frequencies without huffman seed"
    );
}
7536
/// After an FSE-only dictionary seed, rescaling on a 3-byte source must
/// leave `price_type` at `HcOptPriceType::Dynamic`.
#[test]
#[allow(clippy::borrow_deref_ref)]
fn dictionary_seed_overrides_predef_price_mode_on_tiny_input() {
    use super::strategy::{BtUltra2, StrategyTag};

    let mut matcher = HcMatchGenerator::new(1 << 20);
    matcher.configure(BTULTRA2_HC_CONFIG, StrategyTag::BtUltra2, 26);

    let ll = crate::fse::fse_encoder::default_ll_table();
    let ml = crate::fse::fse_encoder::default_ml_table();
    let of = crate::fse::fse_encoder::default_of_table();
    matcher.seed_dictionary_entropy(None, Some(&*ll), Some(&*ml), Some(&*of));

    let opt_state = &mut matcher.backend.bt_mut().opt_state;
    opt_state.rescale_freqs(
        b"abc",
        HcOptimalCostProfile::const_for_strategy::<BtUltra2>(),
    );

    let stayed_dynamic = matches!(opt_state.price_type, HcOptPriceType::Dynamic);
    assert!(
        stayed_dynamic,
        "dictionary-seeded first block should stay in dynamic mode even for tiny src"
    );
}
7563
/// In both predefined and dynamic price modes, the literal-length price
/// at `HC_BLOCKSIZE_MAX` must exceed the price one below it by exactly
/// `HC_BITCOST_MULTIPLIER`.
#[test]
fn lit_length_price_blocksize_max_costs_one_extra_bit() {
    use super::strategy::BtUltra2;

    let one_below_max = HC_BLOCKSIZE_MAX.saturating_sub(1);

    // Predefined price mode.
    let profile_predef = HcOptimalCostProfile::const_for_strategy::<BtUltra2>();
    let mut stats_predef = HcOptState::new();
    stats_predef.price_type = HcOptPriceType::Predefined;
    let predef_prev = profile_predef.lit_length_price(&stats_predef, one_below_max);
    let predef_max = profile_predef.lit_length_price(&stats_predef, HC_BLOCKSIZE_MAX);
    assert_eq!(
        predef_max,
        predef_prev + HC_BITCOST_MULTIPLIER,
        "predefined litLength pricing at BLOCKSIZE_MAX must add exactly one bit"
    );

    // Dynamic price mode: every symbol gets frequency 1, with each sum
    // set to the matching symbol count, before base prices are set.
    let profile_dyn = HcOptimalCostProfile::const_for_strategy::<BtUltra2>();
    let mut stats_dyn = HcOptState::new();
    stats_dyn.price_type = HcOptPriceType::Dynamic;
    stats_dyn.lit_freq.fill(1);
    stats_dyn.lit_sum = (HC_MAX_LIT + 1) as u32;
    stats_dyn.lit_length_freq.fill(1);
    stats_dyn.lit_length_sum = (HC_MAX_LL + 1) as u32;
    stats_dyn.match_length_freq.fill(1);
    stats_dyn.match_length_sum = (HC_MAX_ML + 1) as u32;
    stats_dyn.off_code_freq.fill(1);
    stats_dyn.off_code_sum = (HC_MAX_OFF + 1) as u32;
    stats_dyn.set_base_prices(true);
    let dyn_prev = profile_dyn.lit_length_price(&stats_dyn, one_below_max);
    let dyn_max = profile_dyn.lit_length_price(&stats_dyn, HC_BLOCKSIZE_MAX);
    assert_eq!(
        dyn_max,
        dyn_prev + HC_BITCOST_MULTIPLIER,
        "dynamic litLength pricing at BLOCKSIZE_MAX must add exactly one bit"
    );
}
7598
/// Once dictionary entropy has been seeded, the BtUltra2 seed-pass
/// predicate must answer `false` even for a block one byte above
/// `HC_PREDEF_THRESHOLD`.
#[test]
#[allow(clippy::borrow_deref_ref)]
fn btultra2_seed_pass_disabled_when_dictionary_entropy_seed_present() {
    use super::strategy::{BtUltra2, StrategyTag};

    let mut matcher = HcMatchGenerator::new(1 << 20);
    matcher.configure(BTULTRA2_HC_CONFIG, StrategyTag::BtUltra2, 26);

    let ll = crate::fse::fse_encoder::default_ll_table();
    let ml = crate::fse::fse_encoder::default_ml_table();
    let of = crate::fse::fse_encoder::default_of_table();
    matcher.seed_dictionary_entropy(None, Some(&*ll), Some(&*ml), Some(&*of));

    let runs_seed_pass = matcher.should_run_btultra2_seed_pass::<BtUltra2>(HC_PREDEF_THRESHOLD + 1);
    assert!(
        !runs_seed_pass,
        "dictionary-seeded first block should skip btultra2 warmup pass"
    );
}
7617
/// With a non-zero `history_abs_start` and a chunk already pushed into
/// the table, the BtUltra2 seed-pass predicate must answer `false`.
#[test]
fn btultra2_seed_pass_disabled_when_prefix_history_exists() {
    use super::strategy::{BtUltra2, StrategyTag};

    let mut matcher = HcMatchGenerator::new(1 << 20);
    matcher.configure(BTULTRA2_HC_CONFIG, StrategyTag::BtUltra2, 26);

    // Fake pre-existing history in front of the current block.
    matcher.table.history_abs_start = 17;
    matcher.table.push_test_chunk(b"abcdefghijklmnop".to_vec());

    let runs_seed_pass = matcher.should_run_btultra2_seed_pass::<BtUltra2>(HC_PREDEF_THRESHOLD + 9);
    assert!(
        !runs_seed_pass,
        "btultra2 warmup must be first-block only (no prefix history)"
    );
}
7633
/// For a block of exactly `HC_PREDEF_THRESHOLD` bytes on a freshly
/// configured generator, the BtUltra2 seed-pass predicate must answer
/// `false`.
#[test]
fn btultra2_seed_pass_disabled_for_tiny_block() {
    use super::strategy::{BtUltra2, StrategyTag};

    let mut matcher = HcMatchGenerator::new(1 << 20);
    matcher.configure(BTULTRA2_HC_CONFIG, StrategyTag::BtUltra2, 26);

    let runs_seed_pass = matcher.should_run_btultra2_seed_pass::<BtUltra2>(HC_PREDEF_THRESHOLD);
    assert!(
        !runs_seed_pass,
        "btultra2 warmup should not run at or below predefined threshold"
    );
}
7647
/// With `lit_length_sum` already non-zero in the optimal-parser state,
/// the BtUltra2 seed-pass predicate must answer `false`.
#[test]
fn btultra2_seed_pass_disabled_after_stats_initialized() {
    use super::strategy::{BtUltra2, StrategyTag};

    let mut matcher = HcMatchGenerator::new(1 << 20);
    matcher.configure(BTULTRA2_HC_CONFIG, StrategyTag::BtUltra2, 26);

    // Any non-zero sum marks the statistics as already initialised.
    matcher.backend.bt_mut().opt_state.lit_length_sum = 1;

    let runs_seed_pass =
        matcher.should_run_btultra2_seed_pass::<BtUltra2>(HC_PREDEF_THRESHOLD + 32);
    assert!(
        !runs_seed_pass,
        "btultra2 warmup should run only for first block before stats are initialized"
    );
}
7662
/// When `window_size` is larger than the single live chunk, the
/// BtUltra2 seed-pass predicate must answer `false`.
#[test]
fn btultra2_seed_pass_disabled_when_not_at_frame_start() {
    use super::strategy::{BtUltra2, StrategyTag};

    let mut matcher = HcMatchGenerator::new(1 << 20);
    matcher.configure(BTULTRA2_HC_CONFIG, StrategyTag::BtUltra2, 26);

    // Mimic a non-first block: the deque holds no prefix for the
    // current block, yet the total produced window already accounts
    // for earlier output.
    let block_len = HC_PREDEF_THRESHOLD + 32;
    matcher.table.window_size = HC_PREDEF_THRESHOLD + 64;
    // The current block is recorded as the only live chunk; the
    // seed-pass check reads lengths, not bytes.
    matcher.table.chunk_lens.push_back(block_len);

    let runs_seed_pass = matcher.should_run_btultra2_seed_pass::<BtUltra2>(block_len);
    assert!(
        !runs_seed_pass,
        "btultra2 warmup must not run after frame start"
    );
}
7682
/// With one raw sequence already queued in `ldm_sequences`, the
/// BtUltra2 seed-pass predicate must answer `false`.
#[test]
fn btultra2_seed_pass_disabled_when_ldm_sequences_exist() {
    use super::strategy::{BtUltra2, StrategyTag};

    let mut matcher = HcMatchGenerator::new(1 << 20);
    matcher.configure(BTULTRA2_HC_CONFIG, StrategyTag::BtUltra2, 26);

    // Window and single live chunk have the same length here.
    let window_len = HC_PREDEF_THRESHOLD + 64;
    matcher.table.window_size = window_len;
    matcher.table.chunk_lens.push_back(window_len);

    let pending_ldm_seq = HcRawSeq {
        lit_length: 8,
        offset: 16,
        match_length: 32,
    };
    matcher.backend.bt_mut().ldm_sequences.push(pending_ldm_seq);

    let runs_seed_pass =
        matcher.should_run_btultra2_seed_pass::<BtUltra2>(HC_PREDEF_THRESHOLD + 32);
    assert!(
        !runs_seed_pass,
        "btultra2 warmup must not run when LDM already produced sequences"
    );
}
7703
/// With literal compression switched off, a literal must be priced at
/// `8 * HC_BITCOST_MULTIPLIER` in predefined price mode.
#[test]
fn literal_price_uses_eight_bits_when_literals_uncompressed() {
    use super::strategy::BtUltra2;

    let mut opt_state = HcOptState::new();
    opt_state.set_literals_compressed_for_tests(false);
    opt_state.price_type = HcOptPriceType::Predefined;

    let opt2_profile = HcOptimalCostProfile::const_for_strategy::<BtUltra2>();
    let price = opt2_profile.literal_price(&opt_state, b'a');
    assert_eq!(
        price,
        8 * HC_BITCOST_MULTIPLIER,
        "uncompressed literals should cost 8 bits regardless of price mode"
    );
}
7716
/// With literal compression switched off, one `update_stats` call must
/// leave the literal sum and frequencies at zero while bumping the
/// literal-length, match-length and offset-code sums to 1.
#[test]
fn update_stats_skips_literal_frequencies_when_uncompressed() {
    let mut opt_state = HcOptState::new();
    opt_state.set_literals_compressed_for_tests(false);
    opt_state.update_stats(3, b"abc", 4, 8);

    // Literal statistics: untouched.
    assert_eq!(
        opt_state.lit_sum, 0,
        "literal sum must remain unchanged when literal compression is disabled"
    );
    let total_lit_freq: u32 = opt_state.lit_freq.iter().sum();
    assert_eq!(
        total_lit_freq, 0,
        "literal frequencies must not be updated when literal compression is disabled"
    );

    // Sequence statistics: each sum is 1 after the single update.
    assert_eq!(
        opt_state.lit_length_sum, 1,
        "literal-length stats still update for sequence modeling"
    );
    assert_eq!(
        opt_state.match_length_sum, 1,
        "match-length stats still update for sequence modeling"
    );
    assert_eq!(
        opt_state.off_code_sum, 1,
        "offset-code stats still update for sequence modeling"
    );
}
7744
/// Seeding dictionary entropy with a Huffman table must not leak into the
/// literal statistics when literals are stored uncompressed.
#[test]
// The `Some(&*table)` reborrows below are what this lint would flag.
#[allow(clippy::borrow_deref_ref)]
fn dictionary_huffman_seed_ignored_when_literals_uncompressed() {
    use crate::fse::fse_encoder::{default_ll_table, default_ml_table, default_of_table};
    use crate::huff0::huff0_encoder::HuffmanTable;

    let mut opt_state = HcOptState::new();
    opt_state.set_literals_compressed_for_tests(false);

    let huffman_seed = HuffmanTable::build_from_data(b"aaaaabbbbcccddeeff00112233445566778899");
    let ll_table = default_ll_table();
    let ml_table = default_ml_table();
    let of_table = default_of_table();
    opt_state.seed_dictionary_entropy(
        Some(&huffman_seed),
        Some(&*ll_table),
        Some(&*ml_table),
        Some(&*of_table),
    );

    let cost_profile = HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>();
    opt_state.rescale_freqs(b"abcd", cost_profile);

    assert_eq!(
        opt_state.lit_sum, 0,
        "literal sum must stay zero when literals are uncompressed"
    );
    let seeded_literals = opt_state.lit_freq.iter().copied().sum::<u32>();
    assert_eq!(
        seeded_literals, 0,
        "literal frequencies must ignore dictionary huffman seed when uncompressed"
    );
}
7771
/// The set of repcode candidates depends on the literal length: with a
/// non-zero literal length the first repeat offset is offered, with a zero
/// literal length it is not.
#[test]
fn hc_repcode_candidates_respect_litlen_dependent_rep_order() {
    let mut generator = HcMatchGenerator::new(64);
    generator.table.history = b"xxxxxxABCDEFABCDEF".to_vec();
    generator.table.history_start = 0;
    generator.table.history_abs_start = 0;

    // Index 12 is the first byte of the second "ABCDEF" run, so a distance
    // of 6 reproduces it from the first run.
    let probe_pos = 12usize;
    let data_end = generator.table.history.len();
    let rep_history = [6u32, 3u32, 9u32];

    // Literal length 1: the first repeat offset (6) must be reported.
    let mut offsets_with_literal = Vec::new();
    generator.hc.for_each_repcode_candidate_with_reps(
        &generator.table,
        probe_pos,
        1,
        rep_history,
        data_end,
        HC_OPT_MIN_MATCH_LEN,
        |candidate| offsets_with_literal.push(candidate.offset),
    );
    let rep0_seen_with_literal = offsets_with_literal.contains(&6);
    assert!(
        rep0_seen_with_literal,
        "when lit_len>0, rep0 should be considered and match"
    );

    // Literal length 0: offset 6 must no longer be among the candidates.
    let mut offsets_without_literal = Vec::new();
    generator.hc.for_each_repcode_candidate_with_reps(
        &generator.table,
        probe_pos,
        0,
        rep_history,
        data_end,
        HC_OPT_MIN_MATCH_LEN,
        |candidate| offsets_without_literal.push(candidate.offset),
    );
    let rep0_seen_without_literal = offsets_without_literal.contains(&6);
    assert!(
        !rep0_seen_without_literal,
        "when lit_len==0, rep0 is not directly eligible (ll0 semantics)"
    );
}
7817
/// A chain depth of zero must not suppress repcode candidates: the first
/// repeat offset still has to show up in the collected set.
#[test]
fn hc_collect_optimal_candidates_keeps_reps_when_chain_depth_zero() {
    let mut generator = HcMatchGenerator::new(64);
    generator.table.history = b"xyzxyzxyzxyz".to_vec();
    generator.table.history_start = 0;
    generator.table.history_abs_start = 0;
    generator.hc.search_depth = 0;

    let probe_pos = 6usize;
    let data_end = generator.table.history.len();

    let no_chain_profile = HcOptimalCostProfile {
        max_chain_depth: 0,
        sufficient_match_len: usize::MAX / 2,
        accurate: false,
        favor_small_offsets: false,
    };
    let query = HcCandidateQuery {
        reps: [3, 6, 9],
        lit_len: 1,
        ldm_candidate: None,
    };

    let mut collected = Vec::new();
    generator.collect_optimal_candidates(
        probe_pos,
        data_end,
        no_chain_profile,
        query,
        &mut collected,
    );

    assert!(
        !collected.is_empty(),
        "rep candidates should remain available even when chain depth is zero"
    );
    let has_rep0 = collected.iter().any(|candidate| candidate.offset == 3);
    assert!(has_rep0, "rep0 candidate should be retained");
}
7855
/// On a run of identical bytes, only repcode offsets (1 or 4 here) may be
/// reported: no chain-derived offset is allowed to appear in the output.
#[test]
fn hc_collect_optimal_candidates_rep_tail_match_skips_chain_probe() {
    let probe_pos = 6usize;

    let mut generator = HcMatchGenerator::new(64);
    generator.hc.search_depth = 32;
    generator.table.history = b"aaaaaaaaaa".to_vec();
    generator.table.history_start = 0;
    generator.table.history_abs_start = 0;
    generator.table.position_base = 0;
    generator.table.ensure_tables();
    generator.table.insert_positions(0, probe_pos);
    let data_end = generator.table.history.len();

    let deep_profile = HcOptimalCostProfile {
        max_chain_depth: 32,
        sufficient_match_len: usize::MAX / 2,
        accurate: true,
        favor_small_offsets: false,
    };
    let query = HcCandidateQuery {
        reps: [1, 4, 8],
        lit_len: 1,
        ldm_candidate: None,
    };

    let mut collected = Vec::new();
    generator.collect_optimal_candidates(probe_pos, data_end, deep_profile, query, &mut collected);

    let only_rep_offsets = collected
        .iter()
        .all(|candidate| candidate.offset == 1 || candidate.offset == 4);
    assert!(
        only_rep_offsets,
        "terminal rep match should return before chain probing adds non-rep offsets"
    );
}
7893
/// After collecting candidates on periodic data, the table's
/// `skip_insert_until_abs` marker must have moved beyond the probed
/// position.
#[test]
fn hc_collect_optimal_candidates_long_chain_match_advances_skip_window() {
    let probe_pos = 9usize;

    let mut generator = HcMatchGenerator::new(128);
    generator.hc.search_depth = 32;
    generator.table.history = b"abcabcabcabcabcabcabcabc".to_vec();
    generator.table.history_start = 0;
    generator.table.history_abs_start = 0;
    generator.table.position_base = 0;
    generator.table.ensure_tables();
    generator.table.insert_positions(0, probe_pos);
    // Start from a known marker so any advance is attributable to the call.
    generator.table.skip_insert_until_abs = 0;
    let data_end = generator.table.history.len();

    let deep_profile = HcOptimalCostProfile {
        max_chain_depth: 32,
        sufficient_match_len: usize::MAX / 2,
        accurate: true,
        favor_small_offsets: false,
    };
    let query = HcCandidateQuery {
        reps: [1, 4, 8],
        lit_len: 1,
        ldm_candidate: None,
    };

    let mut collected = Vec::new();
    generator.collect_optimal_candidates(probe_pos, data_end, deep_profile, query, &mut collected);

    let skip_marker = generator.table.skip_insert_until_abs;
    assert!(
        skip_marker > probe_pos,
        "long chain match should advance skip window to avoid redundant immediate insertions"
    );
}
7931
/// With a small `sufficient_match_len`, the skip marker must advance past
/// the probed position but stay at or below "furthest match end minus 8".
#[test]
fn hc_collect_optimal_candidates_chain_fast_skip_uses_match_end_minus_8() {
    let probe_pos = 9usize;

    let mut generator = HcMatchGenerator::new(128);
    generator.hc.search_depth = 32;
    generator.table.history = b"abcabcabcabcabcabcabcabc".to_vec();
    generator.table.history_start = 0;
    generator.table.history_abs_start = 0;
    generator.table.position_base = 0;
    generator.table.ensure_tables();
    generator.table.insert_positions(0, probe_pos);
    // Start from a known marker so any advance is attributable to the call.
    generator.table.skip_insert_until_abs = 0;
    let data_end = generator.table.history.len();

    let early_exit_profile = HcOptimalCostProfile {
        max_chain_depth: 32,
        sufficient_match_len: 10,
        accurate: true,
        favor_small_offsets: false,
    };
    let query = HcCandidateQuery {
        reps: [1, 4, 8],
        lit_len: 1,
        ldm_candidate: None,
    };

    let mut collected = Vec::new();
    generator.collect_optimal_candidates(
        probe_pos,
        data_end,
        early_exit_profile,
        query,
        &mut collected,
    );

    // Furthest end position reached by any reported candidate.
    let best_match_end = collected
        .iter()
        .map(|candidate| candidate.start.saturating_add(candidate.match_len))
        .max()
        .expect("expected at least one candidate");
    let skip_ceiling = best_match_end.saturating_sub(8);
    let skip_marker = generator.table.skip_insert_until_abs;

    assert!(
        skip_marker > probe_pos,
        "chain fast-skip must advance past current position"
    );
    assert!(
        skip_marker <= skip_ceiling,
        "chain fast-skip must not exceed donor-style matchEndIdx - 8 bound"
    );
}
7978
/// On non-repeating data with a zero search depth, collecting candidates
/// must leave the skip marker at exactly `abs_pos + 1`.
#[test]
fn hc_collect_optimal_candidates_advances_skip_window_on_plain_bt_path() {
    let mut generator = HcMatchGenerator::new(256);
    generator.hc.search_depth = 0;
    generator.table.history = b"abcdefghijklmnop".to_vec();
    generator.table.history_start = 0;
    generator.table.history_abs_start = 0;
    generator.table.position_base = 0;
    generator.table.ensure_tables();

    let probe_pos = 8usize;
    generator.table.skip_insert_until_abs = 0;
    let data_end = generator.table.history.len();

    let no_chain_profile = HcOptimalCostProfile {
        max_chain_depth: 0,
        sufficient_match_len: usize::MAX / 2,
        accurate: true,
        favor_small_offsets: false,
    };
    let query = HcCandidateQuery {
        reps: [1, 4, 8],
        lit_len: 1,
        ldm_candidate: None,
    };

    let mut collected = Vec::new();
    generator.collect_optimal_candidates(
        probe_pos,
        data_end,
        no_chain_profile,
        query,
        &mut collected,
    );

    let expected_marker = probe_pos.saturating_add(1);
    assert_eq!(
        generator.table.skip_insert_until_abs, expected_marker,
        "plain BT path should advance skip window by 1 via donor matchEndIdx baseline"
    );
}
8017
8018// Removed: the three `hc_collect_optimal_candidates_*_hash3_*` /
8019// `hc_hash3_tail_match_*` tests forced `search_depth = 0` together
8020// with `hash3_log != 0`, an HC-chain-walker-only fixture state that
8021// production never reaches (hash3 is BtUltra2-only and BtUltra2 always
8022// runs `search_depth = 512`). They depended on the `has_hash3 =>
8023// BtUltra2` escape hatch in the test dispatcher; with that hatch gone
8024// (CR review on PR #123) and the dispatcher routing purely from
8025// `self.strategy_tag`, there is no production-shaped configuration
8026// that reproduces what those tests asserted. The corresponding hash3
8027// invariants are exercised end-to-end by the existing level22 roundtrip
8028// + donor-parity ratio gate.
8029
/// An LDM candidate handed in through the query must come back out as part
/// of the collected candidate set, with its offset and length intact.
#[test]
fn hc_ldm_candidates_are_merged_into_optimal_candidates() {
    let mut generator = HcMatchGenerator::new(512);
    generator.table.history = (0..256).map(|i| (i % 251) as u8).collect();
    generator.table.history_start = 0;
    generator.table.history_abs_start = 0;

    let probe_pos = 128usize;
    let data_end = 256usize;
    let ldm = MatchCandidate {
        start: probe_pos,
        offset: 96,
        match_len: 40,
    };

    let no_chain_profile = HcOptimalCostProfile {
        max_chain_depth: 0,
        sufficient_match_len: usize::MAX / 2,
        accurate: true,
        favor_small_offsets: false,
    };
    let query = HcCandidateQuery {
        reps: [1, 4, 8],
        lit_len: 1,
        ldm_candidate: Some(ldm),
    };

    let mut collected = Vec::new();
    generator.collect_optimal_candidates(
        probe_pos,
        data_end,
        no_chain_profile,
        query,
        &mut collected,
    );

    let ldm_present = collected
        .iter()
        .any(|candidate| candidate.offset == ldm.offset && candidate.match_len == ldm.match_len);
    assert!(
        ldm_present,
        "LDM candidate should be present in optimal candidate set"
    );
}
8070
/// Both BtUltra2 and BtUltra must report a candidate with offset >= 32
/// (i.e. one that reaches back into the dictionary region set up below).
///
/// Each strategy runs against its own freshly configured generator AND its
/// own output vector. Sharing one vector across both runs would let the
/// second assertion pass on leftovers from the first run if the collector
/// appends instead of clearing.
#[test]
fn btultra_and_btultra2_both_keep_dictionary_candidates() {
    // Routes the BtUltra2 / BtUltra fixture through the production
    // `configure()` path so derived state (`hash3_log`, `is_btultra2`,
    // `uses_bt`, `backend`) stays consistent — manually flipping the
    // strategy flags here used to leave `hash3_log` / `hash3_table` in
    // the previous mode's shape and trip the
    // `Strategy::USE_HASH3 ⇒ hash3_log != 0` debug invariant inside
    // `collect_optimal_candidates_initialized_body`.
    use super::strategy::StrategyTag;

    let test_config = HcConfig {
        hash_log: 23,
        chain_log: 22,
        search_depth: 32,
        target_len: 256,
        search_mls: 4,
    };
    let window_log = 20u8;

    // Builds a 160-byte history: bytes 0..64 and 64..160 use two different
    // short cycles, then 24 bytes at `abs_pos` are overwritten with a copy
    // of bytes 16..40 so a match at distance `abs_pos - 16` exists. The
    // first 64 bytes are then marked as dictionary.
    let prepare_history = |hc: &mut HcMatchGenerator, abs_pos: usize| {
        hc.table.history = alloc::vec![0u8; 160];
        for i in 0..64 {
            hc.table.history[i] = b'a' + (i % 7) as u8;
        }
        for i in 64..160 {
            hc.table.history[i] = b'k' + (i % 5) as u8;
        }
        for i in 0..24 {
            hc.table.history[abs_pos + i] = hc.table.history[16 + i];
        }
        hc.table.history_start = 0;
        hc.table.history_abs_start = 0;
        hc.table.position_base = 0;
        hc.table.ensure_tables();
        hc.table.insert_positions(0, abs_pos);
        hc.table.dictionary_limit_abs = Some(64);
        hc.table.skip_insert_until_abs = 0;
    };

    let profile = HcOptimalCostProfile {
        max_chain_depth: 32,
        sufficient_match_len: usize::MAX / 2,
        accurate: true,
        favor_small_offsets: false,
    };
    let abs_pos = 96usize;

    let cases = [
        (
            StrategyTag::BtUltra2,
            "btultra2 should retain dictionary candidates on donor-parity path",
        ),
        (
            StrategyTag::BtUltra,
            "btultra should retain dictionary candidates",
        ),
    ];

    for (strategy, failure_message) in cases {
        // Fresh output per strategy: never judge one run by another's results.
        let mut out = Vec::new();

        let mut hc = HcMatchGenerator::new(256);
        hc.configure(test_config, strategy, window_log);
        prepare_history(&mut hc, abs_pos);
        hc.collect_optimal_candidates(
            abs_pos,
            160,
            profile,
            HcCandidateQuery {
                reps: [1, 4, 8],
                lit_len: 1,
                ldm_candidate: None,
            },
            &mut out,
        );
        assert!(
            out.iter().any(|candidate| candidate.offset >= 32),
            "{failure_message}"
        );
    }
}
8158
/// A 1 KiB source-size hint must shrink both dfast hash tables relative to
/// the unhinted default sizes.
#[test]
fn driver_small_source_hint_shrinks_dfast_hash_tables() {
    /// Commits one 12-byte block and runs the skip-matching path on it.
    fn commit_and_skip(driver: &mut MatchGeneratorDriver, payload: &[u8; 12]) {
        let mut block = driver.get_next_space();
        block[..12].copy_from_slice(payload);
        block.truncate(12);
        driver.commit_space(block);
        driver.skip_matching_with_hint(None);
    }

    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Baseline: no hint.
    driver.reset(CompressionLevel::Level(3));
    commit_and_skip(&mut driver, b"abcabcabcabc");
    // Donor-parity split sizes: long-hash = DFAST_HASH_BITS,
    // short-hash = DFAST_HASH_BITS - DFAST_SHORT_HASH_BITS_DELTA.
    let unhinted = driver.dfast_matcher();
    let full_long = unhinted.long_hash.len();
    let full_short = unhinted.short_hash.len();
    assert_eq!(full_long, 1 << DFAST_HASH_BITS);
    assert_eq!(
        full_short,
        1 << (DFAST_HASH_BITS - DFAST_SHORT_HASH_BITS_DELTA)
    );

    // Same driver again, this time with a tiny pledged source size.
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Level(3));
    commit_and_skip(&mut driver, b"xyzxyzxyzxyz");
    let hinted = driver.dfast_matcher();
    let hinted_long = hinted.long_hash.len();
    let hinted_short = hinted.short_hash.len();

    // The wire `window_log` stays at its floor (decoder-interop), but the
    // internal dfast tables are sized from the RAW 1 KiB source, not the
    // floored window: `table_window = 1 << ceil_log2(1024) = 1 << 10`, so
    // both tables land at the `MIN_WINDOW_LOG` floor (the long table at
    // `dfast_hash_bits_for_window(1 << 10) = 10`, the short table one
    // `DFAST_SHORT_HASH_BITS_DELTA` step below but clamped back up to
    // `MIN_WINDOW_LOG`).
    assert_eq!(driver.window_size(), 1 << MIN_HINTED_WINDOW_LOG);
    assert_eq!(hinted_long, 1 << MIN_WINDOW_LOG);
    assert_eq!(hinted_short, 1 << MIN_WINDOW_LOG);
    let both_shrunk = hinted_long < full_long && hinted_short < full_short;
    assert!(
        both_shrunk,
        "tiny source hint should reduce both dfast tables"
    );
}
8204
/// Regression: a `u64::MAX` source-size hint must neither panic nor size
/// the dfast long-hash table below the `MIN_WINDOW_LOG` floor.
#[test]
fn driver_huge_source_hint_does_not_overflow_table_window_shift() {
    // The Dfast / Row table-window sizing in `reset` derives a shift from
    // `ceil_log2(hint)`. For a hint >= 2^63 + 1 that shift is 64, and
    // `1usize << 64` panics in debug / wraps to 0 in release before the
    // `.min(max_window_size)` cap can apply. A `u64::MAX` pledged source
    // size must size the table to the real window, never panic or wrap to
    // zero.
    let mut driver = MatchGeneratorDriver::new(32, 2);
    driver.set_source_size_hint(u64::MAX);
    driver.reset(CompressionLevel::Level(3));

    let mut block = driver.get_next_space();
    block[..12].copy_from_slice(b"abcabcabcabc");
    block.truncate(12);
    driver.commit_space(block);
    driver.skip_matching_with_hint(None);

    let long_table_len = driver.dfast_matcher().long_hash.len();
    assert!(
        long_table_len >= 1 << MIN_WINDOW_LOG,
        "huge hint must size the dfast table from the real window, not wrap to zero"
    );
}
8227
/// Regression: a `u64::MAX` source-size hint combined with a dictionary
/// hint must reserve the clamped HC history ceiling instead of
/// overflowing.
#[test]
fn driver_huge_source_hint_with_dict_does_not_overflow_hc_reserve() {
    // The HC/BT history-mirror pre-size adds the dictionary hint to the
    // source-size hint before `reserve_history` clamps to the window
    // ceiling. A `u64::MAX` pledged source size (the "unknown size"
    // sentinel) plus any positive dictionary hint overflows `usize` in
    // `(src as usize) + dict_hint` — debug panic / release wrap on 64-bit,
    // and `src as usize` truncation on 32-bit targets. Level 16 (BtOpt)
    // routes through the HashChain/BT storage arm that owns this reserve.
    // Must size the mirror to the real window, never panic, wrap, or
    // truncate.
    let mut driver = MatchGeneratorDriver::new(32, 2);
    driver.set_source_size_hint(u64::MAX);
    driver.set_dictionary_size_hint(64 * 1024);
    driver.reset(CompressionLevel::Level(16));

    // The saturated `usize::MAX` reserve target must be clamped to the HC
    // history ceiling, not reserved literally (which would OOM/panic).
    // Level 16 has window_log 22, so the ceiling is
    // `window + window/4 + one block` (the `reserve_history` formula).
    // Checking the capacity matters: a no-panic-only test would also pass
    // on an under-reserved mirror.
    let window = 1usize << 22;
    let quarter_window = window >> 2;
    let one_block = crate::common::MAX_BLOCK_SIZE as usize;
    let expected_history_ceiling = window + quarter_window + one_block;
    assert!(
        driver.hc_matcher().table.history.capacity() >= expected_history_ceiling,
        "huge source + dict hint must reserve the clamped HC history ceiling, got {}",
        driver.hc_matcher().table.history.capacity()
    );

    // Finally push a small block through to prove the driver is usable.
    let mut block = driver.get_next_space();
    block[..12].copy_from_slice(b"abcabcabcabc");
    block.truncate(12);
    driver.commit_space(block);
    driver.skip_matching_with_hint(None);
}
8263
/// A 1 KiB source-size hint must reduce the number of row heads allocated
/// by the row matcher relative to the unhinted Level 5 default.
#[test]
fn driver_small_source_hint_shrinks_row_hash_tables() {
    /// Commits one 12-byte block and runs the skip-matching path on it.
    fn commit_and_skip(driver: &mut MatchGeneratorDriver, payload: &[u8; 12]) {
        let mut block = driver.get_next_space();
        block[..12].copy_from_slice(payload);
        block.truncate(12);
        driver.commit_space(block);
        driver.skip_matching_with_hint(None);
    }

    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Baseline: no hint.
    driver.reset(CompressionLevel::Level(5));
    commit_and_skip(&mut driver, b"abcabcabcabc");
    let full_rows = driver.row_matcher().row_heads.len();
    // Level 5 uses the upstream row_log (clamp(searchLog=3, 4, 6) = 4) and the
    // upstream L5 hashLog (`ZSTD_getCParams(5,..).hashLog` = 19), so the row
    // count is 1 << (ROW_L5.hash_bits - ROW_L5.row_log).
    let default_row_bits = ROW_L5.hash_bits - ROW_L5.row_log;
    assert_eq!(full_rows, 1 << default_row_bits);

    // Same driver again, this time with a tiny pledged source size.
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Level(5));
    commit_and_skip(&mut driver, b"xyzxyzxyzxyz");
    let hinted_rows = driver.row_matcher().row_heads.len();

    // Wire `window_log` stays floored, but the row hash table is sized from
    // the RAW 1 KiB source: `table_window = 1 << 10`, so
    // `row_hash_bits_for_window(1 << 10) = 11` (donor `hashLog <=
    // windowLog + 1`) and the row count is `1 << (11 - ROW_L5.row_log)`.
    assert_eq!(driver.window_size(), 1 << MIN_HINTED_WINDOW_LOG);
    let hinted_row_bits = (MIN_WINDOW_LOG as usize) + 1 - ROW_L5.row_log;
    assert_eq!(hinted_rows, 1 << hinted_row_bits);
    assert!(
        hinted_rows < full_rows,
        "tiny source hint should reduce row hash table footprint"
    );
}
8303
/// Replaying the sequences emitted by the row matcher must reproduce the
/// input exactly across three consecutive blocks: two periodic ones and a
/// final block holding each byte value once.
#[test]
fn row_matches_roundtrip_multi_block_pattern() {
    let pattern = [7, 13, 44, 184, 19, 96, 171, 109, 141, 251];
    let first_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
    // The second block carries exactly the same bytes as the first.
    let second_block: Vec<u8> = first_block.clone();

    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.ensure_tables();

    // Minimal decoder: append the literals, then copy `match_len` bytes
    // from `offset` bytes back. The copy is byte-by-byte because source and
    // destination may overlap.
    let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| {
        let (literals, back_reference) = match seq {
            Sequence::Literals { literals } => (literals, None),
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => (literals, Some((offset, match_len))),
        };
        decoded.extend_from_slice(literals);
        if let Some((offset, match_len)) = back_reference {
            let start = decoded.len() - offset;
            for src in start..start + match_len {
                let byte = decoded[src];
                decoded.push(byte);
            }
        }
    };

    let mut history = Vec::new();

    matcher.add_data(first_block.clone(), |_| {});
    matcher.start_matching(|seq| replay_sequence(&mut history, seq));
    assert_eq!(history, first_block);

    let second_start = history.len();
    matcher.add_data(second_block.clone(), |_| {});
    matcher.start_matching(|seq| replay_sequence(&mut history, seq));
    assert_eq!(&history[second_start..], second_block.as_slice());

    // Force a literals-only pass so the Sequence::Literals arm is exercised.
    let third_block: Vec<u8> = (0u8..=255).collect();
    let third_start = history.len();
    matcher.add_data(third_block.clone(), |_| {});
    matcher.start_matching(|seq| replay_sequence(&mut history, seq));
    assert_eq!(&history[third_start..], third_block.as_slice());
}
8347
/// A 5-byte block must be emitted as literals only; a following block with
/// repeated content must then produce at least one `Sequence::Triple`.
#[test]
fn row_short_block_emits_literals_only() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);

    // Phase 1: short block.
    matcher.add_data(b"abcde".to_vec(), |_| {});

    let mut reconstructed = Vec::new();
    let mut triple_in_short_block = false;
    matcher.start_matching(|seq| {
        if let Sequence::Literals { literals } = seq {
            reconstructed.extend_from_slice(literals);
        } else {
            triple_in_short_block = true;
        }
    });

    assert!(
        !triple_in_short_block,
        "row backend must not emit triples for short blocks"
    );
    assert_eq!(reconstructed, b"abcde");

    // Phase 2: feed a clearly matchable block and ensure the Triple arm is
    // reachable.
    let mut triple_in_repeat_block = false;
    matcher.add_data(b"abcdeabcde".to_vec(), |_| {});
    matcher.start_matching(|seq| {
        triple_in_repeat_block |= matches!(seq, Sequence::Triple { .. });
    });
    assert!(
        triple_in_repeat_block,
        "row backend should emit triples on repeated data"
    );
}
8381
/// `pick_lazy_match` must hand back the supplied best candidate unchanged
/// for the 6-byte input used here.
#[test]
fn row_pick_lazy_returns_best_when_lookahead_is_out_of_bounds() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.add_data(b"abcabc".to_vec(), |_| {});
    // Build the row tables before probing: the lookahead path reaches
    // `row_candidate` -> `row_heads[..]` once the accept floor is small
    // enough to pass the length gate, so the tables must be allocated
    // (production always calls this before any candidate probe).
    matcher.ensure_tables();

    let incumbent = MatchCandidate {
        start: 0,
        offset: 1,
        match_len: ROW_MIN_MATCH_LEN,
    };
    let chosen = matcher
        .pick_lazy_match(0, 0, Some(incumbent))
        .expect("best candidate must survive");

    // Field-by-field comparison against the incumbent.
    assert_eq!(chosen.start, incumbent.start);
    assert_eq!(chosen.offset, incumbent.offset);
    assert_eq!(chosen.match_len, incumbent.match_len);
}
8406
/// The second block must open with a zero-literal match at offset 3, i.e.
/// a match whose source is the "XYZ" tail of the previous block, and the
/// replayed output must still equal the input.
#[test]
fn row_backfills_previous_block_tail_for_cross_boundary_match() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);

    // Block 1: 64 filler bytes followed by "XYZ"; block 2 starts with "XYZ".
    let mut first_block = alloc::vec![0xA5; 64];
    first_block.extend_from_slice(b"XYZ");
    let second_block = b"XYZXYZtail".to_vec();

    // Minimal decoder: append the literals, then copy `match_len` bytes
    // from `offset` bytes back. The copy is byte-by-byte because source and
    // destination may overlap.
    let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| {
        let (literals, back_reference) = match seq {
            Sequence::Literals { literals } => (literals, None),
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => (literals, Some((offset, match_len))),
        };
        decoded.extend_from_slice(literals);
        if let Some((offset, match_len)) = back_reference {
            let start = decoded.len() - offset;
            for src in start..start + match_len {
                let byte = decoded[src];
                decoded.push(byte);
            }
        }
    };

    let mut reconstructed = Vec::new();
    matcher.add_data(first_block.clone(), |_| {});
    matcher.start_matching(|seq| replay_sequence(&mut reconstructed, seq));
    assert_eq!(reconstructed, first_block);

    let second_start = reconstructed.len();
    let mut saw_cross_boundary = false;
    matcher.add_data(second_block.clone(), |_| {});
    matcher.start_matching(|seq| {
        let reuses_previous_tail = matches!(
            seq,
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } if literals.is_empty() && offset == 3 && match_len >= ROW_MIN_MATCH_LEN
        );
        if reuses_previous_tail {
            saw_cross_boundary = true;
        }
        replay_sequence(&mut reconstructed, seq);
    });

    assert!(
        saw_cross_boundary,
        "row matcher should reuse the 3-byte previous-block tail"
    );
    assert_eq!(&reconstructed[second_start..], second_block.as_slice());
}
8461
/// Skipping a block with the incompressible hint (`Some(true)`) must
/// populate fewer row slots than skipping the same bytes with
/// `Some(false)`.
#[test]
fn row_skip_matching_with_incompressible_hint_uses_sparse_prefix() {
    // Feeds `payload` to a fresh matcher, skips it with the given hint and
    // returns how many row slots ended up occupied.
    let occupied_after_skip = |payload: Vec<u8>, incompressible: bool| {
        let mut matcher = RowMatchGenerator::new(1 << 22);
        matcher.configure(ROW_CONFIG);
        matcher.add_data(payload, |_| {});
        matcher.skip_matching_with_hint(Some(incompressible));
        matcher
            .row_positions
            .iter()
            .filter(|&&slot| slot != ROW_EMPTY_SLOT)
            .count()
    };

    let data = deterministic_high_entropy_bytes(0xA713_9C5D_44E2_10B1, 4096);
    let dense_slots = occupied_after_skip(data.clone(), false);
    let sparse_slots = occupied_after_skip(data, true);

    assert!(
        sparse_slots < dense_slots,
        "incompressible hint should seed fewer row slots (sparse={sparse_slots}, dense={dense_slots})"
    );
}
8491
/// Regression guard for the `None` arm of `skip_matching_with_hint`.
///
/// With no compressibility hint the skipped range must NOT be densely
/// inserted into the row table. This follows the donor
/// (`ZSTD_row_fillHashCache` only pre-fills the next-scan cache, not the
/// skipped block's interior): cross-block matches into the skipped
/// interior are given up in exchange for not paying the per-block
/// O(block_size) insert cost.
///
/// For an input shorter than one block (4096 B against the default
/// 128 KiB block boundary) the only entries that may appear are those
/// from the `backfill_start` lookback at the block start (at most
/// `ROW_HASH_KEY_LEN - 1` positions when block_start <
/// ROW_HASH_KEY_LEN). With `current_abs_start == 0` even that lookback is
/// empty, so the table is expected to stay completely empty.
#[test]
fn row_skip_matching_with_none_hint_leaves_interior_empty() {
    // Feeds `payload` to a fresh matcher, skips it with the given hint and
    // returns how many row slots ended up occupied.
    let occupied_after_skip = |payload: Vec<u8>, hint: Option<bool>| {
        let mut matcher = RowMatchGenerator::new(1 << 22);
        matcher.configure(ROW_CONFIG);
        matcher.add_data(payload, |_| {});
        matcher.skip_matching_with_hint(hint);
        matcher
            .row_positions
            .iter()
            .filter(|&&slot| slot != ROW_EMPTY_SLOT)
            .count()
    };

    let data = deterministic_high_entropy_bytes(0x9B47_F2A1_8C5E_3306, 4096);

    let none_slots = occupied_after_skip(data.clone(), None);
    // Control: the `Some(false)` dict-priming path inserts every position
    // in the skipped range.
    let dense_slots = occupied_after_skip(data, Some(false));

    // The contract is pinned from both sides:
    // 1) with no hint and block start 0, nothing at all is inserted (no
    //    backfill is possible before position 0);
    // 2) the dense control still inserts, so (1) is not trivially true.
    assert_eq!(
        none_slots, 0,
        "None hint at block_start=0 must leave row table fully empty \
         (donor parity — interior NOT inserted, no pre-block backfill possible)",
    );
    assert!(
        dense_slots > 0,
        "Some(false) dict-priming path must still insert densely \
         (sanity check: control case for the `none_slots == 0` assertion)",
    );
}
8546
/// Without a source-size hint, the dfast matcher behind
/// `CompressionLevel::Level(3)` must keep its default table sizes.
///
/// NOTE(review): the test name still says "level2" while the body resets
/// the driver to `Level(3)`. The name is kept so existing test filters keep
/// working, but the assertion messages now name the level that is actually
/// exercised. Presumably the name predates a change in the level mapping —
/// confirm.
#[test]
fn driver_unhinted_level2_keeps_default_dfast_hash_table_size() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    driver.reset(CompressionLevel::Level(3));
    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"abcabcabcabc");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching_with_hint(None);

    // Donor-parity split: long-hash at DFAST_HASH_BITS, short-hash one
    // bit smaller (DFAST_SHORT_HASH_BITS_DELTA = 1, matching donor
    // `chainLog = hashLog - 1` for dfast levels).
    let long_len = driver.dfast_matcher().long_hash.len();
    let short_len = driver.dfast_matcher().short_hash.len();
    assert_eq!(
        long_len,
        1 << DFAST_HASH_BITS,
        "unhinted Level(3) should keep default long-hash table size"
    );
    assert_eq!(
        short_len,
        1 << (DFAST_HASH_BITS - DFAST_SHORT_HASH_BITS_DELTA),
        "unhinted Level(3) short-hash should be one bit smaller than long-hash"
    );
}
8574
#[cfg(any())] // disabled: tested legacy MatchGenerator/SuffixStore behavior removed in phase 1b
#[test]
fn simple_backend_rejects_undersized_pooled_suffix_store() {
    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
    driver.reset(CompressionLevel::Fastest);

    // Seed the pool with a store that is smaller than the block committed below.
    let undersized = SuffixStore::with_capacity(1024);
    driver.suffix_pool.push(undersized);

    // Commit a 4096-byte block filled with a constant byte.
    let mut block = driver.get_next_space();
    block.clear();
    block.resize(4096, 0xAB);
    driver.commit_space(block);

    // The newest window entry must carry a suffix table sized for the block,
    // i.e. the 1024-capacity pooled store was not picked up for it.
    let newest_entry = driver
        .simple()
        .window
        .last()
        .expect("window entry must exist after commit");
    let slot_count = newest_entry.suffixes.slots.len();
    assert!(
        slot_count >= 4096,
        "undersized pooled suffix store must not be reused for larger blocks"
    );
}
8601
/// With a 1024-byte source-size hint, the reported window, the driver's
/// `slice_size`, and the length of the next handed-out buffer must all equal
/// `1 << MIN_HINTED_WINDOW_LOG`.
#[test]
fn source_hint_clamps_driver_slice_size_to_window() {
    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Default);

    let window_bytes = driver.window_size() as usize;
    assert_eq!(window_bytes, 1 << MIN_HINTED_WINDOW_LOG);
    assert_eq!(driver.slice_size, window_bytes);

    // The buffer handed out for the next block follows the clamped slice size.
    let buffer = driver.get_next_space();
    assert_eq!(buffer.len(), window_bytes);
    driver.commit_space(buffer);
}
8616
/// A buffer handed out after the slice size shrinks (via a 1024-byte hint)
/// must have the hinted length but at least the capacity of the buffer handed
/// out before the shrink.
#[test]
fn pooled_space_keeps_capacity_when_slice_size_shrinks() {
    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
    driver.reset(CompressionLevel::Default);

    // First frame: no hint, full-size buffer.
    let roomy = driver.get_next_space();
    let roomy_capacity = roomy.capacity();
    assert!(roomy_capacity >= 128 * 1024);
    driver.commit_space(roomy);

    // Second frame: a small hint shrinks the slice size.
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Default);

    let shrunk = driver.get_next_space();
    assert_eq!(shrunk.len(), 1 << MIN_HINTED_WINDOW_LOG);
    let shrunk_capacity = shrunk.capacity();
    assert!(
        shrunk_capacity >= roomy_capacity,
        "pooled buffer capacity should be preserved to avoid shrink/grow churn"
    );
}
8637
/// Switching from `Best` (routed onto HashChain through the test-only
/// `reset_on_hc_lazy` override) to `Fastest` must leave the driver on the
/// `Simple` backend with the `Fastest` window.
///
/// Background carried over from the original design note: production `Best`
/// sits on level 13, whose native backend differs, hence the override. The
/// drain block in `Matcher::reset` reassigns `m.table.hash_table` /
/// `chain_table` / `hash3_table` to `Vec::new()` BEFORE constructing the
/// replacement variant, so the large table allocations are released up front
/// instead of only when the old variant is eventually dropped. That caps peak
/// memory during the swap. After the swap the HC variant no longer exists, so
/// the "storage is now `Simple`" assertion stands in for the old
/// hash_table/chain_table checks.
#[test]
fn driver_best_to_fastest_releases_oversized_hc_tables() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Start on HashChain at `Best`; allocates large HC tables (4M hash, 2M chain).
    driver.reset_on_hc_lazy(CompressionLevel::Best);
    assert_eq!(driver.window_size(), 1u64 << 22);

    // Commit and skip a small block so tables are actually allocated via ensure_tables().
    let mut block = driver.get_next_space();
    block[..12].copy_from_slice(b"abcabcabcabc");
    block.truncate(12);
    driver.commit_space(block);
    driver.skip_matching_with_hint(None);

    // Swap to `Fastest`: the storage enum moves to the `Simple` variant.
    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.window_size(), 1u64 << 19);
    let backend_after_swap = driver.active_backend();
    assert_eq!(backend_after_swap, super::strategy::BackendTag::Simple);
}
8674
/// Moving between two BT levels with different native `HcConfig` widths must
/// grow the hash and chain tables.
///
/// The lazy band runs on the Row backend now, so the HC resize path is
/// exercised across L13 (hash_log 22, chain_log 22) -> L15 (hash_log 23,
/// chain_log 23). Both levels report the same `1 << 22` window.
#[test]
fn driver_better_to_best_resizes_hc_tables() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    driver.reset(CompressionLevel::Level(13));
    assert_eq!(driver.window_size(), 1u64 << 22);

    let mut block = driver.get_next_space();
    block[..12].copy_from_slice(b"abcabcabcabc");
    block.truncate(12);
    driver.commit_space(block);
    driver.skip_matching_with_hint(None);

    // Record the table lengths observed at L13.
    let (l13_hash_len, l13_chain_len) = {
        let hc = driver.hc_matcher();
        (hc.table.hash_table.len(), hc.table.chain_table.len())
    };

    // Switch to L15 — must resize to larger tables.
    driver.reset(CompressionLevel::Level(15));
    assert_eq!(driver.window_size(), 1u64 << 22);

    // Feed data to trigger ensure_tables with new sizes.
    let mut block = driver.get_next_space();
    block[..12].copy_from_slice(b"xyzxyzxyzxyz");
    block.truncate(12);
    driver.commit_space(block);
    driver.skip_matching_with_hint(None);

    let hc = driver.hc_matcher();
    let l15_hash_len = hc.table.hash_table.len();
    let l15_chain_len = hc.table.chain_table.len();
    assert!(
        l15_hash_len > l13_hash_len,
        "L15 hash_table ({}) should be larger than L13 ({})",
        l15_hash_len,
        l13_hash_len
    );
    assert!(
        l15_chain_len > l13_chain_len,
        "L15 chain_table ({}) should be larger than L13 ({})",
        l15_chain_len,
        l13_chain_len
    );
}
8720
#[cfg(any())]
// disabled: tests legacy SuffixStore behavior incompatible with donor-shape kernel's HASH_READ_SIZE geometry
#[test]
fn prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    // The first block repeats the dictionary content byte for byte.
    let mut block = driver.get_next_space();
    block.clear();
    block.extend_from_slice(b"abcdefgh");
    driver.commit_space(block);

    // Look for a literal-free match at offset 8 (back into the dictionary).
    let mut saw_match = false;
    driver.start_matching(|seq| {
        saw_match |= matches!(
            seq,
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } if literals.is_empty() && offset == 8 && match_len >= MIN_MATCH_LEN
        );
    });

    assert!(
        saw_match,
        "first full block should still match dictionary-primed history"
    );
}
8755
#[cfg(any())]
// disabled: tests legacy SuffixStore behavior incompatible with donor-shape kernel's HASH_READ_SIZE geometry
#[test]
fn prime_with_large_dictionary_preserves_early_history_until_first_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    // 24-byte dictionary; the block below repeats only its first 8 bytes.
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);

    let mut block = driver.get_next_space();
    block.clear();
    block.extend_from_slice(b"abcdefgh");
    driver.commit_space(block);

    // A literal-free match at offset 24 reaches back to the dictionary start.
    let mut saw_match = false;
    driver.start_matching(|seq| match seq {
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } if literals.is_empty() && offset == 24 && match_len >= MIN_MATCH_LEN => {
            saw_match = true;
        }
        _ => {}
    });

    assert!(
        saw_match,
        "dictionary bytes should remain addressable until frame output exceeds the live window"
    );
}
8790
/// Priming with empty dictionary content must still store the supplied offset
/// history in the `offset_hist` of the matcher returned by `simple_mut()`.
#[test]
fn prime_with_dictionary_applies_offset_history_even_when_content_is_empty() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    let no_content: &[u8] = &[];
    driver.prime_with_dictionary(no_content, [11, 7, 3]);

    let installed = driver.simple_mut().offset_hist;
    assert_eq!(installed, [11, 7, 3]);
}
8800
/// On the HashChain backend, priming with an empty dictionary must store the
/// offset history and keep `should_run_btultra2_seed_pass` false for a size of
/// `HC_PREDEF_THRESHOLD + 1`.
#[test]
fn hc_prime_with_empty_dictionary_disables_btultra2_seed_pass() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset_on_hc_lazy(CompressionLevel::Better);

    let no_content: &[u8] = &[];
    driver.prime_with_dictionary(no_content, [11, 7, 3]);

    assert_eq!(driver.hc_matcher().table.offset_hist, [11, 7, 3]);

    let seed_pass_enabled = driver
        .hc_matcher()
        .should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD + 1);
    assert!(
        !seed_pass_enabled,
        "btultra2 warmup must stay disabled after dictionary priming, even when dict content is empty"
    );
}
8816
/// A primed snapshot captured under one LDM configuration must be refused by a
/// reset that resolved a different LDM configuration, and accepted again once
/// capture and restore share the same configuration.
///
/// Rationale from the original note: the CDict-equivalent snapshot clones
/// `storage`, which on the BT backend carries `BtMatcher::ldm_producer`.
/// Restoring it across an LDM change would leave a stale producer, so
/// `PrimedKey` must fold the LDM override into the key and the caller
/// re-primes.
#[test]
fn primed_snapshot_not_restored_across_ldm_config_change() {
    use super::parameters::CompressionParameters;

    let level = CompressionLevel::Level(19);
    let dict = b"abcdefghabcdefghabcdefgh";

    // Two override sets at the same level: one enabling LDM, one leaving it untouched.
    let with_ldm = CompressionParameters::builder(level)
        .enable_long_distance_matching(true)
        .build()
        .unwrap()
        .overrides();
    let without_ldm = CompressionParameters::builder(level)
        .build()
        .unwrap()
        .overrides();

    let mut driver = MatchGeneratorDriver::new(1024, 1);

    // Prime and capture with LDM enabled.
    driver.set_param_overrides(Some(with_ldm));
    driver.reset(level);
    driver.prime_with_dictionary(dict, [1, 4, 8]);
    driver.capture_primed_dictionary(level);

    // Same dictionary and level, LDM now off: the restore must be refused.
    driver.set_param_overrides(Some(without_ldm));
    driver.reset(level);
    let restored_across_change = driver.restore_primed_dictionary(level);
    assert!(
        !restored_across_change,
        "primed snapshot restored across an LDM config change (stale producer)",
    );

    // Sanity: capture and restore under the IDENTICAL LDM-off config must
    // match, proving the key is not over-tight.
    driver.prime_with_dictionary(dict, [1, 4, 8]);
    driver.capture_primed_dictionary(level);
    driver.reset(level);
    let restored_same_config = driver.restore_primed_dictionary(level);
    assert!(
        restored_same_config,
        "primed snapshot not restored under identical LDM config",
    );
}
8865
/// On the HashChain backend, priming with non-empty dictionary content must
/// keep `should_run_btultra2_seed_pass` false for a size of
/// `HC_PREDEF_THRESHOLD + 1`.
#[test]
fn hc_prime_with_dictionary_disables_btultra2_seed_pass() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset_on_hc_lazy(CompressionLevel::Better);

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    let seed_pass_enabled = driver
        .hc_matcher()
        .should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD + 1);
    assert!(
        !seed_pass_enabled,
        "btultra2 warmup must stay disabled after dictionary priming with content"
    );
}
8880
/// The Dfast backend must find the dictionary-primed bytes when the first
/// block repeats the dictionary.
///
/// Level(4) is Dfast with the greedy double-fast loop (donor parity:
/// clevels.h L3/L4 are both `ZSTD_dfast`, which has no lazy lookahead). The
/// fast loop needs at least `HASH_READ_SIZE` (8) bytes ahead of the probe
/// cursor, so a 16-byte dict + 16-byte block is used: the whole block matches
/// the dict at offset = dict length = 16.
#[test]
fn dfast_prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));

    let payload = b"abcdefghijklmnop";
    driver.prime_with_dictionary(payload, [1, 4, 8]);

    let mut block = driver.get_next_space();
    block.clear();
    block.extend_from_slice(payload);
    driver.commit_space(block);

    // Accept any literal-free match whose offset equals the dictionary length.
    let mut saw_match = false;
    driver.start_matching(|seq| {
        saw_match |= matches!(
            seq,
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } if literals.is_empty()
                && offset == payload.len()
                && match_len >= DFAST_MIN_MATCH_LEN
        );
    });

    assert!(
        saw_match,
        "dfast backend should match dictionary-primed history in first full block"
    );
}
8919
/// `window_size()` must report the same value before and after priming a
/// 24-byte dictionary at `Fastest`.
#[test]
fn prime_with_dictionary_does_not_inflate_reported_window_size() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    let window_before = driver.window_size();
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    let window_after = driver.window_size();

    assert_eq!(
        window_after, window_before,
        "dictionary retention budget must not change reported frame window size"
    );
}
8934
/// A snapshot captured at one resolved window must be refused by a same-level
/// reset that resolves to a smaller window.
///
/// Rationale from the original note: the copy-snapshot must be keyed on the
/// resolved reset parameters, not just the `CompressionLevel`. `reset()` caps
/// window_log by the source-size hint, so two same-level frames with different
/// hints can resolve to different windows. Restoring the larger-window
/// snapshot would advertise the smaller window in the frame header while the
/// restored matcher's `max_window_size` still spans the larger one. The
/// encoder could then emit a match (e.g. into the dictionary) past the
/// advertised window, producing an undecodable frame.
#[test]
fn primed_snapshot_not_restored_when_window_hint_differs() {
    let level = CompressionLevel::Best;
    let mut driver = MatchGeneratorDriver::new(8, 1);

    // Frame A: large hint → larger resolved window. Prime + capture.
    driver.set_source_size_hint(256 * 1024);
    driver.reset(level);
    let big_window = driver.window_size();
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    driver.capture_primed_dictionary(level);

    // Frame B: smaller hint, SAME level → smaller resolved window.
    driver.set_source_size_hint(48 * 1024);
    driver.reset(level);
    let small_window = driver.window_size();
    assert!(
        big_window > small_window,
        "precondition: the two hints must resolve to different windows \
         (small={small_window}, big={big_window})"
    );

    // The restore attempt must be rejected.
    assert!(
        !driver.restore_primed_dictionary(level),
        "snapshot captured at window {big_window} must NOT be restored into a \
         reset advertising window {small_window} (level alone is an insufficient key)"
    );
}
8973
/// Two different source-size hints that resolve to the same matcher shape must
/// share one primed snapshot.
///
/// Rationale from the original note: the snapshot key must normalize the hint
/// to the resolved matcher geometry, not the raw byte count. `reset()` derives
/// every hint-dependent parameter (window_log cap, HC/Fast/Dfast/Row table
/// widths, the Fast attach-vs-copy cutoff) from `ceil_log2(hint)`. Keying on
/// raw bytes would force a needless full re-prime on the second frame.
#[test]
fn primed_snapshot_restored_for_hints_in_same_window_bucket() {
    let level = CompressionLevel::Best;
    let mut driver = MatchGeneratorDriver::new(8, 1);

    // 300 KiB and 400 KiB both fall in ceil_log2 bucket 19 (2^18 < n <= 2^19).
    driver.set_source_size_hint(300 * 1024);
    driver.reset(level);
    let window_a = driver.window_size();
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    driver.capture_primed_dictionary(level);

    driver.set_source_size_hint(400 * 1024);
    driver.reset(level);
    let window_b = driver.window_size();
    assert_eq!(
        window_a, window_b,
        "precondition: same-bucket hints must resolve to the same window \
         (a={window_a}, b={window_b})"
    );

    // The cached snapshot fits, so the restore must succeed.
    assert!(
        driver.restore_primed_dictionary(level),
        "snapshot captured at a 300 KiB hint must be restored into a 400 KiB \
         hint that resolves to the identical matcher shape (raw bytes over-key)"
    );
}
9011
/// At Level 22, hints from different ceil-log buckets that land in the same
/// donor source-size tier must share one primed snapshot.
///
/// Rationale from the original note: `resolve_level_params(Level(22), ..)`
/// selects the HC config and window_log by raw `<= 16 KiB / 128 KiB / 256 KiB`
/// thresholds. A 20 KiB and a 100 KiB hint (ceil-log buckets 15 and 17) both
/// land in the `<= 128 KiB` tier and resolve to the IDENTICAL matcher (same
/// window_log, same HC hash/chain/search geometry). A key built on the raw
/// ceil-log bucket would reject this restore, so the key must compare the
/// resolved matcher shape.
#[test]
fn primed_snapshot_restored_across_level22_donor_tier_hints() {
    let level = CompressionLevel::Level(22);
    let mut driver = MatchGeneratorDriver::new(8, 1);

    // Capture under the 20 KiB hint.
    driver.set_source_size_hint(20 * 1024);
    driver.reset(level);
    let window_a = driver.window_size();
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    driver.capture_primed_dictionary(level);

    // Reset under the 100 KiB hint.
    driver.set_source_size_hint(100 * 1024);
    driver.reset(level);
    let window_b = driver.window_size();
    assert_eq!(
        window_a, window_b,
        "precondition: both hints must land in the same Level 22 donor tier \
         (a={window_a}, b={window_b})"
    );

    assert!(
        driver.restore_primed_dictionary(level),
        "Level 22 snapshot captured at a 20 KiB hint must be restored into a \
         100 KiB hint that resolves to the same donor tier (different ceil-log \
         buckets, identical matcher shape)"
    );
}
9048
/// A copy-mode snapshot must be refused by an attach-mode reset even though
/// both resolve to the same parameters.
///
/// Rationale from the original note: the Fast attach-vs-copy cutoff (8 KiB)
/// falls INSIDE a single resolved matcher shape. An 8192-byte and an 8193-byte
/// hint both clamp Level 1 to window_log 14 and the same Fast table widths, so
/// `LevelParams` + `table_bits` are identical. Yet 8192 attaches (separate
/// dict table) while 8193 copies (dict primed into the live table). The
/// snapshot key must therefore carry the attach/copy mode itself; otherwise a
/// copy-mode snapshot could be restored into a different `storage` shape.
#[test]
fn primed_snapshot_not_restored_across_fast_attach_copy_boundary() {
    let level = CompressionLevel::Level(1);
    let mut driver = MatchGeneratorDriver::new(8, 1);

    // Copy side (hint > 8 KiB): prime + capture.
    driver.set_source_size_hint(8193);
    driver.reset(level);
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    driver.capture_primed_dictionary(level);

    // Attach side (hint <= 8 KiB), same resolved window/table shape.
    driver.set_source_size_hint(8192);
    driver.reset(level);
    assert!(
        !driver.restore_primed_dictionary(level),
        "a copy-mode snapshot (8193 B hint) must NOT be restored into an \
         attach-mode reset (8192 B hint) that resolves to the same params but a \
         different dict-table shape"
    );
}
9080
/// A Row-backend snapshot captured without a hint must restore into a reset
/// whose large hint resolves to the identical matcher.
///
/// Rationale from the original note: `fast_attach` is a Simple/Fast-backend
/// concept (the 8 KiB attach-vs-copy table split). On the HashChain/Dfast/Row
/// backends the dictionary is always primed the same way, so the bit must NOT
/// enter their snapshot key. Otherwise an unhinted capture (which would record
/// `fast_attach = true`) and a hinted reset resolving to the IDENTICAL
/// `LevelParams` would key differently and force a needless re-prime. The test
/// also pins the Row arm recording its RESOLVED hash width on the unhinted
/// path (a 0 default there keyed unhinted-vs-hinted apart).
///
/// The level is spelled out as `Level(12)` because `Best` now sits on level 13
/// (Btlazy2), so the named alias no longer reaches the Row arm.
#[test]
fn primed_snapshot_fast_attach_does_not_over_key_non_simple_backends() {
    let level = CompressionLevel::Level(12);
    let mut driver = MatchGeneratorDriver::new(8, 1);

    // Capture with no hint.
    driver.reset(level);
    let window_a = driver.window_size();
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    driver.capture_primed_dictionary(level);

    // Reset with a hint large enough to resolve to the same window/params as
    // the unhinted level (>= 2^window_log, so the source-size cap is a no-op).
    driver.set_source_size_hint(64 * 1024 * 1024);
    driver.reset(level);
    let window_b = driver.window_size();
    assert_eq!(
        window_a, window_b,
        "precondition: the large hint must resolve to the same window as the \
         unhinted level (a={window_a}, b={window_b})"
    );

    assert!(
        driver.restore_primed_dictionary(level),
        "a Row snapshot must restore across an unhinted vs large-hinted \
         reset that resolves to the identical matcher — `fast_attach` is a Fast \
         backend concept and must not over-key non-Simple shapes"
    );
}
9121
#[cfg(any())] // disabled: tested SuffixStore-per-block tail-handling specific to legacy MatchGenerator
#[test]
fn prime_with_dictionary_does_not_reuse_tiny_suffix_store() {
    let mut driver = MatchGeneratorDriver::new(8, 2);
    driver.reset(CompressionLevel::Fastest);

    // A 9-byte dictionary leaves a 1-byte tail chunk (capacity=1 suffix
    // table), which should never be committed to the matcher window.
    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);

    // No window entry may be shorter than MIN_MATCH_LEN.
    let has_short_entry = driver
        .simple()
        .window
        .iter()
        .any(|entry| entry.data.len() < MIN_MATCH_LEN);
    assert!(
        !has_short_entry,
        "dictionary priming must not commit tails shorter than MIN_MATCH_LEN"
    );
}
9141
/// Priming a 9-byte dictionary at `Fastest` must grow the Simple matcher's
/// `max_window_size` by 8 only: one full slice is committed, while the 1-byte
/// tail cannot be.
#[test]
fn prime_with_dictionary_counts_only_committed_tail_budget() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    let budget_before = driver.simple_mut().max_window_size;
    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);
    let budget_after = driver.simple_mut().max_window_size;

    assert_eq!(
        budget_after,
        budget_before + 8,
        "retention budget must account only for dictionary bytes actually committed to history"
    );
}
9157
/// Priming a 12-byte dictionary on the Dfast backend (`Level(3)`) must grow
/// `max_window_size` by the full 12 bytes.
///
/// The dictionary is one full slice plus a 4-byte tail. Per the original note,
/// Dfast can still use this tail through short-hash overlap into the next
/// block, so it should stay retained.
#[test]
fn dfast_prime_with_dictionary_counts_four_byte_tail_budget() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(3));

    let budget_before = driver.dfast_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghijkl", [1, 4, 8]);
    let budget_after = driver.dfast_matcher().max_window_size;

    assert_eq!(
        budget_after,
        budget_before + 12,
        "dfast retention budget should include 4-byte dictionary tails"
    );
}
9174
/// The Row backend must find the dictionary-primed bytes when the first block
/// repeats the dictionary.
///
/// Level(5) is the greedy Row backend (LEVEL_TABLE row 5: Greedy / RowHash).
/// Level(4) now routes to Dfast, so Level(5) is required to actually exercise
/// `RowMatchGenerator`'s dictionary priming. The 16-byte dict + 16-byte block
/// lets the whole block match the primed dict at offset = dict length = 16.
#[test]
fn row_prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(5));

    let payload = b"abcdefghijklmnop";
    driver.prime_with_dictionary(payload, [1, 4, 8]);

    let mut block = driver.get_next_space();
    block.clear();
    block.extend_from_slice(payload);
    driver.commit_space(block);

    // Accept any literal-free match whose offset equals the dictionary length.
    let mut saw_match = false;
    driver.start_matching(|seq| match seq {
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } if literals.is_empty()
            && offset == payload.len()
            && match_len >= ROW_MIN_MATCH_LEN =>
        {
            saw_match = true;
        }
        _ => {}
    });

    assert!(
        saw_match,
        "row backend should match dictionary-primed history in first full block"
    );
}
9213
/// Priming a 9-byte dictionary on the Row backend (`Level(5)`) must grow
/// `max_window_size` by 8 only. Slice size is 8; the trailing byte cannot be
/// committed (<4 tail), so it must be subtracted from the retained budget.
#[test]
fn row_prime_with_dictionary_subtracts_uncommitted_tail_budget() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(5));

    let base_window = driver.row_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);
    let primed_window = driver.row_matcher().max_window_size;

    assert_eq!(
        primed_window,
        base_window + 8,
        "row retained window must exclude uncommitted 1-byte tail"
    );
}
9230
/// Once the dictionary-primed slices are evicted from a tiny Row window,
/// `dictionary_retained_budget` must be 0 and `max_window_size` must be back
/// at its pre-priming value.
#[test]
fn prime_with_dictionary_budget_shrinks_after_row_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(5));
    // Keep live window tiny so dictionary-primed slices are evicted quickly.
    driver.row_matcher_mut().max_window_size = 8;
    driver.reported_window_size = 8;

    // Priming the 24-byte dictionary raises the budget by 24.
    let base_window = driver.row_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(driver.row_matcher().max_window_size, base_window + 24);

    // Commit and skip two 8-byte blocks, the first all `A`, the second all `B`.
    for fill in [b'A', b'B'] {
        let mut block = driver.get_next_space();
        block.clear();
        block.extend_from_slice(&[fill; 8]);
        driver.commit_space(block);
        driver.skip_matching_with_hint(None);
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    let window_after_eviction = driver.row_matcher().max_window_size;
    assert_eq!(
        window_after_eviction,
        base_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
9261
/// After a Row → Simple transition the active backend must be exactly
/// `Simple`, and before it `get_last_space()` must return the committed Row
/// block.
///
/// The pre-enum-era check on `driver.row_matcher().window.is_empty()` is
/// intentionally absent: the `Row` variant no longer exists after the swap, so
/// there is nothing to inspect by accessor and a later `row_matcher()` call
/// would panic by design. "Window cleared" is therefore replaced by "variant
/// dropped". The pool-recycling side of the row backend is covered by
/// [`driver_row_commit_recycles_block_buffer_into_pool`].
#[test]
fn row_get_last_space_then_reset_to_fastest_drops_row_variant() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(5));
    assert_eq!(driver.active_backend(), super::strategy::BackendTag::Row);

    let payload = b"row-data";
    let mut block = driver.get_next_space();
    block.clear();
    block.extend_from_slice(payload);
    driver.commit_space(block);

    assert_eq!(driver.get_last_space(), payload);

    // Swap to `Fastest`; the driver must now report the Simple backend.
    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.active_backend(), super::strategy::BackendTag::Simple);
}
9287
/// A Row commit must hand the input buffer back to `vec_pool` right away.
///
/// The bytes are mirrored into the contiguous `history`, so there is no reason
/// to retain a second copy in the window. This guards the chunk-length window:
/// the previous `VecDeque<Vec<u8>>` window retained a full `block_capacity`
/// buffer per committed block, which on a heavily pre-split frame ballooned
/// peak memory to many times the live byte count. With the buffer recycled at
/// commit time the pool grows by exactly one Vec per committed block.
#[test]
fn driver_row_commit_recycles_block_buffer_into_pool() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(5));
    assert_eq!(driver.active_backend(), super::strategy::BackendTag::Row);

    let payload = b"row-data-to-recycle";
    let before_pool = driver.vec_pool.len();
    let mut block = driver.get_next_space();
    block.clear();
    block.extend_from_slice(payload);
    driver.commit_space(block);

    // Strict `>` rather than `>=`: a fresh driver starts with
    // `before_pool == 0`, so the weaker bound would pass even if the commit
    // failed to recycle. Strict growth proves the buffer went back to the pool
    // at commit time instead of staying in the window (the pre-`chunk_lens`
    // bug).
    let after_pool = driver.vec_pool.len();
    assert!(
        after_pool > before_pool,
        "row commit must recycle the committed block buffer into vec_pool \
         (before_pool = {before_pool}, after = {})",
        after_pool
    );
    // The bytes still resolve through the contiguous history mirror.
    assert_eq!(driver.get_last_space(), payload);
}
9321
/// With a source size of 0, `adjust_params_for_source_size` must return a
/// `window_log` equal to `MIN_HINTED_WINDOW_LOG`, even when the input params
/// carry a `window_log` of 22.
#[test]
fn adjust_params_for_zero_source_size_uses_min_hinted_window_floor() {
    // Level(4) params with the window log forced to 22.
    let widened = {
        let mut level_params = resolve_level_params(CompressionLevel::Level(4), None);
        level_params.window_log = 22;
        level_params
    };

    let clamped_window_log = adjust_params_for_source_size(widened, 0).window_log;
    assert_eq!(clamped_window_log, MIN_HINTED_WINDOW_LOG);
}
9329
/// Cross-checks `common_prefix_len` against a byte-at-a-time oracle over a
/// grid of buffer lengths, slice start offsets and mismatch positions, plus
/// one pair of unequal-length inputs.
#[test]
fn common_prefix_len_matches_scalar_reference_across_offsets() {
    /// Naive oracle: index of the first differing byte, or the shorter length
    /// when one slice is a prefix of the other.
    fn scalar_reference(a: &[u8], b: &[u8]) -> usize {
        let shared = a.len().min(b.len());
        (0..shared).find(|&i| a[i] != b[i]).unwrap_or(shared)
    }

    let total_lens: &[usize] = &[
        0, 1, 5, 15, 16, 17, 31, 32, 33, 64, 65, 127, 191, 257, 320,
    ];
    let starts: &[usize] = &[0, 1, 3];
    let mismatches: &[usize] = &[0, 1, 7, 15, 16, 31, 32, 47, 63, 95, 127, 128, 129, 191];

    for total_len in total_lens.iter().copied() {
        let mut base: Vec<u8> = Vec::with_capacity(total_len);
        base.extend((0..total_len).map(|i| ((i * 13 + 7) & 0xFF) as u8));

        // Start offsets beyond the buffer are skipped.
        for start in starts.iter().copied().filter(|&s| s <= total_len) {
            let a = &base[start..];
            let b = a.to_vec();

            // Identical inputs: the whole slice is the common prefix.
            assert_eq!(
                common_prefix_len(a, &b),
                scalar_reference(a, &b),
                "equal slices total_len={total_len} start={start}"
            );

            // Flip one byte at each in-range probe position.
            let len = a.len();
            for mismatch in mismatches.iter().copied().filter(|&m| m < len) {
                let mut altered = b.clone();
                altered[mismatch] ^= 0x5A;
                assert_eq!(
                    common_prefix_len(a, &altered),
                    scalar_reference(a, &altered),
                    "total_len={total_len} start={start} mismatch={mismatch}"
                );
            }

            // Flip only the final byte (when there is one).
            if let Some(mismatch) = len.checked_sub(1) {
                let mut altered = b.clone();
                altered[mismatch] ^= 0xA5;
                assert_eq!(
                    common_prefix_len(a, &altered),
                    scalar_reference(a, &altered),
                    "tail mismatch total_len={total_len} start={start} mismatch={mismatch}"
                );
            }
        }
    }

    // Unequal lengths with identical content.
    let long = alloc::vec![0xAB; 320];
    let shorter = alloc::vec![0xAB; 137];
    let expected = scalar_reference(&long, &shorter);
    assert_eq!(common_prefix_len(&long, &shorter), expected);
}
9392
/// On a 64-byte run of `b'a'`, a minimum-length candidate at position 16 must
/// be deferred by the lazy picker (it returns `None`).
#[test]
fn row_pick_lazy_returns_none_when_next_is_better() {
    let mut row = RowMatchGenerator::new(1 << 22);
    row.configure(ROW_CONFIG);
    row.add_data(alloc::vec![b'a'; 64], |_| {});
    row.ensure_tables();

    let abs_pos = row.history_abs_start + 16;
    // Deliberately weak candidate: just the minimum accepted length.
    let weakest = MatchCandidate {
        match_len: ROW_MIN_MATCH_LEN,
        offset: 8,
        start: abs_pos,
    };

    let picked = row.pick_lazy_match(abs_pos, 0, Some(weakest));
    assert!(
        picked.is_none(),
        "lazy picker should defer when next position is clearly better"
    );
}
9411
/// With `lazy_depth = 2`, the picker must defer (return `None`) when the
/// candidate two positions ahead beats the current best by more than one byte.
#[test]
fn row_pick_lazy_depth2_returns_none_when_next2_significantly_better() {
    let mut row = RowMatchGenerator::new(1 << 22);
    row.configure(ROW_CONFIG);
    row.offset_hist = [6, 9, 1];
    row.search_depth = 0;
    row.lazy_depth = 2;

    // 40 filler bytes with the 19-byte fixture pattern placed at 11..30.
    let mut fixture = alloc::vec![b'x'; 40];
    fixture[11..30].copy_from_slice(b"EFABCABCAEFABCAEFAB");
    row.add_data(fixture, |_| {});
    row.ensure_tables();

    let abs_pos = row.history_abs_start + 20;
    let best = row
        .best_match(abs_pos, 0)
        .expect("expected baseline repcode match");
    assert_eq!(best.offset, 9);
    // The fixture pins the baseline length: the offset-9 rep run is 6 bytes
    // long no matter what the accept threshold is.
    assert_eq!(best.match_len, 6);

    // A +1 candidate, if any, must not beat the baseline.
    match row.best_match(abs_pos + 1, 1) {
        Some(next) => assert!(next.match_len <= best.match_len),
        None => {}
    }

    let next2 = row
        .best_match(abs_pos + 2, 2)
        .expect("expected +2 candidate");
    assert!(
        next2.match_len > best.match_len + 1,
        "+2 candidate must be significantly better for depth-2 lazy skip"
    );

    let picked = row.pick_lazy_match(abs_pos, 0, Some(best));
    assert!(
        picked.is_none(),
        "lazy picker should defer when +2 candidate is significantly better"
    );
}
9450
/// With `lazy_depth = 2`, a +2 candidate that is exactly one byte longer is
/// not enough to defer: the picker must return the current best unchanged.
#[test]
fn row_pick_lazy_depth2_keeps_best_when_next2_is_only_one_byte_better() {
    let mut row = RowMatchGenerator::new(1 << 22);
    row.configure(ROW_CONFIG);
    row.offset_hist = [6, 9, 1];
    row.search_depth = 0;
    row.lazy_depth = 2;

    // Same fixture as the "significantly better" test except for the final
    // pattern byte (`Z` instead of `B`).
    let mut fixture = alloc::vec![b'x'; 40];
    fixture[11..30].copy_from_slice(b"EFABCABCAEFABCAEFAZ");
    row.add_data(fixture, |_| {});
    row.ensure_tables();

    let abs_pos = row.history_abs_start + 20;
    let best = row
        .best_match(abs_pos, 0)
        .expect("expected baseline repcode match");
    assert_eq!(best.offset, 9);
    // The fixture pins the baseline length: the offset-9 rep run is 6 bytes
    // long no matter what the accept threshold is.
    assert_eq!(best.match_len, 6);

    let next2 = row
        .best_match(abs_pos + 2, 2)
        .expect("expected +2 candidate");
    assert_eq!(next2.match_len, best.match_len + 1);

    let kept = row
        .pick_lazy_match(abs_pos, 0, Some(best))
        .expect("lazy picker should keep current best");
    assert_eq!(kept.start, best.start);
    assert_eq!(kept.offset, best.offset);
    assert_eq!(kept.match_len, best.match_len);
}
9484
/// Checks that `hash_and_row` derives the row index and tag from the top bits
/// of the shared hash mix, by recomputing both by hand from the live history.
#[test]
fn row_hash_and_row_extracts_high_bits() {
    let mut row_gen = RowMatchGenerator::new(1 << 22);
    row_gen.configure(ROW_CONFIG);
    let fixture = alloc::vec![
        0xAA, 0xBB, 0xCC, 0x11, 0x10, 0x20, 0x30, 0x40, 0xAA, 0xBB, 0xCC, 0x22, 0x50, 0x60,
        0x70, 0x80,
    ];
    row_gen.add_data(fixture, |_| {});
    row_gen.ensure_tables();

    let pos = row_gen.history_abs_start + 8;
    let (row, tag) = row_gen
        .hash_and_row(pos)
        .expect("row hash should be available");

    // Rebuild the key the way `row_key_value` does: an mls-wide masked key
    // when 8 lookahead bytes exist, otherwise the 4-byte tail key. Relative
    // index 8 in a 16-byte history leaves exactly 8 bytes, so the wide arm
    // is the one exercised here.
    let rel = pos - row_gen.history_abs_start;
    let history = row_gen.live_history();
    let lookahead: [u8; 8] = history[rel..rel + 8].try_into().unwrap();
    let key_len = row_gen.mls.min(6);
    let key_mask = (1u64 << (key_len * 8)) - 1;
    let key = u64::from_le_bytes(lookahead) & key_mask;

    // Keep the top `row_hash_log + ROW_TAG_BITS` bits of the mixed hash; the
    // low part of that is the tag, the bits above it the row.
    let mixed = crate::encoding::fastpath::hash_mix_u64_with_kernel(row_gen.hash_kernel, key);
    let kept_bits = row_gen.row_hash_log + ROW_TAG_BITS;
    let combined = mixed >> (u64::BITS as usize - kept_bits);
    let row_mask = (1usize << row_gen.row_hash_log) - 1;
    let expected_row = ((combined >> ROW_TAG_BITS) as usize) & row_mask;
    let expected_tag = combined as u8;

    assert_eq!(row, expected_row);
    assert_eq!(tag, expected_tag);
}
9522
/// A repcode whose source would fall before `history_abs_start` must not
/// produce a candidate: position 12 minus offset 3 is 9, below the start of 10.
#[test]
fn row_repcode_skips_candidate_before_history_start() {
    let mut row = RowMatchGenerator::new(1 << 22);
    row.configure(ROW_CONFIG);
    row.offset_hist = [3, 0, 0];
    row.history_abs_start = 10;
    row.history_start = 0;
    row.history = alloc::vec![b'a'; 20];

    let candidate = row.repcode_candidate(12, 1);
    assert!(candidate.is_none());
}
9534
/// Probing the last byte of a 5-byte history must not yield a repcode
/// candidate.
#[test]
fn row_repcode_returns_none_when_position_too_close_to_history_end() {
    let mut row = RowMatchGenerator::new(1 << 22);
    row.configure(ROW_CONFIG);
    row.offset_hist = [1, 0, 0];
    row.history_abs_start = 0;
    row.history_start = 0;
    row.history = b"abcde".to_vec();

    let candidate = row.repcode_candidate(4, 1);
    assert!(candidate.is_none());
}
9546
/// On x86_64 hosts that report SSE4.2, the dispatched hash mix must equal the
/// direct SSE4.2 implementation. The test is a no-op on hosts without SSE4.2.
#[cfg(all(feature = "std", target_arch = "x86_64"))]
#[test]
fn hash_mix_sse42_path_is_available_and_matches_accelerated_impl_when_supported() {
    use crate::encoding::fastpath::{self, FastpathKernel};

    if !is_x86_feature_detected!("sse4.2") {
        return;
    }

    let input = 0x0123_4567_89AB_CDEFu64;
    // SAFETY: feature check above guarantees SSE4.2 is available.
    let accelerated = unsafe { fastpath::sse42::hash_mix_u64(input) };
    // The dispatcher is expected to pick SSE4.2 (or better) and agree with it.
    let dispatched = fastpath::dispatch_hash_mix_u64(input);

    match fastpath::select_kernel() {
        FastpathKernel::Sse42 => assert_eq!(dispatched, accelerated),
        // AVX2 kernel uses the same CRC32 instruction under the hood.
        _ => assert_eq!(dispatched, accelerated, "AVX2/SSE4.2 share CRC32 mix"),
    }
}
9567
/// On little-endian aarch64 hosts that report the `crc` feature, the
/// dispatched hash mix must equal the direct NEON-module implementation.
/// The test is a no-op on hosts without `crc`.
#[cfg(all(feature = "std", target_arch = "aarch64", target_endian = "little"))]
#[test]
fn hash_mix_crc_path_is_available_and_matches_accelerated_impl_when_supported() {
    use crate::encoding::fastpath;

    if is_aarch64_feature_detected!("crc") {
        let input = 0x0123_4567_89AB_CDEFu64;
        // SAFETY: feature check above guarantees CRC32 is available.
        let accelerated = unsafe { fastpath::neon::hash_mix_u64(input) };
        assert_eq!(fastpath::dispatch_hash_mix_u64(input), accelerated);
    }
}
9581
/// `MatchTable::hash3_position` must equal the hand-computed 3-byte hash:
/// little-endian u32 read, shift left by 8, multiply by `HC_PRIME3BYTES`,
/// keep the top `HC3_HASH_LOG` bits.
#[test]
fn hc_hash3_position_matches_donor_formula() {
    use super::match_table::storage::MatchTable;

    let bytes = *b"abcd";
    let shifted = u32::from_le_bytes(bytes) << 8;
    let mixed = shifted.wrapping_mul(HC_PRIME3BYTES);
    let expected = (mixed >> (32 - HC3_HASH_LOG)) as usize;

    let actual = MatchTable::hash3_position(&bytes, HC3_HASH_LOG);
    assert_eq!(actual, expected);
}
9592
/// For a Lazy-strategy hash-chain matcher, `hash_position` must equal the
/// hand-computed 4-byte hash: little-endian u32 read times `HC_PRIME4BYTES`,
/// keeping the top `hash_log` bits.
#[test]
fn hc_hash_position_matches_donor_hash4_formula() {
    let mut generator = HcMatchGenerator::new(1 << 20);
    generator.configure(HC_CONFIG, super::strategy::StrategyTag::Lazy, 22);

    let bytes = *b"abcd";
    let mixed = u32::from_le_bytes(bytes).wrapping_mul(HC_PRIME4BYTES);
    let expected = (mixed >> (32 - generator.table.hash_log)) as usize;

    let actual = generator.table.hash_position(&bytes);
    assert_eq!(actual, expected);
}
9602
/// With the BtUltra2 configuration, hashing at `BtMatcher::HASH_MLS` must
/// equal the hand-computed 4-byte hash of the first four input bytes.
#[test]
fn btultra2_main_hash_uses_donor_hash4_formula() {
    use super::match_table::storage::MatchTable;

    let mut generator = HcMatchGenerator::new(1 << 20);
    generator.configure(
        BTULTRA2_HC_CONFIG_L22,
        super::strategy::StrategyTag::BtUltra2,
        27,
    );
    let hash_log = generator.table.hash_log;

    // Eight bytes are supplied, but the expected value reads only the first four.
    let bytes = *b"abcdefgh";
    let head = [bytes[0], bytes[1], bytes[2], bytes[3]];
    let mixed = u32::from_le_bytes(head).wrapping_mul(HC_PRIME4BYTES);
    let expected = (mixed >> (32 - hash_log)) as usize;

    let actual =
        MatchTable::hash_position_with_mls(&bytes, hash_log, super::bt::BtMatcher::HASH_MLS);
    assert_eq!(actual, expected);
}
9621
/// `row_candidate` must return `None` when fewer than `ROW_MIN_MATCH_LEN`
/// bytes remain from the probed position.
#[test]
fn row_candidate_returns_none_when_abs_pos_near_end_of_history() {
    let mut row = RowMatchGenerator::new(1 << 22);
    row.configure(ROW_CONFIG);
    // The history is one byte shorter than the accept floor, so from
    // abs_pos 0 the remaining length is below `ROW_MIN_MATCH_LEN`. The length
    // gate in `row_candidate` has to bail out with `None` before the row
    // tables (never built in this test) are consulted.
    row.history_abs_start = 0;
    row.history_start = 0;
    row.history = alloc::vec![b'a'; ROW_MIN_MATCH_LEN - 1];

    let candidate = row.row_candidate(0, 0);
    assert!(candidate.is_none());
}
9636
/// For a 3-byte history, `chain_candidates` at position 0 must return only
/// `usize::MAX` sentinel entries.
#[test]
fn hc_chain_candidates_returns_sentinels_for_short_suffix() {
    let mut generator = HcMatchGenerator::new(32);
    generator.table.history_abs_start = 0;
    generator.table.history_start = 0;
    generator.table.history = b"abc".to_vec();
    generator.table.ensure_tables();

    let candidates = generator.hc.chain_candidates(&generator.table, 0);
    let has_real_candidate = candidates.iter().any(|&pos| pos != usize::MAX);
    assert!(!has_real_candidate);
}
9648
/// After `reset`, every hash-table slot that still decodes to a position must
/// decode to one strictly below the new `history_abs_start`.
#[test]
fn hc_reset_advances_floor_past_prior_frame_entries() {
    use super::match_table::storage::MatchTable;

    let mut generator = HcMatchGenerator::new(32);
    generator.table.add_data(b"abcdeabcde".to_vec(), |_| {});
    generator.table.ensure_tables();
    // Seed genuine hash / chain entries for positions of the first frame.
    generator.table.insert_positions(0, 6);
    let prev_end = generator.table.history_abs_end();
    assert_eq!(prev_end, 10);
    let any_populated = generator.table.hash_table.iter().any(|&slot| slot != HC_EMPTY);
    assert!(any_populated);

    generator.reset(|_| {});

    // Contract under test: entries from the previous frame can no longer be
    // matched. Rather than zeroing the tables, `reset` moves the floor past
    // all earlier positions, so each populated slot decodes to an absolute
    // position strictly below `history_abs_start` and the `window_low` guard
    // rejects it before any byte is read.
    let floor = generator.table.history_abs_start;
    assert_eq!(floor, prev_end);

    let base = generator.table.position_base;
    let shift = generator.table.index_shift;
    let surviving = generator
        .table
        .hash_table
        .iter()
        .filter_map(|&slot| MatchTable::stored_abs_position_fast(slot, base, shift));
    for candidate_abs in surviving {
        assert!(
            candidate_abs < floor,
            "a prior-frame entry must resolve below the advanced floor"
        );
    }
}
9680
/// When `history_abs_start` sits at `REBASE_RESET_FLOOR_CEILING`, `reset` must
/// rewind to the origin and clear both tables to `HC_EMPTY`.
#[test]
fn hc_reset_full_zeroes_when_floor_would_cross_ceiling() {
    use super::match_table::storage::REBASE_RESET_FLOOR_CEILING;

    let mut generator = HcMatchGenerator::new(32);
    generator.table.add_data(b"abcdeabcde".to_vec(), |_| {});
    generator.table.ensure_tables();
    // Fill both tables with recognisable junk so clearing is observable.
    generator.table.chain_table.fill(456);
    generator.table.hash_table.fill(123);
    // Move the prospective floor (`history_abs_end`) beyond the ceiling. That
    // forces `reset` down its bounded fallback, which rewinds to the origin
    // and zeroes the tables so the absolute cursor cannot creep toward
    // `usize::MAX` on 32-bit targets.
    generator.table.history_abs_start = REBASE_RESET_FLOOR_CEILING;

    generator.reset(|_| {});

    let table = &generator.table;
    assert_eq!(table.history_abs_start, 0);
    assert_eq!(table.position_base, 0);
    assert!(table.hash_table.iter().all(|&slot| slot == HC_EMPTY));
    assert!(table.chain_table.iter().all(|&slot| slot == HC_EMPTY));
}
9702
/// `start_matching` on an empty current block must never invoke the sequence
/// callback.
#[test]
fn hc_start_matching_returns_early_for_empty_current_block() {
    let mut generator = HcMatchGenerator::new(32);
    generator.table.add_data(Vec::new(), |_| {});

    let mut emitted = 0usize;
    generator.start_matching(|_| emitted += 1);

    assert!(
        emitted == 0,
        "empty current block should not emit sequences"
    );
}
9711
/// Produces `len` pseudo-random bytes from a xorshift64 generator (shifts
/// 13 / 7 / 17), taking bits 40..48 of the state after every step.
///
/// The output depends only on `seed` and `len`, so fixtures built from it are
/// reproducible. A zero state is a fixed point of xorshift and would yield
/// nothing but zero bytes, so `seed == 0` is remapped to a fixed non-zero
/// constant; every non-zero seed produces the same stream as before.
#[cfg(test)]
fn deterministic_high_entropy_bytes(seed: u64, len: usize) -> Vec<u8> {
    // Arbitrary non-zero stand-in used only for the degenerate zero seed.
    const ZERO_SEED_FALLBACK: u64 = 0x9E37_79B9_7F4A_7C15;

    let mut state = if seed == 0 { ZERO_SEED_FALLBACK } else { seed };
    (0..len)
        .map(|_| {
            state ^= state << 13;
            state ^= state >> 7;
            state ^= state << 17;
            (state >> 40) as u8
        })
        .collect()
}
9724
/// Splits `data` into consecutive `(start, len)` block ranges for the level-22
/// parity harness, asking `optimal_block_size` for each block length.
///
/// Each length is capped at the candidate window (at most `HC_BLOCKSIZE_MAX`
/// bytes) and is at least 1. An empty input gives an empty list.
#[cfg(test)]
fn level22_donor_block_ranges(data: &[u8]) -> Vec<(usize, usize)> {
    let total = data.len();
    let mut ranges = Vec::new();
    let mut savings = 0i64;
    let mut cursor = 0usize;

    while cursor < total {
        let remaining = total - cursor;
        let candidate_len = HC_BLOCKSIZE_MAX.min(remaining);
        let candidate = &data[cursor..][..candidate_len];
        let suggested = crate::encoding::frame_compressor::optimal_block_size(
            CompressionLevel::Level(22),
            candidate,
            remaining,
            HC_BLOCKSIZE_MAX,
            savings,
        );
        // Cap at the candidate window first, then enforce a 1-byte minimum.
        let block_len = suggested.min(candidate_len).max(1);

        ranges.push((cursor, block_len));
        cursor += block_len;

        // The real donor gate is driven by compressed-size savings. In this
        // corpus parity harness it is enough to report positive savings once
        // the first full block has been consumed, which authorizes the same
        // pre-block splitter path.
        if cursor >= HC_BLOCKSIZE_MAX {
            savings = 3;
        }
    }

    ranges
}
9753
/// Folds block-delimiter entries into the sequences around them.
///
/// Entries are `(lit_len, offset, match_len)`. An entry whose offset and match
/// length are both zero is a delimiter carrying only literals; its literal
/// count is added (saturating) to the next real sequence. Literals still
/// pending at the end are emitted as one trailing `(lits, 0, 0)` entry, and
/// nothing is emitted when that count is zero.
#[cfg(test)]
fn merge_block_delimiters_like_donor(
    sequences: Vec<(usize, usize, usize)>,
) -> Vec<(usize, usize, usize)> {
    let mut merged = Vec::with_capacity(sequences.len());
    let mut carried = 0usize;

    for entry in sequences {
        match entry {
            // Delimiter: remember its literals for the next real sequence.
            (lits, 0, 0) => carried = carried.saturating_add(lits),
            // Real sequence: absorb any literals carried over.
            (lits, offset, match_len) => {
                merged.push((lits.saturating_add(carried), offset, match_len));
                carried = 0;
            }
        }
    }

    if carried != 0 {
        merged.push((carried, 0, 0));
    }
    merged
}
9773
/// Level-22 sequences produced by this crate for `data`: block delimiters are
/// merged and any remaining literal-only entry (zero offset and zero match
/// length) is dropped.
#[cfg(test)]
fn collect_level22_sequences(data: &[u8]) -> Vec<(usize, usize, usize)> {
    let raw = collect_level22_sequences_with_delimiters(data);
    let mut merged = merge_block_delimiters_like_donor(raw);
    merged.retain(|&(_, offset, match_len)| offset != 0 || match_len != 0);
    merged
}
9781
/// Runs the level-22 match driver over `data`, one block per range from
/// `level22_donor_block_ranges`, and records every emitted sequence as
/// `(lit_len, offset, match_len)`.
///
/// Literal-only sequences are recorded with zero offset and zero match length.
#[cfg(test)]
fn collect_level22_sequences_with_delimiters(data: &[u8]) -> Vec<(usize, usize, usize)> {
    let mut driver = MatchGeneratorDriver::new(HC_BLOCKSIZE_MAX, 1);
    driver.set_source_size_hint(data.len() as u64);
    driver.reset(CompressionLevel::Level(22));

    let mut sequences = Vec::new();
    for (chunk_start, chunk_len) in level22_donor_block_ranges(data) {
        let chunk = &data[chunk_start..chunk_start + chunk_len];
        let mut space = driver.get_next_space();
        // Replace the buffer contents instead of slice-indexing into it:
        // indexing would panic if the returned buffer were shorter than the
        // chunk. This is the same fill pattern the other driver tests use.
        space.clear();
        space.extend_from_slice(chunk);
        driver.commit_space(space);
        driver.start_matching(|seq| {
            let entry = match seq {
                Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
                Sequence::Triple {
                    literals,
                    offset,
                    match_len,
                } => (literals.len(), offset, match_len),
            };
            sequences.push(entry);
        });
    }
    sequences
}
9809
/// Level-22 sequences reported by the reference zstd library for `data`:
/// block delimiters are merged and any remaining literal-only entry (zero
/// offset and zero match length) is dropped.
#[cfg(test)]
fn donor_level22_sequences(data: &[u8]) -> Vec<(usize, usize, usize)> {
    let raw = donor_level22_sequences_with_delimiters(data);
    let mut merged = merge_block_delimiters_like_donor(raw);
    merged.retain(|&(_, offset, match_len)| offset != 0 || match_len != 0);
    merged
}
9817
/// Asks the reference zstd library (through `zstd_sys`) for the sequences it
/// generates for `data` at compression level 22.
///
/// Returns one `(litLength, offset, matchLength)` tuple per sequence written
/// by `ZSTD_generateSequences`, in the order the library produced them.
///
/// # Panics
///
/// Panics if the context cannot be created, if setting the level or
/// generating sequences reports a zstd error, or if freeing the context
/// returns a non-zero code.
#[cfg(test)]
fn donor_level22_sequences_with_delimiters(data: &[u8]) -> Vec<(usize, usize, usize)> {
    use zstd::zstd_safe;
    use zstd::zstd_safe::zstd_sys;

    /// Panics with `context` and the zstd error name when `code` is a zstd
    /// error code.
    fn assert_zstd_ok(code: usize, context: &str) {
        assert_eq!(
            // NOTE(review): `ZSTD_isError` takes only the integer code by
            // value, so the call presumably has no memory-safety
            // preconditions — confirm against the zstd API docs.
            unsafe { zstd_sys::ZSTD_isError(code) },
            0,
            "{context} failed: {}",
            zstd_safe::get_error_name(code)
        );
    }

    // SAFETY (as far as this block shows): `cctx` is null-checked before use
    // and freed exactly once after the last call that takes it; `seqs` and
    // `data` are live for the whole `ZSTD_generateSequences` call and their
    // real lengths are the sizes passed alongside the pointers.
    unsafe {
        let cctx = zstd_sys::ZSTD_createCCtx();
        assert!(!cctx.is_null(), "ZSTD_createCCtx returned null");

        // Select compression level 22 on the fresh context.
        assert_zstd_ok(
            zstd_sys::ZSTD_CCtx_setParameter(
                cctx,
                zstd_sys::ZSTD_cParameter::ZSTD_c_compressionLevel,
                22,
            ),
            "ZSTD_c_compressionLevel",
        );

        // Output buffer sized by `sequence_bound` for this input length and
        // pre-filled with zeroed entries.
        let seq_capacity = zstd_safe::sequence_bound(data.len());
        let mut seqs = alloc::vec![
            zstd_sys::ZSTD_Sequence {
                offset: 0,
                litLength: 0,
                matchLength: 0,
                rep: 0,
            };
            seq_capacity
        ];

        let seq_count = zstd_sys::ZSTD_generateSequences(
            cctx,
            seqs.as_mut_ptr(),
            seqs.len(),
            data.as_ptr().cast(),
            data.len(),
        );
        // The return value is either an error code or the number of
        // sequences written; it is validated before being used as a length.
        // NOTE(review): if this assertion panics, `cctx` is never freed —
        // tolerable in a test, but worth knowing.
        assert_zstd_ok(seq_count, "ZSTD_generateSequences");
        let rc = zstd_sys::ZSTD_freeCCtx(cctx);
        assert_eq!(rc, 0, "ZSTD_freeCCtx failed");

        // Keep only the entries the library reported, then convert each one
        // into a plain tuple.
        seqs.truncate(seq_count);
        seqs.into_iter()
            .map(|seq| {
                (
                    seq.litLength as usize,
                    seq.offset as usize,
                    seq.matchLength as usize,
                )
            })
            .collect()
    }
}
9879
/// Level-22 sequence parity against the reference library on the `z000033`
/// corpus file.
#[test]
fn level22_sequences_match_donor_on_corpus_proxy() {
    let corpus: &[u8] = include_bytes!("../../decodecorpus_files/z000033");
    assert_level22_sequences_match_donor(corpus);
}
9885
/// Level-22 sequence parity against the reference library on the `z000030`
/// corpus file.
#[test]
fn level22_sequences_match_donor_on_small_corpus_proxy() {
    let corpus: &[u8] = include_bytes!("../../decodecorpus_files/z000030");
    assert_level22_sequences_match_donor(corpus);
}
9891
/// Asserts that this crate and the reference library produce the same
/// level-22 sequence list for `data`.
///
/// On a mismatch it panics with a report containing the first differing index,
/// both entries at that index, both list lengths, the input position each side
/// had reached, the repeat-offset history replayed up to the divergence, a
/// window of nearby sequences from each side, and the block range(s) that
/// contain the divergence position.
#[cfg(test)]
fn assert_level22_sequences_match_donor(data: &[u8]) {
    let rust = collect_level22_sequences(data);
    let donor = donor_level22_sequences(data);
    if rust == donor {
        return;
    }

    // First index where the lists disagree; when one list is a strict prefix
    // of the other this is the shorter length.
    let shared_len = rust.len().min(donor.len());
    let first_diff = (0..shared_len)
        .find(|&idx| rust[idx] != donor[idx])
        .unwrap_or(shared_len);

    // Input bytes covered by the sequences before the divergence.
    let bytes_before = |seqs: &[(usize, usize, usize)]| -> usize {
        seqs[..first_diff]
            .iter()
            .map(|&(lit_len, _, match_len)| lit_len + match_len)
            .sum()
    };
    let rust_pos = bytes_before(&rust[..]);
    let donor_pos = bytes_before(&donor[..]);

    // Up to four sequences of context on either side of the divergence.
    let window_start = first_diff.saturating_sub(4);
    let window_end = first_diff + 4;
    let rust_window = &rust[window_start..window_end.min(rust.len())];
    let donor_window = &donor[window_start..window_end.min(donor.len())];

    // Replay the agreed-upon prefix to recover the repeat-offset history at
    // the point of divergence.
    let mut reps = [1u32, 4, 8];
    rust[..first_diff].iter().for_each(|&(lit_len, offset, _)| {
        let _ = encode_offset_with_history(offset as u32, lit_len as u32, &mut reps);
    });

    // Block range(s) that contain the Rust-side divergence position.
    let mut containing_blocks = level22_donor_block_ranges(data);
    containing_blocks.retain(|&(block_start, block_len)| {
        block_start <= rust_pos && rust_pos < block_start + block_len
    });

    panic!(
        "level22 sequence path diverged at idx {}: rust={:?} donor={:?} (rust_len={} donor_len={} rust_pos={} donor_pos={} reps_before={:?} rust_window={:?} donor_window={:?} block_ranges={:?})",
        first_diff,
        rust.get(first_diff),
        donor.get(first_diff),
        rust.len(),
        donor.len(),
        rust_pos,
        donor_pos,
        reps,
        rust_window,
        donor_window,
        containing_blocks,
    );
}
9937
/// After `skip_matching(Some(true))` over a 4096-byte pseudo-random block, a
/// second block that begins with the first block's 9-byte tail must still
/// match that tail right at the block boundary.
///
/// The first emitted sequence must have no literals, an offset equal to the
/// tail length, and a match length of at least the tail length.
#[test]
fn hc_sparse_skip_matching_preserves_tail_cross_block_match() {
    let mut matcher = HcMatchGenerator::new(1 << 22);
    let tail = b"Qz9kLm2Rp";
    let mut first = deterministic_high_entropy_bytes(0xD1B5_4A32_9C77_0E19, 4096);
    // Overwrite the end of the random block with the recognisable tail.
    let tail_start = first.len() - tail.len();
    first[tail_start..].copy_from_slice(tail);
    // `first` is not read again, so hand the buffer over instead of cloning it.
    matcher.table.add_data(first, |_| {});
    matcher.skip_matching(Some(true));

    let mut second = tail.to_vec();
    second.extend_from_slice(b"after-tail-literals");
    matcher.table.add_data(second, |_| {});

    // Only the first emitted sequence matters; later ones are ignored.
    let mut first_sequence = None;
    matcher.start_matching(|seq| {
        if first_sequence.is_some() {
            return;
        }
        first_sequence = Some(match seq {
            Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => (literals.len(), offset, match_len),
        });
    });

    let (literals_len, offset, match_len) =
        first_sequence.expect("expected at least one sequence after sparse skip");
    assert_eq!(
        literals_len, 0,
        "first sequence should start at block boundary"
    );
    assert_eq!(
        offset,
        tail.len(),
        "first match should reference previous tail"
    );
    assert!(
        match_len >= tail.len(),
        "tail-aligned cross-block match must be preserved"
    );
}
9983
/// BtUltra2 variant of the sparse-skip boundary test: after
/// `skip_matching(Some(true))` over a pseudo-random block, a following block
/// that starts with the previous block's 9-byte tail must match it
/// immediately.
#[test]
fn btultra2_sparse_skip_matching_preserves_tail_cross_block_match() {
    let tail = b"Bt9kLm2Rp";

    let mut generator = HcMatchGenerator::new(1 << 20);
    generator.configure(
        BTULTRA2_HC_CONFIG_L22,
        super::strategy::StrategyTag::BtUltra2,
        20,
    );

    // Block one: pseudo-random bytes that end with the recognisable tail.
    let mut block_one = deterministic_high_entropy_bytes(0xA9C3_7F21_D4E8_510B, 4096);
    let tail_start = block_one.len() - tail.len();
    block_one[tail_start..].copy_from_slice(tail);
    generator.table.add_data(block_one, |_| {});
    generator.skip_matching(Some(true));

    // Block two: the same tail followed by unrelated literals.
    let mut block_two = tail.to_vec();
    block_two.extend_from_slice(b"after-tail-literals");
    generator.table.add_data(block_two, |_| {});

    // Record the first emitted sequence only.
    let mut first_sequence = None;
    generator.start_matching(|seq| {
        if first_sequence.is_none() {
            let summary = match seq {
                Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
                Sequence::Triple {
                    literals,
                    offset,
                    match_len,
                } => (literals.len(), offset, match_len),
            };
            first_sequence = Some(summary);
        }
    });

    let (literals_len, offset, match_len) =
        first_sequence.expect("expected at least one sequence after sparse BT skip");
    assert_eq!(
        literals_len, 0,
        "BT sparse skip should preserve an immediate boundary match"
    );
    assert_eq!(
        offset,
        tail.len(),
        "first BT match should reference previous tail"
    );
    assert!(
        match_len >= tail.len(),
        "BT sparse skip must seed the dense tail for cross-block matching"
    );
}
10034
/// After a sparse `skip_matching(Some(true))`, a position that lies both on
/// the sparse grid and inside the dense tail must not have a chain entry
/// pointing at itself.
#[test]
fn hc_sparse_skip_matching_does_not_reinsert_sparse_tail_positions() {
    let mut matcher = HcMatchGenerator::new(1 << 22);
    let first = deterministic_high_entropy_bytes(0xC2B2_AE3D_27D4_EB4F, 4096);
    let current_len = first.len();
    matcher.table.add_data(first, |_| {});
    matcher.skip_matching(Some(true));

    // Absolute bounds of the block that was just skipped.
    let history_floor = matcher.table.history_abs_start;
    let current_abs_start = history_floor + matcher.table.window_size - current_len;
    let current_abs_end = current_abs_start + current_len;

    // Start of the dense tail, clamped to the history floor and block start.
    let dense_tail = HC_MIN_MATCH_LEN + INCOMPRESSIBLE_SKIP_STEP;
    let tail_start = current_abs_end
        .saturating_sub(dense_tail)
        .max(history_floor.max(current_abs_start));

    // First dense-tail position that also sits on the sparse grid.
    let mut tail_positions = tail_start..current_abs_end;
    let overlap_pos = tail_positions
        .find(|&pos| (pos - current_abs_start).is_multiple_of(INCOMPRESSIBLE_SKIP_STEP))
        .expect("fixture should contain at least one sparse-grid overlap in dense tail");

    let rel = matcher
        .table
        .relative_position(overlap_pos)
        .expect("overlap position should be representable as relative position");
    let chain_mask = (1usize << matcher.table.chain_log) - 1;
    let chain_idx = rel as usize & chain_mask;
    assert_ne!(
        matcher.table.chain_table[chain_idx],
        rel + 1,
        "sparse-grid tail positions must not be reinserted (self-loop chain entry)"
    );
}
10067
/// With `history_start` at 16 in a 26-byte buffer, `compact_history` must drop
/// the dead prefix and reset `history_start` to 0.
#[test]
fn hc_compact_history_drains_when_threshold_crossed() {
    let mut generator = HcMatchGenerator::new(8);
    let table = &mut generator.table;
    table.history = b"abcdefghijklmnopqrstuvwxyz".to_vec();
    table.history_start = 16;

    table.compact_history();

    assert_eq!(table.history_start, 0);
    assert_eq!(table.history, b"qrstuvwxyz");
}
10077
/// With `position_base` (1) above the inserted position (0),
/// `insert_position_no_rebase` must leave the hash and chain tables unchanged.
#[test]
fn hc_insert_position_no_rebase_returns_when_relative_pos_unavailable() {
    let mut generator = HcMatchGenerator::new(32);
    let table = &mut generator.table;
    table.history = b"abcdefghijklmnop".to_vec();
    table.position_base = 1;
    table.history_abs_start = 0;
    table.ensure_tables();

    // Snapshot both tables, attempt the insert, then compare.
    let (hash_before, chain_before) = (table.hash_table.clone(), table.chain_table.clone());

    table.insert_position_no_rebase(0);

    assert_eq!(table.hash_table, hash_before);
    assert_eq!(table.chain_table, chain_before);
}
10093
/// A contiguous `insert_positions(0, 9)` must move `next_to_update3` from 0
/// to 9.
#[test]
fn hc_insert_positions_advances_next_to_update3_for_contiguous_range() {
    let mut generator = HcMatchGenerator::new(64);
    let table = &mut generator.table;
    table.history = b"abcdefghijklmnopqrstuvwxyz".to_vec();
    table.position_base = 0;
    table.history_abs_start = 0;
    table.history_start = 0;
    table.ensure_tables();
    table.next_to_update3 = 0;

    table.insert_positions(0, 9);

    let cursor = table.next_to_update3;
    assert_eq!(
        cursor, 9,
        "contiguous insert_positions should advance hash3 update cursor"
    );
}
10111
/// A strided `insert_positions_with_step(0, 16, 4)` must leave
/// `next_to_update3` at 0.
#[test]
fn hc_insert_positions_with_step_keeps_next_to_update3_cursor_for_sparse_ranges() {
    let mut generator = HcMatchGenerator::new(64);
    let table = &mut generator.table;
    table.history = b"abcdefghijklmnopqrstuvwxyz".to_vec();
    table.position_base = 0;
    table.history_abs_start = 0;
    table.history_start = 0;
    table.ensure_tables();
    table.next_to_update3 = 0;

    table.insert_positions_with_step(0, 16, 4);

    let cursor = table.next_to_update3;
    assert_eq!(
        cursor, 0,
        "sparse insert_positions_with_step must not mark skipped positions as hash3-updated"
    );
}
10129
// `#[cfg(any())]` is an always-false predicate, so this test is compiled out
// entirely; the body below is kept for reference only.
#[cfg(any())]
// disabled: tests legacy SuffixStore behavior incompatible with donor-shape kernel's HASH_READ_SIZE geometry
#[test]
fn prime_with_dictionary_budget_shrinks_after_simple_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);
    // Use a small live window so dictionary-primed slices are evicted
    // quickly and budget retirement can be asserted deterministically.
    driver.simple_mut().max_window_size = 8;
    driver.reported_window_size = 8;

    // Priming with a 24-byte dictionary is expected to widen the window by
    // exactly the dictionary length.
    let base_window = driver.simple_mut().max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(driver.simple_mut().max_window_size, base_window + 24);

    // Commit two 8-byte blocks without matching; per the assertions below,
    // this is meant to evict the primed dictionary slices.
    for block in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut space = driver.get_next_space();
        space.clear();
        space.extend_from_slice(block);
        driver.commit_space(space);
        driver.skip_matching_with_hint(None);
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.simple_mut().max_window_size,
        base_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
10163
/// Dfast variant: once the dictionary-primed bytes have been evicted, the
/// retained dictionary budget must be zero and the window back at its
/// pre-prime size.
#[test]
fn prime_with_dictionary_budget_shrinks_after_dfast_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(3));
    // Pin an 8-byte live window in this regression: the dictionary-primed
    // slices then fall out after very little input, which keeps the budget
    // assertions deterministic.
    driver.dfast_matcher_mut().max_window_size = 8;
    driver.reported_window_size = 8;
    let window_before_prime = driver.dfast_matcher().max_window_size;

    // Priming with the 24-byte dictionary must widen the window by 24.
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    let window_after_prime = driver.dfast_matcher().max_window_size;
    assert_eq!(window_after_prime, window_before_prime + 24);

    // Commit two 8-byte blocks, skipping the match search each time.
    [b"AAAAAAAA", b"BBBBBBBB"].into_iter().for_each(|payload| {
        let mut buf = driver.get_next_space();
        buf.clear();
        buf.extend_from_slice(payload);
        driver.commit_space(buf);
        driver.skip_matching_with_hint(None);
    });

    let remaining_budget = driver.dictionary_retained_budget;
    assert_eq!(
        remaining_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    let window_after_eviction = driver.dfast_matcher().max_window_size;
    assert_eq!(
        window_after_eviction, window_before_prime,
        "retired dictionary budget must not remain reusable for live history"
    );
}
10195
/// After priming with a dictionary, the HashChain backend must be able to
/// match the very first committed block against the dictionary bytes.
#[test]
fn hc_prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    // `Better` resolves to the Row backend in production, so the driver is
    // routed onto HashChain explicitly: this test pins HC dict-prime
    // behaviour.
    driver.reset_on_hc_lazy(CompressionLevel::Better);
    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    // The block repeats the dictionary verbatim so the HC matcher has
    // something to find. HC_MIN_MATCH_LEN is 5, so an 8-byte repeat is well
    // above the threshold.
    let mut block = driver.get_next_space();
    block.clear();
    block.extend_from_slice(b"abcdefgh");
    driver.commit_space(block);

    // Look for a literal-free triple that points back 8 bytes.
    let mut saw_match = false;
    driver.start_matching(|seq| match seq {
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } if literals.is_empty() && offset == 8 && match_len >= HC_MIN_MATCH_LEN => {
            saw_match = true;
        }
        _ => {}
    });

    assert!(
        saw_match,
        "hash-chain backend should match dictionary-primed history in first full block"
    );
}
10232
/// HashChain variant: once the dictionary-primed bytes have been evicted,
/// the retained dictionary budget must be zero and the window back at its
/// pre-prime size.
#[test]
fn prime_with_dictionary_budget_shrinks_after_hc_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset_on_hc_lazy(CompressionLevel::Better);
    // Pin an 8-byte live window so dictionary-primed slices fall out quickly.
    driver.hc_matcher_mut().table.max_window_size = 8;
    driver.reported_window_size = 8;
    let window_before_prime = driver.hc_matcher().table.max_window_size;

    // Priming with the 24-byte dictionary must widen the window by 24.
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    let window_after_prime = driver.hc_matcher().table.max_window_size;
    assert_eq!(window_after_prime, window_before_prime + 24);

    // Commit two 8-byte blocks, skipping the match search each time.
    [b"AAAAAAAA", b"BBBBBBBB"].into_iter().for_each(|payload| {
        let mut buf = driver.get_next_space();
        buf.clear();
        buf.extend_from_slice(payload);
        driver.commit_space(buf);
        driver.skip_matching_with_hint(None);
    });

    let remaining_budget = driver.dictionary_retained_budget;
    assert_eq!(
        remaining_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    let window_after_eviction = driver.hc_matcher().table.max_window_size;
    assert_eq!(
        window_after_eviction, window_before_prime,
        "retired dictionary budget must not remain reusable for live history"
    );
}
10263
/// Regression for the HashChain arm of `commit_space`.
///
/// Since the window<->history dedup, `MatchTable::add_data` calls its
/// reuse_space callback with the *input* buffer (to recycle it), not with
/// evicted chunks. Eviction therefore has to be derived from the
/// window_size delta: treating the callback argument as evicted bytes would
/// charge the whole committed block and retire dictionary budget too early,
/// even with the window nowhere near full.
#[test]
fn hc_commit_without_eviction_retires_no_dictionary_budget() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset_on_hc_lazy(CompressionLevel::Better);
    // 1 MiB live window: an 8-byte commit cannot evict anything.
    driver.hc_matcher_mut().table.max_window_size = 1 << 20;
    driver.reported_window_size = 1 << 20;

    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    let primed_budget = driver.dictionary_retained_budget;
    assert!(
        primed_budget > 0,
        "priming must retain a non-zero dictionary budget"
    );

    // One small block, committed and skipped.
    let mut block = driver.get_next_space();
    block.clear();
    block.extend_from_slice(b"AAAAAAAA");
    driver.commit_space(block);
    driver.skip_matching_with_hint(None);

    let budget_after_commit = driver.dictionary_retained_budget;
    assert_eq!(
        budget_after_commit, primed_budget,
        "a commit that evicts nothing must retire no dictionary budget"
    );
}
10296
/// Regression for the Row arm of `commit_space` after the window ->
/// chunk_lens migration.
///
/// `RowMatchGenerator::add_data` now calls its reuse_space callback with the
/// *input* buffer (per-commit recycle), not with evicted chunks. Like the
/// Dfast / HashChain arms, the Row arm has to derive eviction from the
/// window_size delta: treating the callback argument as evicted bytes would
/// charge the whole committed block and retire dictionary budget too early,
/// even with the window nowhere near full.
#[test]
fn row_commit_without_eviction_retires_no_dictionary_budget() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(5));
    assert!(matches!(driver.storage, MatcherStorage::Row(_)));
    // 1 MiB live window: an 8-byte commit cannot evict anything.
    driver.row_matcher_mut().max_window_size = 1 << 20;
    driver.reported_window_size = 1 << 20;

    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    let primed_budget = driver.dictionary_retained_budget;
    assert!(
        primed_budget > 0,
        "priming must retain a non-zero dictionary budget"
    );

    // One small block, committed and skipped.
    let mut block = driver.get_next_space();
    block.clear();
    block.extend_from_slice(b"AAAAAAAA");
    driver.commit_space(block);
    driver.skip_matching_with_hint(None);

    let budget_after_commit = driver.dictionary_retained_budget;
    assert_eq!(
        budget_after_commit, primed_budget,
        "a Row commit that evicts nothing must retire no dictionary budget"
    );
}
10331
/// With the absolute history origin moved past `u32::MAX`, `skip_matching`
/// must rebase the HC position base and keep the tables usable.
#[test]
fn hc_rebases_positions_after_u32_boundary() {
    let mut matcher = HcMatchGenerator::new(64);
    matcher.table.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
    matcher.table.ensure_tables();
    matcher.table.position_base = 0;

    // An origin 64 bytes past u32::MAX does not fit a narrower `usize`;
    // nothing to test in that case.
    let Ok(history_abs_start) = usize::try_from(u64::from(u32::MAX) + 64) else {
        return;
    };
    // Simulate a long-running stream where absolute history positions crossed
    // the u32 range. Before #51 this disabled HC inserts entirely.
    matcher.table.history_abs_start = history_abs_start;
    matcher.skip_matching(None);

    let rebased_origin = matcher.table.history_abs_start;
    assert_eq!(
        matcher.table.position_base, rebased_origin,
        "rebase should anchor to the oldest live absolute position"
    );

    let table_populated = matcher
        .table
        .hash_table
        .iter()
        .any(|slot| *slot != HC_EMPTY);
    assert!(
        table_populated,
        "HC hash table should still be populated after crossing u32 boundary"
    );

    // Table population alone is not enough: candidate lookup must also
    // survive the rebase.
    let abs_pos = rebased_origin + 10;
    let candidates = matcher.hc.chain_candidates(&matcher.table, abs_pos);
    let has_valid_candidate = candidates.iter().any(|found| *found != usize::MAX);
    assert!(
        has_valid_candidate,
        "chain_candidates should return valid matches after rebase"
    );
}
10368
// Restricted to 64-bit targets: the >4 GiB absolute cursor fabricated below
// cannot exist where usize == u32, and placing `history_abs_start` near
// `u32::MAX` there overflows `usize` inside the `check_stream_abs_headroom`
// guard before the rebase path is reached. This mirrors the `try_into()`
// early-return guard on `hc_rebases_positions_after_u32_boundary`.
#[cfg(target_pointer_width = "64")]
#[test]
fn row_rebases_positions_after_u32_boundary() {
    // Row keeps absolute match positions as u32. On a long stream the
    // cumulative absolute cursor leaves the u32 range although the live
    // window stays bounded, so `add_data` has to rebase the coordinate
    // origin down to the oldest live byte instead of asserting. Before the
    // rebase landed this panicked on the `< u32::MAX` assertion, dropping
    // valid long Row-backed frames.
    let u32_ceiling = u32::MAX as usize;
    let mut row = RowMatchGenerator::new(64);
    row.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});

    // Pretend ~4 GiB of stream sit behind the bounded window: the live bytes
    // now start 16 below the u32 absolute ceiling.
    let near_ceiling = u32_ceiling - 16;
    row.history_abs_start = near_ceiling;

    // This commit would push a u32 position past the ceiling; it must
    // rebase rather than panic.
    row.add_data(b"fghij".to_vec(), |_| {});

    let rebased_origin = row.history_abs_start;
    assert!(
        rebased_origin < near_ceiling,
        "add_data must rebase the absolute origin down when the cursor nears \
         u32::MAX (got {})",
        rebased_origin
    );
    let live_window_end = rebased_origin + row.window_size;
    assert!(
        live_window_end < u32_ceiling,
        "after rebase the live window must fit below the u32 position ceiling"
    );
}
10406
/// `maybe_rebase_positions(abs_pos)` must rebuild the hash and chain tables
/// for positions below `abs_pos` only. A reference table is built by hand
/// and the two are compared slot by slot.
#[test]
fn hc_rebase_rebuilds_only_inserted_prefix() {
    let mut matcher = HcMatchGenerator::new(64);
    matcher.table.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
    matcher.table.ensure_tables();
    matcher.table.position_base = 0;

    // An origin 64 bytes past u32::MAX does not fit a narrower `usize`;
    // nothing to test in that case.
    let Ok(history_abs_start) = usize::try_from(u64::from(u32::MAX) + 64) else {
        return;
    };
    matcher.table.history_abs_start = history_abs_start;
    let abs_pos = history_abs_start + 6;

    // Reference state: same data and origin, already anchored at the origin,
    // tables wiped and then filled for [origin, abs_pos) only.
    let mut expected = HcMatchGenerator::new(64);
    {
        let reference = &mut expected.table;
        reference.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
        reference.ensure_tables();
        reference.history_abs_start = history_abs_start;
        reference.position_base = history_abs_start;
        reference.hash_table.fill(HC_EMPTY);
        reference.chain_table.fill(HC_EMPTY);
        (history_abs_start..abs_pos).for_each(|pos| {
            reference.insert_position_no_rebase(pos);
        });
    }

    matcher.table.maybe_rebase_positions(abs_pos);

    assert_eq!(
        matcher.table.position_base, matcher.table.history_abs_start,
        "rebase should still anchor to the oldest live absolute position"
    );
    assert_eq!(
        matcher.table.hash_table, expected.table.hash_table,
        "rebase must rebuild only positions already inserted before abs_pos"
    );
    assert_eq!(
        matcher.table.chain_table, expected.table.chain_table,
        "future positions must not be pre-seeded into HC chains during rebase"
    );
}
10446
#[cfg(any())] // disabled: tested legacy MatchGenerator/SuffixStore behavior removed in phase 1b
#[test]
fn suffix_store_with_single_slot_does_not_panic_on_keying() {
    // A one-slot store must accept an insert and then answer both lookups.
    let mut suffixes = SuffixStore::with_capacity(1);
    let slot = 0;
    suffixes.insert(b"abcde", slot);

    assert!(suffixes.contains_key(b"abcde"));
    let stored = suffixes.get(b"abcde");
    assert_eq!(stored, Some(slot));
}
10455
#[cfg(any())]
// disabled: hash_fill_step is a legacy MatchGenerator field; FastKernelMatcher walks stride=1 today
#[test]
fn fastest_reset_uses_interleaved_hash_fill_step() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Uncompressed: the fill step is expected to be 1.
    driver.reset(CompressionLevel::Uncompressed);
    let uncompressed_step = driver.simple().hash_fill_step;
    assert_eq!(uncompressed_step, 1);

    // Fastest: the fill step is expected to be FAST_HASH_FILL_STEP.
    driver.reset(CompressionLevel::Fastest);
    let fastest_step = driver.simple().hash_fill_step;
    assert_eq!(fastest_step, FAST_HASH_FILL_STEP);

    // Better runs on the HashChain backend with lazy2: check that the
    // backend switched and that the window size and lazy_depth are set.
    driver.reset(CompressionLevel::Better);
    let backend = driver.active_backend();
    assert_eq!(backend, super::strategy::BackendTag::HashChain);
    let window = driver.window_size();
    assert_eq!(window, 1u64 << 23);
    let lazy_depth = driver.hc_matcher().hc.lazy_depth;
    assert_eq!(lazy_depth, 2);
}
10478
#[cfg(any())] // disabled: tested legacy MatchGenerator/SuffixStore behavior removed in phase 1b
#[test]
fn simple_matcher_updates_offset_history_after_emitting_match() {
    // "abcde" three times: one triple is expected (5 literals, then a
    // 10-byte match at offset 5), after which offset 5 leads the history.
    let input = b"abcdeabcdeabcde".to_vec();
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(input, SuffixStore::with_capacity(64), |_, _| {});

    assert!(matcher.next_sequence(|seq| {
        assert_eq!(
            seq,
            Sequence::Triple {
                literals: b"abcde",
                offset: 5,
                match_len: 10,
            }
        );
    }));

    let history_after = matcher.offset_hist;
    assert_eq!(history_after, [5, 1, 4]);
}
10501
#[cfg(any())] // disabled: tested legacy MatchGenerator/SuffixStore behavior removed in phase 1b
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep1_before_hash_lookup() {
    let input = b"abcdefghijabcdefghij".to_vec();
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(input, SuffixStore::with_capacity(64), |_, _| {});

    // Both cursors sit at index 10, the start of the second copy; offset 10
    // is in the middle slot of the history.
    matcher.suffix_idx = 10;
    matcher.last_idx_in_sequence = 10;
    matcher.offset_hist = [99, 10, 4];

    // The trailing `0` is presumably the pending literal count (per the
    // test name) — confirm against `repcode_candidate`.
    let current_block = matcher.window.last().unwrap();
    let candidate = matcher.repcode_candidate(&current_block.data[10..], 0);
    assert_eq!(candidate, Some((10, 10)));
}
10519
#[cfg(any())] // disabled: tested legacy MatchGenerator/SuffixStore behavior removed in phase 1b
#[test]
fn simple_matcher_repcode_can_target_previous_window_entry() {
    // The same 10-byte block is committed twice; the first copy is skipped,
    // so an offset-10 repcode has to reach into the previous window entry.
    let block = b"abcdefghij".to_vec();
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(block.clone(), SuffixStore::with_capacity(64), |_, _| {});
    matcher.skip_matching();
    matcher.add_data(block, SuffixStore::with_capacity(64), |_, _| {});

    matcher.offset_hist = [99, 10, 4];

    let current_block = matcher.window.last().unwrap();
    let candidate = matcher.repcode_candidate(&current_block.data, 0);
    assert_eq!(candidate, Some((10, 10)));
}
10541
#[cfg(any())] // disabled: tested legacy MatchGenerator/SuffixStore behavior removed in phase 1b
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep2() {
    let input = b"abcdefghijabcdefghij".to_vec();
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(input, SuffixStore::with_capacity(64), |_, _| {});

    matcher.suffix_idx = 10;
    matcher.last_idx_in_sequence = 10;
    // At idx 10, rep1=4 does not match but rep2=10 does.
    matcher.offset_hist = [99, 4, 10];

    let current_block = matcher.window.last().unwrap();
    let candidate = matcher.repcode_candidate(&current_block.data[10..], 0);
    assert_eq!(candidate, Some((10, 10)));
}
10559
#[cfg(any())] // disabled: tested legacy MatchGenerator/SuffixStore behavior removed in phase 1b
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep0_minus1() {
    let input = b"abcdefghijabcdefghij".to_vec();
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(input, SuffixStore::with_capacity(64), |_, _| {});

    matcher.suffix_idx = 10;
    matcher.last_idx_in_sequence = 10;
    // Neither rep1=4 nor rep2=99 matches; rep0-1 == 10 does.
    matcher.offset_hist = [11, 4, 99];

    let current_block = matcher.window.last().unwrap();
    let candidate = matcher.repcode_candidate(&current_block.data[10..], 0);
    assert_eq!(candidate, Some((10, 10)));
}
10577
#[cfg(any())] // disabled: tested legacy MatchGenerator/SuffixStore behavior removed in phase 1b
#[test]
fn simple_matcher_repcode_rejects_offsets_beyond_searchable_prefix() {
    // Two distinct 10-byte blocks; the first is skipped before the second
    // is added.
    let older_block = b"abcdefghij".to_vec();
    let newer_block = b"klmnopqrst".to_vec();
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(older_block, SuffixStore::with_capacity(64), |_, _| {});
    matcher.skip_matching();
    matcher.add_data(newer_block, SuffixStore::with_capacity(64), |_, _| {});
    matcher.suffix_idx = 3;

    // With the cursor at index 3 of the newest block, offset 14 must be
    // rejected.
    let current_block = matcher.window.last().unwrap();
    let candidate = matcher.offset_match_len(14, &current_block.data[3..]);
    assert_eq!(candidate, None);
}
10598
#[cfg(any())] // disabled: tested legacy MatchGenerator/SuffixStore behavior removed in phase 1b
#[test]
fn simple_matcher_skip_matching_seeds_every_position_even_with_fast_step() {
    let first_block = b"abcdefghijklmnop".to_vec();
    let probe = b"bcdef".to_vec();

    let mut matcher = MatchGenerator::new(64);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;
    matcher.add_data(first_block, SuffixStore::with_capacity(64), |_, _| {});
    matcher.skip_matching();
    // The probe repeats bytes 1..6 of the skipped block, so a literal-free
    // match at offset 15 is expected even with the fast fill step set.
    matcher.add_data(probe, SuffixStore::with_capacity(64), |_, _| {});

    assert!(matcher.next_sequence(|seq| {
        assert_eq!(
            seq,
            Sequence::Triple {
                literals: b"",
                offset: 15,
                match_len: 5,
            }
        );
    }));
    // Nothing further may be emitted.
    assert!(!matcher.next_sequence(|_| {}));
}
10624
#[cfg(any())] // disabled: tested legacy MatchGenerator/SuffixStore behavior removed in phase 1b
#[test]
fn simple_matcher_skip_matching_with_incompressible_hint_uses_sparse_prefix() {
    let block = b"abcdefghijklmnopqrstuvwxyz012345".to_vec();
    // One probe is cut from index 3 of the block, the other from its last
    // MIN_MATCH_LEN bytes.
    let sparse_probe = block[3..3 + MIN_MATCH_LEN].to_vec();
    let tail_probe = block[block.len() - MIN_MATCH_LEN..].to_vec();

    // Part 1: after a skip with the incompressible hint, the probe from the
    // sparse prefix must not match immediately (observable behavior check).
    let mut matcher = MatchGenerator::new(128);
    matcher.add_data(block.clone(), SuffixStore::with_capacity(256), |_, _| {});
    matcher.skip_matching_with_hint(Some(true));
    matcher.add_data(sparse_probe, SuffixStore::with_capacity(256), |_, _| {});

    let mut sparse_first_is_literals = None;
    assert!(matcher.next_sequence(|seq| {
        if sparse_first_is_literals.is_none() {
            sparse_first_is_literals = Some(matches!(seq, Sequence::Literals { .. }));
        }
    }));
    assert!(
        sparse_first_is_literals.unwrap_or(false),
        "sparse-start probe should not produce an immediate match"
    );

    // Part 2: on a fresh matcher, the dense tail must remain indexed so a
    // match across the block boundary is found right away.
    let mut matcher = MatchGenerator::new(128);
    matcher.add_data(block, SuffixStore::with_capacity(256), |_, _| {});
    matcher.skip_matching_with_hint(Some(true));
    matcher.add_data(tail_probe, SuffixStore::with_capacity(256), |_, _| {});

    let mut tail_first_is_immediate_match = None;
    assert!(matcher.next_sequence(|seq| {
        if tail_first_is_immediate_match.is_none() {
            tail_first_is_immediate_match =
                Some(matches!(seq, Sequence::Triple { literals, .. } if literals.is_empty()));
        }
    }));
    assert!(
        tail_first_is_immediate_match.unwrap_or(false),
        "dense tail probe should match immediately at block start"
    );
}
10671
#[cfg(any())] // disabled: tested legacy MatchGenerator/SuffixStore behavior removed in phase 1b
#[test]
fn simple_matcher_add_suffixes_till_backfills_last_searchable_anchor() {
    let input = b"01234abcde".to_vec();
    let mut matcher = MatchGenerator::new(64);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;
    matcher.add_data(input, SuffixStore::with_capacity(64), |_, _| {});

    matcher.add_suffixes_till(10, FAST_HASH_FILL_STEP);

    // The key made of bytes 5..10 must be registered at position 5.
    let newest = matcher.window.last().unwrap();
    let registered = newest.suffixes.get(&newest.data[5..10]);
    assert_eq!(registered, Some(5));
}
10688
#[cfg(any())] // disabled: tested legacy MatchGenerator/SuffixStore behavior removed in phase 1b
#[test]
fn simple_matcher_add_suffixes_till_skips_when_idx_below_min_match_len() {
    let input = b"abcdefghijklmnopqrstuvwxyz".to_vec();
    let mut matcher = MatchGenerator::new(128);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;
    matcher.add_data(input, SuffixStore::with_capacity(1 << 16), |_, _| {});

    // One short of MIN_MATCH_LEN: nothing may be registered.
    matcher.add_suffixes_till(MIN_MATCH_LEN - 1, FAST_HASH_FILL_STEP);

    let newest = matcher.window.last().unwrap();
    let registered = newest.suffixes.get(&newest.data[..MIN_MATCH_LEN]);
    assert_eq!(registered, None);
}
10706
#[cfg(any())] // disabled: tested legacy MatchGenerator/SuffixStore behavior removed in phase 1b
#[test]
fn simple_matcher_add_suffixes_till_fast_step_registers_interleaved_positions() {
    let input = b"abcdefghijklmnopqrstuvwxyz".to_vec();
    let mut matcher = MatchGenerator::new(128);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;
    matcher.add_data(input, SuffixStore::with_capacity(1 << 16), |_, _| {});

    matcher.add_suffixes_till(17, FAST_HASH_FILL_STEP);

    // Positions 0, 3, 6, 9 and 12 must each be registered under their own
    // MIN_MATCH_LEN-byte key.
    let newest = matcher.window.last().unwrap();
    for pos in (0usize..=12).step_by(3) {
        let registered = newest.suffixes.get(&newest.data[pos..pos + MIN_MATCH_LEN]);
        assert_eq!(
            registered,
            Some(pos),
            "expected interleaved suffix registration at pos {pos}"
        );
    }
}
10730
/// Three 6-byte blocks go through a 16-byte Dfast window; the first two are
/// skipped. Replaying the sequences emitted for the third block on top of
/// the second block's bytes must reproduce the second and third blocks.
#[test]
fn dfast_skip_matching_handles_window_eviction() {
    let repeated_block = [7u8, 8, 9, 10, 11, 12];
    let mut matcher = DfastMatchGenerator::new(16);

    matcher.add_data([1u8, 2, 3, 4, 5, 6].to_vec(), |_| {});
    matcher.skip_matching(None);
    matcher.add_data(repeated_block.to_vec(), |_| {});
    matcher.skip_matching(None);
    matcher.add_data(repeated_block.to_vec(), |_| {});

    // Replay buffer, seeded with the second block so that back-references
    // into it resolve.
    let mut reconstructed = repeated_block.to_vec();
    matcher.start_matching(|seq| {
        let (literals, back_reference) = match seq {
            Sequence::Literals { literals } => (literals, None),
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => (literals, Some((offset, match_len))),
        };
        reconstructed.extend_from_slice(literals);
        if let Some((offset, match_len)) = back_reference {
            // Copy byte by byte: the source range may overlap the bytes
            // being appended.
            let start = reconstructed.len() - offset;
            for src in start..start + match_len {
                let byte = reconstructed[src];
                reconstructed.push(byte);
            }
        }
    });

    assert_eq!(reconstructed, [7, 8, 9, 10, 11, 12, 7, 8, 9, 10, 11, 12]);
}
10760
/// The buffer handed to the `add_data` callback must report a length of 8
/// (the bytes written), not the 64 bytes of capacity reserved for it.
#[test]
fn dfast_add_data_callback_reports_evicted_len_not_capacity() {
    // Builds a buffer that reserves 64 bytes of capacity but holds only the
    // given bytes, so length and capacity differ.
    let oversized = |bytes: &[u8]| {
        let mut buf = Vec::with_capacity(64);
        buf.extend_from_slice(bytes);
        buf
    };

    // The window is 8 bytes and the first commit is 8 bytes.
    let mut matcher = DfastMatchGenerator::new(8);
    matcher.add_data(oversized(b"abcdefgh"), |_| {});

    // Record the length the callback sees on the second commit.
    let mut observed_evicted_len = None;
    matcher.add_data(oversized(b"ijklmnop"), |handed_back| {
        observed_evicted_len = Some(handed_back.len());
    });

    assert_eq!(
        observed_evicted_len,
        Some(8),
        "eviction callback must report evicted byte length, not backing capacity"
    );
}
10783
/// Driver-path regression for the `commit_space` Dfast-branch eviction
/// accounting bug (CodeRabbit Critical on PR #146).
///
/// Background: the old code counted the INPUT buffer length as
/// `evicted_bytes`, because Dfast's `add_data` callback receives the input
/// `Vec<u8>` for pool recycling (Dfast stores bytes in `history`, not in
/// per-block Vecs). On the saturated-window 1:1 path the two numbers
/// coincide, so an earlier fixture passed by accident. The fix derives
/// eviction from the `window_size` delta plus the input length:
/// `evicted = pre + space_len - post`.
///
/// This test goes through `MatchGeneratorDriver::commit_space` rather than
/// `DfastMatchGenerator::add_data`, so it catches a regression that swaps
/// the Dfast branch back to `evicted_bytes += data.len()`. An older draft
/// recomputed the formula by hand on the matcher and would pass either way.
///
/// Fixture: `max_window_size = 10`, starting budget 100, commits of
/// `[4, 4, 5]` bytes. The popped block (4 bytes) and the new input
/// (5 bytes) differ in size:
///
///   * after commit `"abcd"` (4 B): window_blocks=[4], ws=4
///   * after commit `"efgh"` (4 B): window_blocks=[4,4], ws=8
///   * commit `"ijklm"` (5 B): 8+5>10 → pop front [4] (evict=4),
///     push 5 → window_blocks=[4,5], ws=9
///
/// `commit_space` then calls `retire_dictionary_budget(evicted)`, and the
/// `trim_after_budget_retire` cascade (which fires whenever the retire call
/// returns true) trims the now-oversize window and retires further budget.
/// Both helpers are defined elsewhere in this file; the trace below follows
/// the original regression notes and matches the values asserted here:
///
///   1st commit: evicted=0, no retire.
///   2nd commit: evicted=0, no retire.
///   3rd commit: evicted=4. retire(4) → budget=96, max_window=6.
///     trim_after_budget_retire:
///       iter1: ws=9 > max=6, pop [4] → ws=5, evicted=4.
///              retire(4) → budget=92, max_window=2.
///       iter2: ws=5 > max=2, pop [5] → ws=0, evicted=5.
///              retire(5) → budget=87, max_window=0.
///       iter3: ws=0, no trim, retire(0) → false, exit.
///   Final budget = 87. Final max_window_size = 0.
///
/// With the bug (`evicted` taken from `data.len()`), the original notes
/// record that retirement starts too early: the cascade after the 2nd
/// commit has already shrunk `max_window_size` to 0, and the 3rd commit
/// panics on `data.len() <= max_window_size`. Either way the regression
/// surfaces as a test failure.
#[test]
fn dfast_commit_space_eviction_uses_window_size_delta() {
    use crate::encoding::CompressionLevel;

    // Builds a commit buffer that reserves 64 bytes of capacity but holds
    // only the given bytes.
    let oversized = |bytes: &[u8]| {
        let mut buf = Vec::with_capacity(64);
        buf.extend_from_slice(bytes);
        buf
    };

    let mut driver = MatchGeneratorDriver::new(10, 1);
    driver.reset(CompressionLevel::Level(3));
    assert!(matches!(driver.storage, MatcherStorage::Dfast(_)));

    // Replace the level-derived window with a tiny one so that the
    // 4 + 4 + 5 = 13 bytes committed below cross the boundary. A 16 KiB+
    // default window would never evict on this little data, leaving the bug
    // invisible.
    driver.dfast_matcher_mut().max_window_size = 10;
    driver.dictionary_retained_budget = 100;

    driver.commit_space(oversized(b"abcd"));
    assert_eq!(
        driver.dictionary_retained_budget, 100,
        "1st commit fills window 0 → 4, no eviction, no retire"
    );

    driver.commit_space(oversized(b"efgh"));
    assert_eq!(
        driver.dictionary_retained_budget, 100,
        "2nd commit fills window 4 → 8, no eviction, no retire"
    );

    driver.commit_space(oversized(b"ijklm"));
    assert_eq!(
        driver.dictionary_retained_budget, 87,
        "3rd commit + trim_after_budget_retire cascade. With the fix \
         (evicted=4 from window_size delta) the cascade reclaims 100 \
         → 96 → 92 → 87. With the bug (evicted=5 from data.len()) the \
         3rd commit would panic on `data.len() <= max_window_size` \
         after the 2nd commit's cascade had already shrunk \
         max_window_size to 0."
    );
    assert_eq!(
        driver.dfast_matcher_mut().max_window_size,
        0,
        "cascade drains max_window_size to 0 once budget reclaim \
         exceeds the initial window size"
    );
}
10897
#[test]
fn dfast_trim_to_window_evicts_oldest_block_by_length() {
    // Since the history-only storage refactor (#111 Phase 7c step 3)
    // Dfast keeps no input `Vec<u8>` around: the contiguous `history`
    // buffer is the only byte store and `add_data` gives the input Vec
    // back to the caller's pool right away. `trim_to_window` therefore
    // has no Vec to return, and eviction is observed through
    // `window_size` dropping by the per-block length tracked in
    // `window_blocks`.
    let mut dfast = DfastMatchGenerator::new(16);

    // Two 8-byte blocks fill the 16-byte window exactly.
    for chunk in [b"abcdefgh", b"ijklmnop"] {
        let mut buf = Vec::with_capacity(64);
        buf.extend_from_slice(chunk);
        dfast.add_data(buf, |_| {});
    }
    assert_eq!(dfast.window_size, 16);
    assert_eq!(dfast.window_blocks.len(), 2);

    // Halve the window, then trim: the older block has to go.
    dfast.max_window_size = 8;
    dfast.trim_to_window();

    // The Dfast `trim_to_window` takes no callback (unlike the HC/Row
    // variants, which accept `impl FnMut(Vec<u8>)`), so there is no
    // closure invocation to assert on. That signature is itself the
    // contract: starting to invoke a callback would be a compile-time
    // break that the dispatcher and this test would surface.
    assert_eq!(
        dfast.window_size, 8,
        "exactly one 8-byte block must remain"
    );
    assert_eq!(dfast.window_blocks.len(), 1);
    assert_eq!(dfast.history_abs_start, 8);
}
10939
#[test]
fn dfast_inserts_tail_positions_for_next_block_matching() {
    let mut dfast = DfastMatchGenerator::new(1 << 22);

    // The first block is expected to come back as literals only; the
    // bytes are replayed into `replay` so later matches can be resolved.
    dfast.add_data(b"012345bcdea".to_vec(), |_| {});
    let mut replay = Vec::new();
    dfast.start_matching(|seq| match seq {
        Sequence::Triple { .. } => unreachable!("first block should not match history"),
        Sequence::Literals { literals } => replay.extend_from_slice(literals),
    });
    assert_eq!(replay, b"012345bcdea");

    // The second block must be emitted as exactly one sequence: a match
    // with no leading literals that reaches back into the first block.
    dfast.add_data(b"bcdeabcdeab".to_vec(), |_| {});
    let mut emitted = false;
    dfast.start_matching(|seq| {
        assert!(!emitted, "expected a single cross-block match");
        emitted = true;
        match seq {
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => {
                assert_eq!(literals, b"");
                assert_eq!(offset, 5);
                assert_eq!(match_len, 11);
                // The match is longer than its offset, so the copy
                // overlaps its own output and must go byte by byte.
                let from = replay.len() - offset;
                for src in from..from + match_len {
                    let copied = replay[src];
                    replay.push(copied);
                }
            }
            Sequence::Literals { .. } => {
                panic!("expected tail-anchored cross-block match before any literals")
            }
        }
    });

    assert!(
        emitted,
        "expected tail-anchored cross-block match"
    );
    assert_eq!(replay, b"012345bcdeabcdeabcdeab");
}
10984
/// Regression for #49: pins `MatchTable::backfill_boundary_positions`
/// on the [`HcMatchGenerator`] lazy path.
///
/// `backfill_boundary_positions` seeds ONLY the final `< 4` bytes of
/// the previous slice, i.e. positions in
/// `[current_abs_start - 3, current_abs_start)`. Those are the bytes
/// `insert_position` could not hash earlier because hashing requires
/// 4 bytes of lookahead. The 8 MiB window roundtrip test covers
/// cross-slice behaviour end-to-end but does not isolate the backfill
/// of those last 1-3 unhashable bytes.
///
/// The fixture forces the cross-block candidate into
/// `[block_1_end - 3, block_1_end)`:
///
/// - Block 1 is `b"PQRSTBCD"` (8 bytes). Its `start_matching` hashes
///   positions 0..=4 (each has 4 bytes of forward context); positions
///   5/6/7 form the unhashable tail.
/// - Block 2 is `b"BCDBCDBCDB"` (10 bytes). At absolute position 8
///   (start of block 2) the 4-byte window is `b"BCDB"`. The ONLY
///   earlier insertion of `b"BCDB"` into the hash + chain tables is
///   position 5, made by `backfill_boundary_positions` on entry to the
///   next slice (`data[5..9] = b"BCD" + block_2[0] = b"BCDB"`).
///
/// Should `backfill_boundary_positions` regress, position 5 is never
/// hashed, the lookup at position 8 misses, and the lazy parser emits
/// a leading literals run instead, so `offset == 3, match_len >= 4`
/// stops holding.
#[test]
fn hashchain_inserts_tail_positions_for_next_block_matching() {
    let mut hc = HcMatchGenerator::new(1 << 22);
    hc.configure(HC_CONFIG, super::strategy::StrategyTag::Lazy, 22);

    hc.table.add_data(b"PQRSTBCD".to_vec(), |_| {});
    let mut replay = alloc::vec::Vec::new();
    hc.start_matching(|seq| match seq {
        Sequence::Triple { .. } => unreachable!("first block has no internal repeats"),
        Sequence::Literals { literals } => replay.extend_from_slice(literals),
    });
    assert_eq!(replay, b"PQRSTBCD");

    hc.table.add_data(b"BCDBCDBCDB".to_vec(), |_| {});
    // `(offset, match_len)` of the first sequence emitted for block 2;
    // every later sequence is ignored.
    let mut boundary_match: Option<(usize, usize)> = None;
    hc.start_matching(|seq| {
        if boundary_match.is_some() {
            return;
        }
        match seq {
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => {
                assert_eq!(literals, b"", "no leading literals on the boundary match");
                boundary_match = Some((offset, match_len));
            }
            Sequence::Literals { .. } => {
                panic!(
                    "expected tail-anchored cross-block match before any literals — \
                     backfill_boundary_positions did not seed positions 5/6/7"
                )
            }
        }
    });

    let (offset, match_len) = boundary_match.expect(
        "expected tail-anchored cross-block match emitted from backfill_boundary_positions",
    );
    // First the coarse property (candidate lies in the unhashable
    // tail), then the exact candidate position.
    assert!(
        (1..=3).contains(&offset),
        "boundary match offset {offset} must point into the unhashable tail \
         (positions 5/6/7 of an 8-byte block 1) so the test specifically \
         locks down backfill_boundary_positions",
    );
    assert_eq!(
        offset, 3,
        "candidate position must land at 5 (= block_1_len - 3) so the 4-byte \
         window `data[5..9] = b\"BCDB\"` matches block 2's first hash lookup",
    );
    assert!(
        match_len >= HC_MIN_MATCH_LEN,
        "match_len {match_len} must clear the HC min-match floor",
    );
}
11070
#[test]
fn dfast_dense_skip_matching_backfills_previous_tail_for_next_block() {
    let tail = b"Qz9kLm2Rp";
    let mut dfast = DfastMatchGenerator::new(1 << 22);

    // Block 1 ends with `tail` and is skipped rather than matched.
    // NOTE(review): `Some(false)` is presumably the dense mode, going
    // by this test's name; confirm against `skip_matching`.
    let mut first = b"0123456789abcdef".to_vec();
    first.extend_from_slice(tail);
    dfast.add_data(first.clone(), |_| {});
    dfast.skip_matching(Some(false));

    // Block 2 begins with the same `tail`, so its first sequence should
    // be a match reaching back across the block boundary.
    let mut second = tail.to_vec();
    second.extend_from_slice(b"after-tail-literals");
    dfast.add_data(second, |_| {});

    // `(literal_len, offset, match_len)` of the first sequence only;
    // a literals-only sequence is recorded with zero offset/length.
    let mut head: Option<(usize, usize, usize)> = None;
    dfast.start_matching(|seq| {
        if head.is_none() {
            head = Some(match seq {
                Sequence::Triple {
                    literals,
                    offset,
                    match_len,
                } => (literals.len(), offset, match_len),
                Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
            });
        }
    });

    let (lit_len, offset, match_len) = head.expect("expected at least one sequence");
    assert_eq!(
        lit_len, 0,
        "expected immediate cross-block match at block start"
    );
    assert_eq!(
        offset,
        tail.len(),
        "expected dense skip to preserve cross-boundary tail match"
    );
    assert!(
        match_len >= DFAST_MIN_MATCH_LEN,
        "match length should satisfy dfast minimum match length"
    );
}
11114
#[test]
fn dfast_sparse_skip_matching_preserves_tail_cross_block_match() {
    let tail = b"Qz9kLm2Rp";
    let mut dfast = DfastMatchGenerator::new(1 << 22);

    // Block 1: 4096 pseudo-random bytes whose final bytes are replaced
    // by `tail`, then skipped with `Some(true)`.
    // NOTE(review): `Some(true)` is presumably the sparse mode, going
    // by this test's name; confirm against `skip_matching`.
    let mut first = deterministic_high_entropy_bytes(0x9E37_79B9_7F4A_7C15, 4096);
    let keep = first.len() - tail.len();
    first.truncate(keep);
    first.extend_from_slice(tail);
    dfast.add_data(first.clone(), |_| {});

    dfast.skip_matching(Some(true));

    // Block 2 opens with the same `tail`.
    let mut second = tail.to_vec();
    second.extend_from_slice(b"after-tail-literals");
    dfast.add_data(second, |_| {});

    // Record `(literal_len, offset, match_len)` for the first sequence
    // and ignore everything after it.
    let mut head: Option<(usize, usize, usize)> = None;
    dfast.start_matching(|seq| {
        if head.is_some() {
            return;
        }
        let summary = match seq {
            Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => (literals.len(), offset, match_len),
        };
        head = Some(summary);
    });

    let (lit_len, offset, match_len) = head.expect("expected at least one sequence");
    assert_eq!(
        lit_len, 0,
        "expected immediate cross-block match at block start"
    );
    assert_eq!(
        offset,
        tail.len(),
        "expected match against densely seeded tail"
    );
    assert!(
        match_len >= DFAST_MIN_MATCH_LEN,
        "match length should satisfy dfast minimum match length"
    );
}
11160
#[test]
fn dfast_skip_matching_dense_backfills_newly_hashable_long_tail_positions() {
    let mut dfast = DfastMatchGenerator::new(1 << 22);
    let block = deterministic_high_entropy_bytes(0x7A64_0315_D4E1_91C3, 4096);
    let block_len = block.len();
    dfast.add_data(block, |_| {});
    dfast.skip_matching_dense();

    // One extra byte of history gives the start 7 bytes before the old
    // block end its full 8 bytes of lookahead for the first time, so
    // the second dense skip is the one that has to insert it.
    dfast.add_data(alloc::vec![0xAB], |_| {});
    dfast.skip_matching_dense();

    let boundary_abs = block_len - 7;
    let boundary_rel = boundary_abs - dfast.history_abs_start;
    let window = dfast.live_history();
    assert!(
        boundary_rel + 8 <= window.len(),
        "fixture must make the boundary start long-hashable"
    );
    let bucket = dfast.long_hash_index(&window[boundary_rel..]);
    let expected_slot = dfast.pack_slot(boundary_abs);
    // The long-hash table is indexed straight to a single slot value
    // (donor parity), hence the direct equality below.
    assert_ne!(
        expected_slot, DFAST_EMPTY_SLOT,
        "pack_slot must never return the empty-slot sentinel for a real position"
    );
    assert_eq!(
        dfast.long_hash[bucket], expected_slot,
        "dense skip must seed long-hash entry for newly hashable boundary start"
    );
}
11194
#[test]
fn dfast_seed_remaining_hashable_starts_seeds_last_short_hash_positions() {
    let mut dfast = DfastMatchGenerator::new(1 << 20);
    dfast.add_data(
        deterministic_high_entropy_bytes(0x13F0_9A6D_55CE_7B21, 64),
        |_| {},
    );
    dfast.ensure_hash_tables();

    // Absolute extent of the newest block in the window.
    let block_len = dfast.window_blocks.back().map_or(0, |len| *len);
    let block_abs_end = dfast.history_abs_start + dfast.window_size;
    let block_abs_start = block_abs_end - block_len;

    // Start seeding DFAST_MIN_MATCH_LEN bytes before the block end.
    let seed_from = block_len - DFAST_MIN_MATCH_LEN;
    dfast.seed_remaining_hashable_starts(block_abs_start, block_len, seed_from);

    // The start 5 bytes before the block end is the one checked in the
    // short-hash table.
    let last_abs = block_abs_end - 5;
    let last_rel = last_abs - dfast.history_abs_start;
    let window = dfast.live_history();
    assert!(
        last_rel + 5 <= window.len(),
        "fixture must leave the last short-hash start valid"
    );
    let bucket = dfast.short_hash_index(&window[last_rel..]);
    let expected_slot = dfast.pack_slot(last_abs);
    assert_ne!(
        expected_slot, DFAST_EMPTY_SLOT,
        "pack_slot must never return the empty-slot sentinel for a real position"
    );
    assert_eq!(
        dfast.short_hash[bucket], expected_slot,
        "tail seeding must include the last 5-byte-hashable start"
    );
}
11225
#[test]
fn dfast_seed_remaining_hashable_starts_handles_pos_at_block_end() {
    let mut dfast = DfastMatchGenerator::new(1 << 20);
    dfast.add_data(
        deterministic_high_entropy_bytes(0x7BB2_DA91_441E_C0EF, 64),
        |_| {},
    );
    dfast.ensure_hash_tables();

    // Absolute extent of the newest block in the window.
    let block_len = dfast.window_blocks.back().map_or(0, |len| *len);
    let block_abs_end = dfast.history_abs_start + dfast.window_size;
    let block_abs_start = block_abs_end - block_len;

    // Edge case under test: the starting position passed in is the
    // block end itself (`pos == current_len`).
    dfast.seed_remaining_hashable_starts(block_abs_start, block_len, block_len);

    // Even so, the start 5 bytes before the block end must have been
    // written to the short-hash table.
    let last_abs = block_abs_end - 5;
    let last_rel = last_abs - dfast.history_abs_start;
    let window = dfast.live_history();
    assert!(
        last_rel + 5 <= window.len(),
        "fixture must leave the last short-hash start valid"
    );
    let bucket = dfast.short_hash_index(&window[last_rel..]);
    let expected_slot = dfast.pack_slot(last_abs);
    assert_ne!(
        expected_slot, DFAST_EMPTY_SLOT,
        "pack_slot must never return the empty-slot sentinel for a real position"
    );
    assert_eq!(
        dfast.short_hash[bucket], expected_slot,
        "tail seeding must still include the last 5-byte-hashable start when pos is at block end"
    );
}
11255
/// `ensure_room_for` has to call `reduce()` as soon as the requested
/// absolute position would need a relative offset beyond
/// `u32::MAX - DFAST_REBASE_GUARD_BAND`. Once rebased, an entry that
/// was stored at a much smaller absolute position lies below
/// `reducer` and is reset to `DFAST_EMPTY_SLOT`, while a new insert at
/// the boundary position must `pack_slot` to a non-sentinel value that
/// unpacks to the same absolute position again. Mirrors
/// `LdmHashTable::ensure_room_for_*` from PR #139.
///
/// Not gated by target: `trigger_abs = u32::MAX -
/// DFAST_REBASE_GUARD_BAND + 1 = 0xC0000000` still fits `usize` on
/// i686 (`usize::MAX = u32::MAX`), so the packed-slot boundary path
/// and the u32 ↔ usize round-trip run on every pointer width we ship.
#[test]
fn dfast_ensure_room_for_rebases_above_guard_band() {
    let mut dfast = DfastMatchGenerator::new(1 << 22);
    dfast.set_hash_bits(10, 10);
    dfast.ensure_hash_tables();

    // Plant one early position in bucket 0 of BOTH tables. The rebase
    // contract covers `short_hash` and `long_hash` alike, so seeding
    // only one would let a regression in the other go unnoticed.
    // Writing the packed slot directly keeps the test on the rebase
    // mechanics without needing the history/window setup that
    // `insert_position` requires.
    let early_abs = 1024usize;
    let early_slot = dfast.pack_slot(early_abs);
    assert_ne!(early_slot, DFAST_EMPTY_SLOT);
    dfast.short_hash[0] = early_slot;
    dfast.long_hash[0] = early_slot;

    // With `position_base = 0`, the first `abs_pos` failing the
    // `rel <= max_rel` check is `u32::MAX - DFAST_REBASE_GUARD_BAND
    // + 1`. A single `reduce(DFAST_REBASE_GUARD_BAND)` then moves the
    // base forward by exactly the guard band.
    let guard_band = DFAST_REBASE_GUARD_BAND as usize;
    let trigger_abs = (u32::MAX as usize) - guard_band + 1;
    assert_eq!(dfast.position_base, 0);
    dfast.ensure_room_for(trigger_abs);
    assert_eq!(
        dfast.position_base, guard_band,
        "rebase must advance position_base by DFAST_REBASE_GUARD_BAND"
    );

    // abs=1024 was stored as slot 1025. The rebase subtracts
    // `DFAST_REBASE_GUARD_BAND` (= 2^30) from every slot, and since
    // 1025 <= 2^30 the entry collapses to the empty sentinel (donor
    // parity with `ZSTD_window_reduce`'s clamp-at-zero rule). Both
    // tables are checked because `reduce()` walks them one after the
    // other.
    assert_eq!(
        dfast.short_hash[0], DFAST_EMPTY_SLOT,
        "pre-rebase short-hash entries below the reducer must become empty"
    );
    assert_eq!(
        dfast.long_hash[0], DFAST_EMPTY_SLOT,
        "pre-rebase long-hash entries below the reducer must become empty"
    );

    // A position packed after the rebase has to survive the round
    // trip: non-sentinel slot, and `position_base + slot - 1` gives
    // back the absolute position.
    let fresh_slot = dfast.pack_slot(trigger_abs);
    assert_ne!(fresh_slot, DFAST_EMPTY_SLOT);
    let round_trip = dfast.position_base + (fresh_slot as usize) - 1;
    assert_eq!(
        round_trip, trigger_abs,
        "post-rebase pack/unpack must round-trip the absolute position"
    );
}
11328
#[test]
fn dfast_sparse_skip_matching_backfills_previous_tail_for_consecutive_sparse_blocks() {
    // The marker straddles the first→second block boundary: its first
    // 3 bytes end block 1 and its remaining 8 bytes open block 2.
    let boundary_prefix = [0xFA, 0xFB, 0xFC];
    let boundary_suffix = [0xFD, 0xEE, 0xAD, 0xBE, 0xEF, 0x11, 0x22, 0x33];
    let mut dfast = DfastMatchGenerator::new(1 << 22);

    // Block 1: pseudo-random bytes ending in the marker prefix, skipped.
    let mut first = deterministic_high_entropy_bytes(0xA5A5_5A5A_C3C3_3C3C, 4096);
    let prefix_at = first.len() - boundary_prefix.len();
    first[prefix_at..].copy_from_slice(&boundary_prefix);
    dfast.add_data(first, |_| {});
    dfast.skip_matching(Some(true));

    // Block 2: pseudo-random bytes starting with the marker suffix,
    // skipped as well.
    let mut second = deterministic_high_entropy_bytes(0xA5A5_5A5A_C3C3_3C3C, 4096);
    second[..boundary_suffix.len()].copy_from_slice(&boundary_suffix);
    dfast.add_data(second.clone(), |_| {});
    dfast.skip_matching(Some(true));

    // Block 3 replays the whole marker, so its first sequence should be
    // a match that starts at the first→second boundary.
    let mut third = boundary_prefix.to_vec();
    third.extend_from_slice(&boundary_suffix);
    third.extend_from_slice(b"-trailing-literals");
    dfast.add_data(third, |_| {});

    // `(literal_len, offset, match_len)` of the first sequence only.
    let mut head: Option<(usize, usize, usize)> = None;
    dfast.start_matching(|seq| {
        if head.is_none() {
            head = Some(match seq {
                Sequence::Triple {
                    literals,
                    offset,
                    match_len,
                } => (literals.len(), offset, match_len),
                Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
            });
        }
    });

    let (lit_len, offset, match_len) = head.expect("expected at least one sequence");
    assert_eq!(
        lit_len, 0,
        "expected immediate match from the prior sparse-skip boundary"
    );
    // Distance back from the start of block 3 to the marker start:
    // all of block 2 plus the prefix bytes at the end of block 1.
    assert_eq!(
        offset,
        second.len() + boundary_prefix.len(),
        "expected match against backfilled first→second boundary start"
    );
    assert!(
        match_len >= DFAST_MIN_MATCH_LEN,
        "match length should satisfy dfast minimum match length"
    );
}
11381
#[test]
fn fastest_hint_iteration_23_sequences_reconstruct_source() {
    // Deterministic byte stream: each output byte is taken from bit 33
    // upward of a wrapping multiply-add state update.
    fn generate_data(seed: u64, len: usize) -> Vec<u8> {
        let mut state = seed;
        (0..len)
            .map(|_| {
                state = state
                    .wrapping_mul(6364136223846793005)
                    .wrapping_add(1442695040888963407);
                (state >> 33) as u8
            })
            .collect()
    }

    let iteration = 23u64;
    let base_len = (iteration * 89 % 16384) as usize;
    let mut data = generate_data(iteration, base_len);
    // Tack the same 128-byte slice on twice so the input is guaranteed
    // to contain a repeat the matcher could reference.
    let repeat = data[128..256].to_vec();
    for _ in 0..2 {
        data.extend_from_slice(&repeat);
    }

    let mut drv = MatchGeneratorDriver::new(1024 * 128, 1);
    drv.set_source_size_hint(data.len() as u64);
    drv.reset(CompressionLevel::Fastest);
    let mut space = drv.get_next_space();
    space[..data.len()].copy_from_slice(&data);
    space.truncate(data.len());
    drv.commit_space(space);

    // Replay every sequence the way a decoder would.
    let mut rebuilt = Vec::with_capacity(data.len());
    let mut saw_triple = false;
    drv.start_matching(|seq| {
        let (lits, back_ref) = match seq {
            Sequence::Literals { literals } => (literals, None),
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => (literals, Some((offset, match_len))),
        };
        rebuilt.extend_from_slice(lits);
        if let Some((offset, match_len)) = back_ref {
            saw_triple = true;
            assert!(offset > 0, "offset must be non-zero");
            assert!(
                offset <= rebuilt.len(),
                "offset must reference already-produced bytes: offset={} produced={}",
                offset,
                rebuilt.len()
            );
            // Copy one byte at a time: a match may overlap the bytes
            // it is producing.
            let from = rebuilt.len() - offset;
            for src in from..from + match_len {
                let copied = rebuilt[src];
                rebuilt.push(copied);
            }
        }
    });

    // A Triple is not guaranteed for this particular input: the
    // donor-shape kernel advances ip0 with a kSearchStrength-driven,
    // growing stride and may step over the repeat region, whereas the
    // legacy SuffixStore matcher visited every position. `saw_triple`
    // is therefore tracked but deliberately not asserted; only the
    // reconstruction below is a correctness invariant.
    let _ = saw_triple;
    assert_eq!(rebuilt, data);
}
11451
#[test]
fn fast_levels_dispatch_per_level_hash_log_and_mls() {
    // Level 1 follows the donor row `{ 19, 13, 14, 1, 7, 0, ZSTD_fast }`:
    // window_log=19, hash_log=14, mls=7.
    let level_one = resolve_level_params(CompressionLevel::Level(1), None)
        .fast
        .unwrap();
    assert_eq!(level_one.hash_log, 14);
    assert_eq!(level_one.mls, 7);
    assert_eq!(level_one.step_size, 2);

    // Negative levels use donor row 0 ("base for negative"):
    // hash_log=13, mls=7. The 32 KiB table is L1d-resident (each probe
    // an L1 hit instead of the L2 access a 64 KiB hash_log=14 table
    // costs), and minMatch=7 discards short-distance 6-byte matches,
    // which is donor parity on both ratio and throughput.
    // step_size uses the donor formula targetLength = -level,
    // step_size = (-level) + 1, i.e. 2..8 for L-1..L-7.
    for n in -7..=-1 {
        let fast = resolve_level_params(CompressionLevel::Level(n), None)
            .fast
            .unwrap();
        let expected_step = ((-n) as usize) + 1;
        assert_eq!(fast.hash_log, 13, "Level({n}) fast_hash_log");
        assert_eq!(fast.mls, 7, "Level({n}) fast_mls");
        assert_eq!(fast.step_size, expected_step, "Level({n}) fast_step_size");
    }

    // Fastest and Uncompressed have their own tuning outside the
    // negative-level ladder: both resolve to hash_log=14 / mls=6 /
    // step_size=2 and differ only in window_log. Uncompressed uses the
    // smaller window_log=17 (no history references, smaller decoder
    // reservation) versus 19 for Fastest.
    for (level, window_log) in [
        (CompressionLevel::Fastest, 19),
        (CompressionLevel::Uncompressed, 17),
    ] {
        let params = resolve_level_params(level, None);
        let fast = params.fast.unwrap();
        assert_eq!(
            (params.window_log, fast.hash_log, fast.mls, fast.step_size),
            (window_log, 14, 6, 2),
        );
    }
}
11497
/// End-to-end check of the driver plumbing: every Fast level is pushed
/// through `MatchGeneratorDriver` reset, and the inner
/// `FastKernelMatcher` must then report the same
/// `(hash_log, mls, step_size)` tuple as `resolve_level_params`.
/// This catches wiring mistakes that a parameter-only test cannot see:
/// swapped arguments, a step_size left over from the previous frame,
/// or values stuck on their defaults.
#[test]
fn fast_levels_driver_wiring_threads_cparams_into_inner_matcher() {
    let fast_levels = [
        CompressionLevel::Level(1),
        CompressionLevel::Fastest,
        CompressionLevel::Uncompressed,
        CompressionLevel::Level(-1),
        CompressionLevel::Level(-2),
        CompressionLevel::Level(-3),
        CompressionLevel::Level(-4),
        CompressionLevel::Level(-5),
        CompressionLevel::Level(-6),
        CompressionLevel::Level(-7),
    ];

    let mut driver = MatchGeneratorDriver::new(64 * 1024, 1);

    for level in fast_levels.iter().copied() {
        let params = resolve_level_params(level, None);
        // Guard the fixture itself: a level that does not resolve to a
        // Fast-strategy row would make the rest of this iteration test
        // something other than what the test name promises.
        assert_eq!(
            params.strategy_tag,
            super::strategy::StrategyTag::Fast,
            "{level:?} must resolve to Fast strategy",
        );

        // Detour through a non-Fast strategy so that the reset below
        // takes the backend-switch path (`MatchGeneratorDriver::new` /
        // `simple_mut` rebuild the Fast variant through
        // `FastKernelMatcher::with_params`). Without the detour the
        // loop would remain in `BackendTag::Simple`, only ever hit
        // `FastKernelMatcher::reset`, and leave the `with_params`
        // wiring uncovered. `Default` resolves to the Dfast strategy,
        // which is sufficient to force the swap.
        crate::encoding::Matcher::reset(&mut driver, CompressionLevel::Default);

        // The production reset path, as used by FrameCompressor /
        // StreamingEncoder.
        crate::encoding::Matcher::reset(&mut driver, level);

        let expected = params.fast.unwrap();
        let inner = driver.simple_mut();
        assert_eq!(
            inner.hash_log(),
            expected.hash_log,
            "{level:?}: inner matcher hash_log mismatch — argument swap?",
        );
        assert_eq!(
            inner.mls(),
            expected.mls,
            "{level:?}: inner matcher mls mismatch — argument swap?",
        );
        assert_eq!(
            inner.step_size(),
            expected.step_size,
            "{level:?}: inner matcher step_size mismatch — stale value carried from prior reset?",
        );
    }
}
11567
/// Locks the nice-match threshold of levels 5-15 to the reference
/// `cParams.targetLength` from `clevels.h` table[0] (the default table,
/// `srcSize > 256 KB`). In the reference's lazy outer loop
/// `targetLength` acts as `sufficient_len`: the chain walk stops as
/// soon as a candidate reaches that length.
///
/// The reference runs levels 13-15 on btlazy2 while this crate runs
/// them on the hash-chain Lazy parser, but the reference
/// `targetLength` (32) is the nice-match threshold for both finders,
/// so it is mirrored as-is.
///
/// The expected value is read through `ZSTD_getCParams(level, 0, 0)`,
/// so an upstream table change is picked up without editing this test.
#[test]
fn lazy_band_target_len_matches_donor_default_table() {
    use zstd::zstd_safe::zstd_sys;

    for level in 5..=15i32 {
        // SAFETY: `ZSTD_getCParams` reads from a static table; safe to
        // call with any (level, srcSize, dictSize) combination.
        let reference = unsafe { zstd_sys::ZSTD_getCParams(level, 0, 0) };
        let params = resolve_level_params(CompressionLevel::Level(level), None);
        // L5 is greedy (Row backend, config in `row`); L6-15 are lazy
        // (HashChain, config in `hc`). Prefer `hc`, fall back to `row`;
        // both expose the donor `targetLength` as `target_len`.
        let target_len = if let Some(hc) = params.hc {
            hc.target_len
        } else {
            params
                .row
                .map(|row| row.target_len)
                .expect("lazy/greedy level carries hc or row config")
        };
        assert_eq!(
            target_len as u32, reference.targetLength,
            "L{level}: target_len ({target_len}) must match reference cParams.targetLength ({})",
            reference.targetLength
        );
    }
}
11603
/// Levels 13-15 copy the reference btlazy2 budget from `clevels.h`
/// table[0]: `search_depth == 1 << cParams.searchLog` (16 / 32 / 64),
/// and `window_log` / `hash_log` / `chain_log` equal to the reference
/// `windowLog` / `hashLog` / `chainLog`. These levels run on the
/// hash-chain Lazy parser instead of a binary-tree finder, so they do
/// not restore a strict ratio ladder above L12 on window-fitting
/// inputs. Checking the whole row rather than only `search_depth`
/// keeps the full budget aligned and protects each field from silent
/// drift.
#[test]
fn upper_lazy_band_params_match_donor_default_table() {
    use zstd::zstd_safe::zstd_sys;

    for level in 13..=15i32 {
        // SAFETY: `ZSTD_getCParams` reads from a static table; safe to
        // call with any (level, srcSize, dictSize) combination.
        let reference = unsafe { zstd_sys::ZSTD_getCParams(level, 0, 0) };
        let params = resolve_level_params(CompressionLevel::Level(level), None);
        let hc = params.hc.unwrap();

        // The reference stores the search budget as a log2 value.
        let reference_depth = 1u32 << reference.searchLog;
        assert_eq!(
            hc.search_depth as u32,
            reference_depth,
            "L{level}: hc.search_depth ({}) must equal 1<<cParams.searchLog ({})",
            hc.search_depth,
            reference_depth
        );

        // The remaining log2 sizes are compared one-to-one.
        assert_eq!(
            params.window_log as u32, reference.windowLog,
            "L{level}: window_log ({}) must equal cParams.windowLog ({})",
            params.window_log, reference.windowLog
        );
        assert_eq!(
            hc.hash_log as u32, reference.hashLog,
            "L{level}: hc.hash_log ({}) must equal cParams.hashLog ({})",
            hc.hash_log, reference.hashLog
        );
        assert_eq!(
            hc.chain_log as u32, reference.chainLog,
            "L{level}: hc.chain_log ({}) must equal cParams.chainLog ({})",
            hc.chain_log, reference.chainLog
        );
    }
}
structured_zstd/encoding/match_generator.rs

structured_zstd/encoding/
match_generator.rs