// structured_zstd/encoding/match_generator.rs
//! Matching algorithm used to find repeated parts in the original data
2//!
//! The Zstd format relies on finding repeated sequences of data and compressing these sequences as instructions to the decoder.
4//! A sequence basically tells the decoder "Go back X bytes and copy Y bytes to the end of your decode buffer".
5//!
6//! The task here is to efficiently find matches in the already encoded data for the current suffix of the not yet encoded data.
7
8use alloc::collections::VecDeque;
9use alloc::vec::Vec;
10#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
11use core::arch::aarch64::{uint8x16_t, vceqq_u8, vgetq_lane_u64, vld1q_u8, vreinterpretq_u64_u8};
12#[cfg(target_arch = "x86")]
13use core::arch::x86::{
14    __m128i, __m256i, _mm_cmpeq_epi8, _mm_loadu_si128, _mm_movemask_epi8, _mm256_cmpeq_epi8,
15    _mm256_loadu_si256, _mm256_movemask_epi8,
16};
17#[cfg(target_arch = "x86_64")]
18use core::arch::x86_64::{
19    __m128i, __m256i, _mm_cmpeq_epi8, _mm_loadu_si128, _mm_movemask_epi8, _mm256_cmpeq_epi8,
20    _mm256_loadu_si256, _mm256_movemask_epi8,
21};
22use core::convert::TryInto;
23use core::num::NonZeroUsize;
24
25use super::CompressionLevel;
26use super::Matcher;
27use super::Sequence;
28use super::blocks::encode_offset_with_history;
29#[cfg(all(feature = "std", target_arch = "aarch64", target_endian = "little"))]
30use std::arch::is_aarch64_feature_detected;
31#[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
32use std::arch::is_x86_feature_detected;
33#[cfg(feature = "std")]
34use std::sync::OnceLock;
35
// --- Simple (suffix-store) matcher tuning --------------------------------
// Shortest match the Simple backend will emit.
const MIN_MATCH_LEN: usize = 5;
// Default hash-fill stride for the fast path (see `hash_fill_step`).
const FAST_HASH_FILL_STEP: usize = 3;

// --- Dfast / row matcher tuning ------------------------------------------
const DFAST_MIN_MATCH_LEN: usize = 6;
const ROW_MIN_MATCH_LEN: usize = 6;
const DFAST_TARGET_LEN: usize = 48;
// Keep these aligned with the issue's zstd level-3/dfast target unless ratio
// measurements show we can shrink them without regressing acceptance tests.
const DFAST_HASH_BITS: usize = 20;
const DFAST_SEARCH_DEPTH: usize = 4;
// Sentinel marking an unused dfast table slot.
const DFAST_EMPTY_SLOT: usize = usize::MAX;
const ROW_HASH_BITS: usize = 20;
const ROW_LOG: usize = 5;
const ROW_SEARCH_DEPTH: usize = 16;
const ROW_TARGET_LEN: usize = 48;
const ROW_TAG_BITS: usize = 8;
// Sentinel marking an unused row-table slot.
const ROW_EMPTY_SLOT: usize = usize::MAX;
// Number of bytes hashed to form a row-table key (matches the 4-byte
// lookahead requirement noted in prime_with_dictionary).
const ROW_HASH_KEY_LEN: usize = 4;

// --- Hash-chain matcher tuning -------------------------------------------
const HC_HASH_LOG: usize = 20;
const HC_CHAIN_LOG: usize = 19;
const HC_SEARCH_DEPTH: usize = 16;
const HC_MIN_MATCH_LEN: usize = 5;
const HC_TARGET_LEN: usize = 48;
// Positions are stored as (relative_pos + 1) so that 0 is a safe empty
// sentinel that can never collide with any valid position.
const HC_EMPTY: u32 = 0;

// Maximum search depth across all HC-based levels. Used to size the
// fixed-length candidate array returned by chain_candidates().
const MAX_HC_SEARCH_DEPTH: usize = 32;
66
/// Which kernel is used to compare candidate match prefixes.
///
/// SIMD variants exist only on the matching targets; presumably chosen via
/// runtime feature detection (see the `is_*_feature_detected` imports) with
/// `Scalar` as the portable fallback — confirm at the selection site.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
enum PrefixKernel {
    /// Portable byte-wise comparison; always available.
    Scalar,
    /// 128-bit SSE2 kernel (x86 / x86_64).
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    X86Sse2,
    /// 256-bit AVX2 kernel (x86 / x86_64).
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    X86Avx2,
    /// 128-bit NEON kernel (little-endian aarch64 only).
    #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
    Aarch64Neon,
}
77
/// Bundled tuning knobs for the hash-chain matcher. Using a typed config
/// instead of positional `usize` args eliminates parameter-order hazards.
#[derive(Copy, Clone)]
struct HcConfig {
    // log2 of the hash-table size (capped relative to source size in
    // adjust_params_for_source_size).
    hash_log: usize,
    // log2 of the chain-table size.
    chain_log: usize,
    // Maximum number of chain links followed per search.
    search_depth: usize,
    // Target match length; presumably an early-out threshold for the
    // search loop — confirm in the HC matcher implementation.
    target_len: usize,
}
87
/// Bundled tuning knobs for the row-based matcher (mirrors [`HcConfig`]).
#[derive(Copy, Clone)]
struct RowConfig {
    // log2 of the hash-table size (recomputed for hinted windows via
    // row_hash_bits_for_window).
    hash_bits: usize,
    // Row geometry; presumably log2 of entries per row — confirm in the
    // row matcher implementation.
    row_log: usize,
    // Maximum candidates examined per search.
    search_depth: usize,
    // Target match length; presumably an early-out threshold — confirm.
    target_len: usize,
}
95
/// Default hash-chain configuration, assembled from the `HC_*` constants.
const HC_CONFIG: HcConfig = HcConfig {
    hash_log: HC_HASH_LOG,
    chain_log: HC_CHAIN_LOG,
    search_depth: HC_SEARCH_DEPTH,
    target_len: HC_TARGET_LEN,
};

/// Best-level: deeper search, larger tables, higher target length.
const BEST_HC_CONFIG: HcConfig = HcConfig {
    hash_log: 21,
    chain_log: 20,
    search_depth: 32,
    target_len: 128,
};

/// Default row-matcher configuration, assembled from the `ROW_*` constants.
const ROW_CONFIG: RowConfig = RowConfig {
    hash_bits: ROW_HASH_BITS,
    row_log: ROW_LOG,
    search_depth: ROW_SEARCH_DEPTH,
    target_len: ROW_TARGET_LEN,
};
117
/// Resolved tuning parameters for a compression level.
#[derive(Copy, Clone)]
struct LevelParams {
    // Which matcher implementation services this level.
    backend: MatcherBackend,
    // log2 of the live window size in bytes (window = 1 << window_log).
    window_log: u8,
    // Stride between indexed positions (Simple backend only; larger =
    // faster, worse ratio — see the negative-level handling).
    hash_fill_step: usize,
    // Lazy-matching depth handed through to the backend.
    lazy_depth: u8,
    // Hash-chain knobs; only consulted when backend == HashChain.
    hc: HcConfig,
    // Row-matcher knobs; only consulted when backend == Row.
    row: RowConfig,
}
128
129fn dfast_hash_bits_for_window(max_window_size: usize) -> usize {
130    let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
131    window_log.clamp(MIN_WINDOW_LOG as usize, DFAST_HASH_BITS)
132}
133
134fn row_hash_bits_for_window(max_window_size: usize) -> usize {
135    let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
136    window_log.clamp(MIN_WINDOW_LOG as usize, ROW_HASH_BITS)
137}
138
/// Parameter table for numeric compression levels 1–22.
///
/// Each entry maps a zstd compression level to the best-available matcher
/// backend and tuning knobs.  Levels that require strategies this crate does
/// not implement (greedy, btopt, btultra) are approximated with the closest
/// available backend.
///
/// Index 0 = level 1, index 21 = level 22.
///
/// Note: the `hc` column is only consulted by the HashChain backend and the
/// `row` column only by the Row backend; other rows carry the defaults.
#[rustfmt::skip]
const LEVEL_TABLE: [LevelParams; 22] = [
    // Lvl  Strategy       wlog  step  lazy  HC config                                   row config
    // ---  -------------- ----  ----  ----  ------------------------------------------  ----------
    /* 1 */ LevelParams { backend: MatcherBackend::Simple,    window_log: 17, hash_fill_step: 3, lazy_depth: 0, hc: HC_CONFIG, row: ROW_CONFIG },
    /* 2 */ LevelParams { backend: MatcherBackend::Dfast,     window_log: 19, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
    /* 3 */ LevelParams { backend: MatcherBackend::Dfast,     window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
    /* 4 */ LevelParams { backend: MatcherBackend::Row,       window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
    /* 5 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HcConfig { hash_log: 18, chain_log: 17, search_depth: 4,  target_len: 32  }, row: ROW_CONFIG },
    /* 6 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 1, hc: HcConfig { hash_log: 19, chain_log: 18, search_depth: 8,  target_len: 48  }, row: ROW_CONFIG },
    /* 7 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 16, target_len: 48  }, row: ROW_CONFIG },
    /* 8 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 24, target_len: 64  }, row: ROW_CONFIG },
    /* 9 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 24, target_len: 64  }, row: ROW_CONFIG },
    /*10 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 24, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 28, target_len: 96  }, row: ROW_CONFIG },
    /*11 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 24, hash_fill_step: 1, lazy_depth: 2, hc: BEST_HC_CONFIG, row: ROW_CONFIG },
    /*12 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 128 }, row: ROW_CONFIG },
    /*13 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 160 }, row: ROW_CONFIG },
    /*14 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 22, search_depth: 32, target_len: 192 }, row: ROW_CONFIG },
    /*15 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 22, search_depth: 32, target_len: 192 }, row: ROW_CONFIG },
    /*16 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 22, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    /*17 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    /*18 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    /*19 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    /*20 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    /*21 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    /*22 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
];
174
/// Smallest window_log the encoder will use regardless of source size
/// (2^10 = 1 KiB).
const MIN_WINDOW_LOG: u8 = 10;
/// Conservative floor for source-size-hinted window tuning.
///
/// Hinted windows below 16 KiB (`window_log < 14`) currently regress C-FFI
/// interoperability on certain compressed-block patterns. Keep hinted
/// windows at 16 KiB or larger until that compatibility gap is closed.
const MIN_HINTED_WINDOW_LOG: u8 = 14;
183
184/// Adjust level parameters for a known source size.
185///
186/// This derives a cap from `ceil(log2(src_size))`, then clamps it to
187/// [`MIN_HINTED_WINDOW_LOG`] (16 KiB). A zero-byte size hint is treated as
188/// [`MIN_WINDOW_LOG`] for the raw ceil-log step and then promoted to the hinted
189/// floor. This keeps tables bounded for small inputs while preserving the
190/// encoder's baseline minimum supported window.
191/// For the HC backend, `hash_log` and `chain_log` are reduced
192/// proportionally.
193fn adjust_params_for_source_size(mut params: LevelParams, src_size: u64) -> LevelParams {
194    // Derive a source-size-based cap from ceil(log2(src_size)), then
195    // clamp first to MIN_WINDOW_LOG (baseline encoder minimum) and then to
196    // MIN_HINTED_WINDOW_LOG (16 KiB hinted floor). For tiny or zero hints we
197    // therefore keep a 16 KiB effective minimum window in hinted mode.
198    let src_log = if src_size == 0 {
199        MIN_WINDOW_LOG
200    } else {
201        (64 - (src_size - 1).leading_zeros()) as u8 // ceil_log2
202    };
203    let src_log = src_log.max(MIN_WINDOW_LOG).max(MIN_HINTED_WINDOW_LOG);
204    if src_log < params.window_log {
205        params.window_log = src_log;
206    }
207    // For HC backend: also cap hash_log and chain_log so tables are
208    // proportional to the source, avoiding multi-MB allocations for
209    // tiny inputs.
210    if params.backend == MatcherBackend::HashChain {
211        if (src_log + 2) < params.hc.hash_log as u8 {
212            params.hc.hash_log = (src_log + 2) as usize;
213        }
214        if (src_log + 1) < params.hc.chain_log as u8 {
215            params.hc.chain_log = (src_log + 1) as usize;
216        }
217    } else if params.backend == MatcherBackend::Row {
218        let max_window_size = 1usize << params.window_log;
219        params.row.hash_bits = row_hash_bits_for_window(max_window_size);
220    }
221    params
222}
223
224/// Resolve a [`CompressionLevel`] to internal tuning parameters,
225/// optionally adjusted for a known source size.
226fn resolve_level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
227    let params = match level {
228        CompressionLevel::Uncompressed => LevelParams {
229            backend: MatcherBackend::Simple,
230            window_log: 17,
231            hash_fill_step: 1,
232            lazy_depth: 0,
233            hc: HC_CONFIG,
234            row: ROW_CONFIG,
235        },
236        CompressionLevel::Fastest => LEVEL_TABLE[0],
237        CompressionLevel::Default => LEVEL_TABLE[2],
238        CompressionLevel::Better => LEVEL_TABLE[6],
239        CompressionLevel::Best => LEVEL_TABLE[10],
240        CompressionLevel::Level(n) => {
241            if n > 0 {
242                let idx = (n as usize).min(CompressionLevel::MAX_LEVEL as usize) - 1;
243                LEVEL_TABLE[idx]
244            } else if n == 0 {
245                // Level 0 = default, matching C zstd semantics.
246                LEVEL_TABLE[CompressionLevel::DEFAULT_LEVEL as usize - 1]
247            } else {
248                // Negative levels: ultra-fast with the Simple backend.
249                // Acceleration grows with magnitude, expressed as larger
250                // hash_fill_step (fewer positions indexed).
251                let acceleration =
252                    (n.saturating_abs() as usize).min((-CompressionLevel::MIN_LEVEL) as usize);
253                let step = (acceleration + 3).min(128);
254                LevelParams {
255                    backend: MatcherBackend::Simple,
256                    window_log: 17,
257                    hash_fill_step: step,
258                    lazy_depth: 0,
259                    hc: HC_CONFIG,
260                    row: ROW_CONFIG,
261                }
262            }
263        }
264    };
265    if let Some(size) = source_size {
266        adjust_params_for_source_size(params, size)
267    } else {
268        params
269    }
270}
271
/// The matcher implementation a compression level resolves to.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum MatcherBackend {
    /// Suffix-store matcher — fastest and negative levels (see LEVEL_TABLE).
    Simple,
    /// Double-fast hash matcher — levels 2–3.
    Dfast,
    /// Row-based matcher — level 4.
    Row,
    /// Hash-chain matcher — levels 5 and above.
    HashChain,
}
279
/// This is the default implementation of the `Matcher` trait. It allocates and reuses the buffers when possible.
pub struct MatchGeneratorDriver {
    // Reusable data buffers handed out by get_next_space().
    vec_pool: Vec<Vec<u8>>,
    // Reusable suffix stores for the Simple backend.
    suffix_pool: Vec<SuffixStore>,
    // Simple backend state; always present (initial active backend).
    match_generator: MatchGenerator,
    // Backend states below are created lazily on first reset() into them.
    dfast_match_generator: Option<DfastMatchGenerator>,
    row_match_generator: Option<RowMatchGenerator>,
    hc_match_generator: Option<HcMatchGenerator>,
    // Which backend the driver currently routes calls to.
    active_backend: MatcherBackend,
    // Current slice allocation size: base_slice_size capped to the window.
    slice_size: usize,
    // Construction-time slice size; slice_size is re-derived from it on reset.
    base_slice_size: usize,
    // Frame header window size must stay at the configured live-window budget.
    // Dictionary retention expands internal matcher capacity only.
    reported_window_size: usize,
    // Tracks currently retained bytes that originated from primed dictionary
    // history and have not been evicted yet.
    dictionary_retained_budget: usize,
    // Source size hint for next frame (set via set_source_size_hint, cleared on reset).
    source_size_hint: Option<u64>,
}
300
impl MatchGeneratorDriver {
    /// `slice_size` sets the base block allocation size used for matcher input chunks.
    /// `max_slices_in_window` determines the initial window capacity at construction
    /// time. Effective window sizing is recalculated on every [`reset`](Self::reset)
    /// from the resolved compression level and optional source-size hint.
    pub(crate) fn new(slice_size: usize, max_slices_in_window: usize) -> Self {
        let max_window_size = max_slices_in_window * slice_size;
        Self {
            vec_pool: Vec::new(),
            suffix_pool: Vec::new(),
            match_generator: MatchGenerator::new(max_window_size),
            dfast_match_generator: None,
            row_match_generator: None,
            hc_match_generator: None,
            active_backend: MatcherBackend::Simple,
            slice_size,
            base_slice_size: slice_size,
            reported_window_size: max_window_size,
            dictionary_retained_budget: 0,
            source_size_hint: None,
        }
    }

    /// Resolve a level (plus optional source-size hint) to tuning parameters.
    fn level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
        resolve_level_params(level, source_size)
    }

    // The accessors below panic if the requested backend was not initialized
    // by reset(); that is an internal invariant violation, not a recoverable
    // error, hence expect() rather than Result.

    fn dfast_matcher(&self) -> &DfastMatchGenerator {
        self.dfast_match_generator
            .as_ref()
            .expect("dfast backend must be initialized by reset() before use")
    }

    fn dfast_matcher_mut(&mut self) -> &mut DfastMatchGenerator {
        self.dfast_match_generator
            .as_mut()
            .expect("dfast backend must be initialized by reset() before use")
    }

    fn row_matcher(&self) -> &RowMatchGenerator {
        self.row_match_generator
            .as_ref()
            .expect("row backend must be initialized by reset() before use")
    }

    fn row_matcher_mut(&mut self) -> &mut RowMatchGenerator {
        self.row_match_generator
            .as_mut()
            .expect("row backend must be initialized by reset() before use")
    }

    fn hc_matcher(&self) -> &HcMatchGenerator {
        self.hc_match_generator
            .as_ref()
            .expect("hash chain backend must be initialized by reset() before use")
    }

    fn hc_matcher_mut(&mut self) -> &mut HcMatchGenerator {
        self.hc_match_generator
            .as_mut()
            .expect("hash chain backend must be initialized by reset() before use")
    }

    /// Reclaim up to `evicted_bytes` of dictionary retention budget and
    /// shrink the active backend's window capacity by the same amount,
    /// so dictionary-expanded capacity decays as dictionary bytes leave
    /// the window.
    fn retire_dictionary_budget(&mut self, evicted_bytes: usize) {
        let reclaimed = evicted_bytes.min(self.dictionary_retained_budget);
        if reclaimed == 0 {
            return;
        }
        self.dictionary_retained_budget -= reclaimed;
        match self.active_backend {
            MatcherBackend::Simple => {
                self.match_generator.max_window_size = self
                    .match_generator
                    .max_window_size
                    .saturating_sub(reclaimed);
            }
            MatcherBackend::Dfast => {
                let matcher = self.dfast_matcher_mut();
                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
            }
            MatcherBackend::Row => {
                let matcher = self.row_matcher_mut();
                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
            }
            MatcherBackend::HashChain => {
                let matcher = self.hc_matcher_mut();
                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
            }
        }
    }

    /// Trim the active backend down to its window, recycling evicted buffers
    /// into the pools, and retire dictionary budget for the evicted bytes.
    /// Loops because each retirement shrinks the window, which may make
    /// further data eligible for eviction; stops once a pass evicts nothing.
    fn trim_after_budget_retire(&mut self) {
        loop {
            let mut evicted_bytes = 0usize;
            match self.active_backend {
                MatcherBackend::Simple => {
                    let vec_pool = &mut self.vec_pool;
                    let suffix_pool = &mut self.suffix_pool;
                    // reserve(0, ..) forces eviction of anything beyond the
                    // window; evicted buffers are zero-padded back to full
                    // capacity and returned to the pools for reuse.
                    self.match_generator.reserve(0, |mut data, mut suffixes| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                        suffixes.slots.clear();
                        suffixes.slots.resize(suffixes.slots.capacity(), None);
                        suffix_pool.push(suffixes);
                    });
                }
                MatcherBackend::Dfast => {
                    let mut retired = Vec::new();
                    self.dfast_matcher_mut().trim_to_window(|data| {
                        evicted_bytes += data.len();
                        retired.push(data);
                    });
                    for mut data in retired {
                        data.resize(data.capacity(), 0);
                        self.vec_pool.push(data);
                    }
                }
                MatcherBackend::Row => {
                    let mut retired = Vec::new();
                    self.row_matcher_mut().trim_to_window(|data| {
                        evicted_bytes += data.len();
                        retired.push(data);
                    });
                    for mut data in retired {
                        data.resize(data.capacity(), 0);
                        self.vec_pool.push(data);
                    }
                }
                MatcherBackend::HashChain => {
                    let mut retired = Vec::new();
                    self.hc_matcher_mut().trim_to_window(|data| {
                        evicted_bytes += data.len();
                        retired.push(data);
                    });
                    for mut data in retired {
                        data.resize(data.capacity(), 0);
                        self.vec_pool.push(data);
                    }
                }
            }
            if evicted_bytes == 0 {
                break;
            }
            self.retire_dictionary_budget(evicted_bytes);
        }
    }
}
449
450impl Matcher for MatchGeneratorDriver {
    /// This driver can seed its window from a dictionary via
    /// `prime_with_dictionary`.
    fn supports_dictionary_priming(&self) -> bool {
        true
    }
454
    /// Record the expected size of the next frame. The hint is consumed
    /// (and cleared) by the next `reset`, which uses it to shrink window
    /// and table sizes for small sources.
    fn set_source_size_hint(&mut self, size: u64) {
        self.source_size_hint = Some(size);
    }
458
    /// Re-initialize the driver for a new frame at `level`.
    ///
    /// Consumes the pending source-size hint, resolves level parameters,
    /// tears down the previously active backend when switching (recycling
    /// its buffers into the pools), then configures and clears the newly
    /// selected backend.
    fn reset(&mut self, level: CompressionLevel) {
        let hint = self.source_size_hint.take();
        let hinted = hint.is_some();
        let params = Self::level_params(level, hint);
        let max_window_size = 1usize << params.window_log;
        self.dictionary_retained_budget = 0;
        // On a backend switch, drain the outgoing backend's buffers back
        // into the pools so the incoming backend can reuse them.
        if self.active_backend != params.backend {
            match self.active_backend {
                MatcherBackend::Simple => {
                    let vec_pool = &mut self.vec_pool;
                    let suffix_pool = &mut self.suffix_pool;
                    self.match_generator.reset(|mut data, mut suffixes| {
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                        suffixes.slots.clear();
                        suffixes.slots.resize(suffixes.slots.capacity(), None);
                        suffix_pool.push(suffixes);
                    });
                }
                MatcherBackend::Dfast => {
                    if let Some(dfast) = self.dfast_match_generator.as_mut() {
                        let vec_pool = &mut self.vec_pool;
                        dfast.reset(|mut data| {
                            data.resize(data.capacity(), 0);
                            vec_pool.push(data);
                        });
                    }
                }
                MatcherBackend::Row => {
                    if let Some(row) = self.row_match_generator.as_mut() {
                        // Drop the row tables outright so their allocations
                        // don't linger while another backend is active.
                        row.row_heads = Vec::new();
                        row.row_positions = Vec::new();
                        row.row_tags = Vec::new();
                        let vec_pool = &mut self.vec_pool;
                        row.reset(|mut data| {
                            data.resize(data.capacity(), 0);
                            vec_pool.push(data);
                        });
                    }
                }
                MatcherBackend::HashChain => {
                    if let Some(hc) = self.hc_match_generator.as_mut() {
                        // Release oversized tables when switching away from
                        // HashChain so Best's larger allocations don't persist.
                        hc.hash_table = Vec::new();
                        hc.chain_table = Vec::new();
                        let vec_pool = &mut self.vec_pool;
                        hc.reset(|mut data| {
                            data.resize(data.capacity(), 0);
                            vec_pool.push(data);
                        });
                    }
                }
            }
        }

        self.active_backend = params.backend;
        // Slices never exceed the live window.
        self.slice_size = self.base_slice_size.min(max_window_size);
        self.reported_window_size = max_window_size;
        match self.active_backend {
            MatcherBackend::Simple => {
                let vec_pool = &mut self.vec_pool;
                let suffix_pool = &mut self.suffix_pool;
                self.match_generator.max_window_size = max_window_size;
                self.match_generator.hash_fill_step = params.hash_fill_step;
                self.match_generator.reset(|mut data, mut suffixes| {
                    data.resize(data.capacity(), 0);
                    vec_pool.push(data);
                    suffixes.slots.clear();
                    suffixes.slots.resize(suffixes.slots.capacity(), None);
                    suffix_pool.push(suffixes);
                });
            }
            MatcherBackend::Dfast => {
                let dfast = self
                    .dfast_match_generator
                    .get_or_insert_with(|| DfastMatchGenerator::new(max_window_size));
                dfast.max_window_size = max_window_size;
                dfast.lazy_depth = params.lazy_depth;
                // Only shrink the hash table when the source size is known;
                // unhinted streams keep the full default table.
                dfast.set_hash_bits(if hinted {
                    dfast_hash_bits_for_window(max_window_size)
                } else {
                    DFAST_HASH_BITS
                });
                let vec_pool = &mut self.vec_pool;
                dfast.reset(|mut data| {
                    data.resize(data.capacity(), 0);
                    vec_pool.push(data);
                });
            }
            MatcherBackend::Row => {
                let row = self
                    .row_match_generator
                    .get_or_insert_with(|| RowMatchGenerator::new(max_window_size));
                row.max_window_size = max_window_size;
                row.lazy_depth = params.lazy_depth;
                row.configure(params.row);
                // As with dfast: hinted sources get right-sized tables.
                if hinted {
                    row.set_hash_bits(row_hash_bits_for_window(max_window_size));
                }
                let vec_pool = &mut self.vec_pool;
                row.reset(|mut data| {
                    data.resize(data.capacity(), 0);
                    vec_pool.push(data);
                });
            }
            MatcherBackend::HashChain => {
                let hc = self
                    .hc_match_generator
                    .get_or_insert_with(|| HcMatchGenerator::new(max_window_size));
                hc.max_window_size = max_window_size;
                hc.lazy_depth = params.lazy_depth;
                hc.configure(params.hc);
                let vec_pool = &mut self.vec_pool;
                hc.reset(|mut data| {
                    data.resize(data.capacity(), 0);
                    vec_pool.push(data);
                });
            }
        }
    }
580
    /// Seed the active backend with dictionary history.
    ///
    /// Loads `offset_hist` into the backend's repeat-offset state, then
    /// feeds `dict_content` through the normal commit path (in slice-sized
    /// chunks, indexed via `skip_matching` so no sequences are emitted).
    /// The backend's internal window capacity is temporarily expanded by
    /// the committed dictionary bytes; that expansion is tracked in
    /// `dictionary_retained_budget` and decays as the bytes are evicted.
    /// The reported frame-header window size is NOT changed.
    fn prime_with_dictionary(&mut self, dict_content: &[u8], offset_hist: [u32; 3]) {
        match self.active_backend {
            MatcherBackend::Simple => self.match_generator.offset_hist = offset_hist,
            MatcherBackend::Dfast => self.dfast_matcher_mut().offset_hist = offset_hist,
            MatcherBackend::Row => self.row_matcher_mut().offset_hist = offset_hist,
            MatcherBackend::HashChain => self.hc_matcher_mut().offset_hist = offset_hist,
        }

        if dict_content.is_empty() {
            return;
        }

        // Dictionary bytes should stay addressable until produced frame output
        // itself exceeds the live window size.
        let retained_dict_budget = dict_content.len();
        match self.active_backend {
            MatcherBackend::Simple => {
                self.match_generator.max_window_size = self
                    .match_generator
                    .max_window_size
                    .saturating_add(retained_dict_budget);
            }
            MatcherBackend::Dfast => {
                let matcher = self.dfast_matcher_mut();
                matcher.max_window_size =
                    matcher.max_window_size.saturating_add(retained_dict_budget);
            }
            MatcherBackend::Row => {
                let matcher = self.row_matcher_mut();
                matcher.max_window_size =
                    matcher.max_window_size.saturating_add(retained_dict_budget);
            }
            MatcherBackend::HashChain => {
                let matcher = self.hc_matcher_mut();
                matcher.max_window_size =
                    matcher.max_window_size.saturating_add(retained_dict_budget);
            }
        }

        let mut start = 0usize;
        let mut committed_dict_budget = 0usize;
        // insert_position needs 4 bytes of lookahead for hashing;
        // backfill_boundary_positions re-visits tail positions once the
        // next slice extends history, but cannot hash <4 byte fragments.
        let min_primed_tail = match self.active_backend {
            MatcherBackend::Simple => MIN_MATCH_LEN,
            MatcherBackend::Dfast | MatcherBackend::Row | MatcherBackend::HashChain => 4,
        };
        // Commit the dictionary chunk by chunk; a final fragment shorter
        // than min_primed_tail is dropped (handled below).
        while start < dict_content.len() {
            let end = (start + self.slice_size).min(dict_content.len());
            if end - start < min_primed_tail {
                break;
            }
            let mut space = self.get_next_space();
            space.clear();
            space.extend_from_slice(&dict_content[start..end]);
            self.commit_space(space);
            self.skip_matching();
            committed_dict_budget += end - start;
            start = end;
        }

        // Give back the window expansion for any tail too short to commit.
        let uncommitted_tail_budget = retained_dict_budget.saturating_sub(committed_dict_budget);
        if uncommitted_tail_budget > 0 {
            match self.active_backend {
                MatcherBackend::Simple => {
                    self.match_generator.max_window_size = self
                        .match_generator
                        .max_window_size
                        .saturating_sub(uncommitted_tail_budget);
                }
                MatcherBackend::Dfast => {
                    let matcher = self.dfast_matcher_mut();
                    matcher.max_window_size = matcher
                        .max_window_size
                        .saturating_sub(uncommitted_tail_budget);
                }
                MatcherBackend::Row => {
                    let matcher = self.row_matcher_mut();
                    matcher.max_window_size = matcher
                        .max_window_size
                        .saturating_sub(uncommitted_tail_budget);
                }
                MatcherBackend::HashChain => {
                    let matcher = self.hc_matcher_mut();
                    matcher.max_window_size = matcher
                        .max_window_size
                        .saturating_sub(uncommitted_tail_budget);
                }
            }
        }
        if committed_dict_budget > 0 {
            self.dictionary_retained_budget = self
                .dictionary_retained_budget
                .saturating_add(committed_dict_budget);
        }
    }
678
    /// Window size advertised to the caller, widened to the `u64` the
    /// `Matcher` trait contract expects.
    fn window_size(&self) -> u64 {
        self.reported_window_size as u64
    }
682
683    fn get_next_space(&mut self) -> Vec<u8> {
684        if let Some(mut space) = self.vec_pool.pop() {
685            if space.len() > self.slice_size {
686                space.truncate(self.slice_size);
687            }
688            if space.len() < self.slice_size {
689                space.resize(self.slice_size, 0);
690            }
691            return space;
692        }
693        alloc::vec![0; self.slice_size]
694    }
695
696    fn get_last_space(&mut self) -> &[u8] {
697        match self.active_backend {
698            MatcherBackend::Simple => self.match_generator.window.last().unwrap().data.as_slice(),
699            MatcherBackend::Dfast => self.dfast_matcher().get_last_space(),
700            MatcherBackend::Row => self.row_matcher().get_last_space(),
701            MatcherBackend::HashChain => self.hc_matcher().get_last_space(),
702        }
703    }
704
    /// Feed a filled buffer into the active backend's history window.
    ///
    /// Buffers (and, for the `Simple` backend, suffix tables) evicted from
    /// the window during the insert are recycled into `vec_pool` /
    /// `suffix_pool`, and the number of evicted bytes is reported to the
    /// dictionary-budget accounting afterwards.
    fn commit_space(&mut self, space: Vec<u8>) {
        match self.active_backend {
            MatcherBackend::Simple => {
                // Re-borrow the pool as a local so the eviction closure can
                // capture it while `self.match_generator` is itself borrowed
                // mutably (field-level borrow splitting).
                let vec_pool = &mut self.vec_pool;
                let mut evicted_bytes = 0usize;
                // Reuse a pooled suffix table only when it is large enough
                // for this buffer; otherwise build a fresh one.
                let suffixes = match self.suffix_pool.pop() {
                    Some(store) if store.slots.len() >= space.len() => store,
                    _ => SuffixStore::with_capacity(space.len()),
                };
                let suffix_pool = &mut self.suffix_pool;
                self.match_generator
                    .add_data(space, suffixes, |mut data, mut suffixes| {
                        evicted_bytes += data.len();
                        // Restore the full length so the pooled buffer
                        // advertises its whole capacity to the next user.
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                        // Empty the table (keeping its allocation) before
                        // returning it to the pool.
                        suffixes.slots.clear();
                        suffixes.slots.resize(suffixes.slots.capacity(), None);
                        suffix_pool.push(suffixes);
                    });
                self.retire_dictionary_budget(evicted_bytes);
                self.trim_after_budget_retire();
            }
            MatcherBackend::Dfast => {
                let vec_pool = &mut self.vec_pool;
                let mut evicted_bytes = 0usize;
                self.dfast_match_generator
                    .as_mut()
                    .expect("dfast backend must be initialized by reset() before use")
                    .add_data(space, |mut data| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                    });
                self.retire_dictionary_budget(evicted_bytes);
                self.trim_after_budget_retire();
            }
            MatcherBackend::Row => {
                let vec_pool = &mut self.vec_pool;
                let mut evicted_bytes = 0usize;
                self.row_match_generator
                    .as_mut()
                    .expect("row backend must be initialized by reset() before use")
                    .add_data(space, |mut data| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                    });
                self.retire_dictionary_budget(evicted_bytes);
                self.trim_after_budget_retire();
            }
            MatcherBackend::HashChain => {
                let vec_pool = &mut self.vec_pool;
                let mut evicted_bytes = 0usize;
                self.hc_match_generator
                    .as_mut()
                    .expect("hash chain backend must be initialized by reset() before use")
                    .add_data(space, |mut data| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                    });
                self.retire_dictionary_budget(evicted_bytes);
                self.trim_after_budget_retire();
            }
        }
    }
771
772    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
773        match self.active_backend {
774            MatcherBackend::Simple => {
775                while self.match_generator.next_sequence(&mut handle_sequence) {}
776            }
777            MatcherBackend::Dfast => self
778                .dfast_matcher_mut()
779                .start_matching(&mut handle_sequence),
780            MatcherBackend::Row => self.row_matcher_mut().start_matching(&mut handle_sequence),
781            MatcherBackend::HashChain => self.hc_matcher_mut().start_matching(&mut handle_sequence),
782        }
783    }
784    fn skip_matching(&mut self) {
785        match self.active_backend {
786            MatcherBackend::Simple => self.match_generator.skip_matching(),
787            MatcherBackend::Dfast => self.dfast_matcher_mut().skip_matching(),
788            MatcherBackend::Row => self.row_matcher_mut().skip_matching(),
789            MatcherBackend::HashChain => self.hc_matcher_mut().skip_matching(),
790        }
791    }
792}
793
/// This stores the index of a suffix of a string by hashing the first few bytes of that suffix
/// This means that collisions just overwrite and that you need to check validity after a get
struct SuffixStore {
    // We use NonZeroUsize to enable niche optimization here.
    // On store we do +1 and on get -1
    // This is ok since usize::MAX is never a valid offset
    slots: Vec<Option<NonZeroUsize>>,
    // floor(log2(slots.len())); maps the 64-bit hash onto a slot index in `key`
    len_log: u32,
}

impl SuffixStore {
    /// Create a store with at least `capacity` slots.
    fn with_capacity(capacity: usize) -> Self {
        // Guard against `capacity == 0`: `ilog2(0)` panics and an empty slot
        // table could never be indexed, so fall back to a single slot.
        let capacity = capacity.max(1);
        Self {
            slots: alloc::vec![None; capacity],
            len_log: capacity.ilog2(),
        }
    }

    /// Remember `idx` as the position where `suffix` starts, overwriting any
    /// colliding previous entry.
    #[inline(always)]
    fn insert(&mut self, suffix: &[u8], idx: usize) {
        let key = self.key(suffix);
        // +1 so the stored value is never zero (see the field comment).
        self.slots[key] = Some(NonZeroUsize::new(idx + 1).unwrap());
    }

    /// Whether the slot for `suffix` is occupied (possibly by a colliding key).
    #[inline(always)]
    fn contains_key(&self, suffix: &[u8]) -> bool {
        let key = self.key(suffix);
        self.slots[key].is_some()
    }

    /// Look up the stored index for `suffix`. Collisions are possible, so the
    /// caller must verify that the data at the returned index really matches.
    #[inline(always)]
    fn get(&self, suffix: &[u8]) -> Option<usize> {
        let key = self.key(suffix);
        // Undo the +1 applied in `insert`.
        self.slots[key].map(|x| x.get() - 1)
    }

    /// Hash the first five bytes of `suffix` into a slot index.
    #[inline(always)]
    fn key(&self, suffix: &[u8]) -> usize {
        // Capacity=1 yields len_log=0; shifting by 64 would panic.
        if self.len_log == 0 {
            return 0;
        }

        let s0 = suffix[0] as u64;
        let s1 = suffix[1] as u64;
        let s2 = suffix[2] as u64;
        let s3 = suffix[3] as u64;
        let s4 = suffix[4] as u64;

        const POLY: u64 = 0xCF3BCCDCABu64;

        let s0 = (s0 << 24).wrapping_mul(POLY);
        let s1 = (s1 << 32).wrapping_mul(POLY);
        let s2 = (s2 << 40).wrapping_mul(POLY);
        let s3 = (s3 << 48).wrapping_mul(POLY);
        let s4 = (s4 << 56).wrapping_mul(POLY);

        let index = s0 ^ s1 ^ s2 ^ s3 ^ s4;
        let index = index >> (64 - self.len_log);
        // `index` is already < 2^len_log <= slots.len(); the modulo is a
        // cheap defensive bound.
        index as usize % self.slots.len()
    }
}
856
/// We keep a window of a few of these entries
/// All of these are valid targets for a match to be generated for
struct WindowEntry {
    /// One contiguous chunk of already-committed data
    data: Vec<u8>,
    /// Stores indexes into data
    suffixes: SuffixStore,
    /// Distance from the start of this entry's data to the start of the
    /// newest window entry (maintained by `add_data`).
    /// Makes offset calculations efficient
    base_offset: usize,
}
866
pub(crate) struct MatchGenerator {
    /// Maximum number of bytes the window may hold; older entries are evicted
    /// in `reserve` to stay below this.
    max_window_size: usize,
    /// Data window we are operating on to find matches
    /// The data we want to find matches for is in the last slice
    window: Vec<WindowEntry>,
    /// Total number of bytes currently held across all window entries
    window_size: usize,
    /// Debug-only mirror of the concatenated window, used to cross-check
    /// computed match offsets in `next_sequence`.
    #[cfg(debug_assertions)]
    concat_window: Vec<u8>,
    /// Index in the last slice that we already processed
    suffix_idx: usize,
    /// Gets updated when a new sequence is returned to point right behind that sequence
    last_idx_in_sequence: usize,
    /// Step between positions inserted into the suffix store when bulk-hashing
    /// skipped ranges (see `add_suffixes_till`).
    hash_fill_step: usize,
    /// Recent offsets used to probe for repcode matches; reset to [1, 4, 8].
    offset_hist: [u32; 3],
}
882
impl MatchGenerator {
    /// Choose the widest available x86 compare kernel: AVX2 first, then SSE2,
    /// otherwise the scalar fallback.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[inline(always)]
    const fn select_x86_prefix_kernel(has_avx2: bool, has_sse2: bool) -> PrefixKernel {
        if has_avx2 {
            return PrefixKernel::X86Avx2;
        }
        if has_sse2 {
            return PrefixKernel::X86Sse2;
        }
        PrefixKernel::Scalar
    }

    /// Pick the prefix-compare kernel via runtime CPU feature detection,
    /// caching the result in a `OnceLock` so detection runs only once.
    #[cfg(feature = "std")]
    #[inline(always)]
    fn detect_prefix_kernel() -> PrefixKernel {
        static KERNEL: OnceLock<PrefixKernel> = OnceLock::new();
        *KERNEL.get_or_init(|| {
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            {
                let kernel = Self::select_x86_prefix_kernel(
                    is_x86_feature_detected!("avx2"),
                    is_x86_feature_detected!("sse2"),
                );
                if kernel != PrefixKernel::Scalar {
                    return kernel;
                }
            }
            #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
            {
                if is_aarch64_feature_detected!("neon") {
                    return PrefixKernel::Aarch64Neon;
                }
            }
            PrefixKernel::Scalar
        })
    }

    /// `no_std` variant: runtime detection is unavailable, so fall back to
    /// the target features known at compile time.
    #[cfg(not(feature = "std"))]
    #[inline(always)]
    fn detect_prefix_kernel() -> PrefixKernel {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            let kernel = Self::select_x86_prefix_kernel(
                cfg!(target_feature = "avx2"),
                cfg!(target_feature = "sse2"),
            );
            if kernel != PrefixKernel::Scalar {
                return kernel;
            }
        }
        #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
        {
            if cfg!(target_feature = "neon") {
                return PrefixKernel::Aarch64Neon;
            }
        }
        PrefixKernel::Scalar
    }

    /// Index of the first differing byte inside a XORed word.
    /// On little-endian targets the first byte in memory is the lowest byte.
    #[inline(always)]
    #[cfg(target_endian = "little")]
    fn mismatch_byte_index(diff: usize) -> usize {
        diff.trailing_zeros() as usize / 8
    }

    /// Index of the first differing byte inside a XORed word.
    /// On big-endian targets the first byte in memory is the highest byte.
    #[inline(always)]
    #[cfg(target_endian = "big")]
    fn mismatch_byte_index(diff: usize) -> usize {
        diff.leading_zeros() as usize / 8
    }

    /// max_size defines how many bytes will be used at most in the window used for matching
    fn new(max_size: usize) -> Self {
        Self {
            max_window_size: max_size,
            window: Vec::new(),
            window_size: 0,
            #[cfg(debug_assertions)]
            concat_window: Vec::new(),
            suffix_idx: 0,
            last_idx_in_sequence: 0,
            hash_fill_step: 1,
            // Repcode history starts at the standard 1, 4, 8 offsets.
            offset_hist: [1, 4, 8],
        }
    }

    /// Clear all matcher state, handing every window entry's buffers back to
    /// the caller for reuse via `reuse_space`.
    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
        self.window_size = 0;
        #[cfg(debug_assertions)]
        self.concat_window.clear();
        self.suffix_idx = 0;
        self.last_idx_in_sequence = 0;
        self.offset_hist = [1, 4, 8];
        self.window.drain(..).for_each(|entry| {
            reuse_space(entry.data, entry.suffixes);
        });
    }

    /// Processes bytes in the current window until either a match is found or no more matches can be found
    /// * If a match is found handle_sequence is called with the Triple variant
    /// * If no more matches can be found but there are bytes still left handle_sequence is called with the Literals variant
    /// * If no more matches can be found and no more bytes are left this returns false
    fn next_sequence(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) -> bool {
        loop {
            let last_entry = self.window.last().unwrap();
            let data_slice = &last_entry.data;

            // We already reached the end of the window, check if we need to return a Literals{}
            if self.suffix_idx >= data_slice.len() {
                if self.last_idx_in_sequence != self.suffix_idx {
                    let literals = &data_slice[self.last_idx_in_sequence..];
                    self.last_idx_in_sequence = self.suffix_idx;
                    handle_sequence(Sequence::Literals { literals });
                    return true;
                } else {
                    return false;
                }
            }

            // If the remaining data is smaller than the minimum match length we can stop and return a Literals{}
            let data_slice = &data_slice[self.suffix_idx..];
            if data_slice.len() < MIN_MATCH_LEN {
                let last_idx_in_sequence = self.last_idx_in_sequence;
                self.last_idx_in_sequence = last_entry.data.len();
                self.suffix_idx = last_entry.data.len();
                handle_sequence(Sequence::Literals {
                    literals: &last_entry.data[last_idx_in_sequence..],
                });
                return true;
            }

            // This is the key we are looking to find a match for
            let key = &data_slice[..MIN_MATCH_LEN];
            let literals_len = self.suffix_idx - self.last_idx_in_sequence;

            // Look in each window entry
            // Seed with a repcode candidate (if one applies) so hash-table
            // hits must beat it on length or offset.
            let mut candidate = self.repcode_candidate(data_slice, literals_len);
            for match_entry in self.window.iter() {
                if let Some(match_index) = match_entry.suffixes.get(key) {
                    let match_slice = &match_entry.data[match_index..];

                    // Check how long the common prefix actually is
                    let match_len = Self::common_prefix_len(match_slice, data_slice);

                    // Collisions in the suffix store might make this check fail
                    if match_len >= MIN_MATCH_LEN {
                        let offset = match_entry.base_offset + self.suffix_idx - match_index;

                        // If we are in debug/tests make sure the match we found is actually at the offset we calculated
                        #[cfg(debug_assertions)]
                        {
                            let unprocessed = last_entry.data.len() - self.suffix_idx;
                            let start = self.concat_window.len() - unprocessed - offset;
                            let end = start + match_len;
                            let check_slice = &self.concat_window[start..end];
                            debug_assert_eq!(check_slice, &match_slice[..match_len]);
                        }

                        // Longer matches win; equal lengths prefer the
                        // smaller offset.
                        if let Some((old_offset, old_match_len)) = candidate {
                            if match_len > old_match_len
                                || (match_len == old_match_len && offset < old_offset)
                            {
                                candidate = Some((offset, match_len));
                            }
                        } else {
                            candidate = Some((offset, match_len));
                        }
                    }
                }
            }

            if let Some((offset, match_len)) = candidate {
                // For each index in the match we found we do not need to look for another match
                // But we still want them registered in the suffix store
                self.add_suffixes_till(self.suffix_idx + match_len, self.hash_fill_step);

                // All literals that were not included between this match and the last are now included here
                let last_entry = self.window.last().unwrap();
                let literals = &last_entry.data[self.last_idx_in_sequence..self.suffix_idx];

                // Update the indexes, all indexes upto and including the current index have been included in a sequence now
                self.suffix_idx += match_len;
                self.last_idx_in_sequence = self.suffix_idx;
                // Keep the repcode history in sync with what the encoder
                // will emit for this sequence.
                let _ = encode_offset_with_history(
                    offset as u32,
                    literals.len() as u32,
                    &mut self.offset_hist,
                );
                handle_sequence(Sequence::Triple {
                    literals,
                    offset,
                    match_len,
                });

                return true;
            }

            // No match here: remember this suffix (first writer wins) and
            // advance a single byte.
            let last_entry = self.window.last_mut().unwrap();
            let key = &last_entry.data[self.suffix_idx..self.suffix_idx + MIN_MATCH_LEN];
            if !last_entry.suffixes.contains_key(key) {
                last_entry.suffixes.insert(key, self.suffix_idx);
            }
            self.suffix_idx += 1;
        }
    }

    /// Find the common prefix length between two byte slices
    #[inline(always)]
    fn common_prefix_len(a: &[u8], b: &[u8]) -> usize {
        let max = a.len().min(b.len());
        let mut off = 0usize;
        let lhs = a.as_ptr();
        let rhs = b.as_ptr();

        // A SIMD kernel (if available) handles the bulk in wide blocks; the
        // scalar path below finishes whatever tail remains.
        match Self::detect_prefix_kernel() {
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            PrefixKernel::X86Avx2 => {
                off = unsafe { Self::prefix_len_simd_avx2(lhs, rhs, max) };
            }
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            PrefixKernel::X86Sse2 => {
                off = unsafe { Self::prefix_len_simd_sse2(lhs, rhs, max) };
            }
            #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
            PrefixKernel::Aarch64Neon => {
                off = unsafe { Self::prefix_len_simd_neon(lhs, rhs, max) };
            }
            PrefixKernel::Scalar => {}
        }

        Self::common_prefix_len_scalar(a, b, off, max)
    }

    /// Scalar tail of the prefix comparison: starting at `off` (where a SIMD
    /// kernel left off, or 0), compare word-sized chunks and then finish
    /// byte-by-byte.
    #[inline(always)]
    fn common_prefix_len_scalar(a: &[u8], b: &[u8], mut off: usize, max: usize) -> usize {
        let chunk = core::mem::size_of::<usize>();
        let lhs = a.as_ptr();
        let rhs = b.as_ptr();
        while off + chunk <= max {
            // SAFETY-relevant invariant: off + chunk <= max <= min(a.len(), b.len()).
            let lhs_word = unsafe { core::ptr::read_unaligned(lhs.add(off) as *const usize) };
            let rhs_word = unsafe { core::ptr::read_unaligned(rhs.add(off) as *const usize) };
            let diff = lhs_word ^ rhs_word;
            if diff != 0 {
                return off + Self::mismatch_byte_index(diff);
            }
            off += chunk;
        }
        off + core::iter::zip(&a[off..max], &b[off..max])
            .take_while(|(x, y)| x == y)
            .count()
    }

    /// Compare up to `max` bytes 16 at a time with SSE2. Returns the exact
    /// mismatch position if one occurs inside a full block, otherwise a
    /// multiple-of-16 lower bound for the scalar tail to finish.
    ///
    /// # Safety
    /// `lhs` and `rhs` must be valid for reads of `max` bytes, and the CPU
    /// must support SSE2 (the kernel selector checks this).
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[target_feature(enable = "sse2")]
    unsafe fn prefix_len_simd_sse2(lhs: *const u8, rhs: *const u8, max: usize) -> usize {
        let mut off = 0usize;
        while off + 16 <= max {
            let a: __m128i = unsafe { _mm_loadu_si128(lhs.add(off).cast::<__m128i>()) };
            let b: __m128i = unsafe { _mm_loadu_si128(rhs.add(off).cast::<__m128i>()) };
            let eq = _mm_cmpeq_epi8(a, b);
            let mask = _mm_movemask_epi8(eq) as u32;
            if mask != 0xFFFF {
                // First zero bit in the equality mask marks the mismatch.
                return off + (!mask).trailing_zeros() as usize;
            }
            off += 16;
        }
        off
    }

    /// Compare up to `max` bytes 32 at a time with AVX2. Returns the exact
    /// mismatch position if one occurs inside a full block, otherwise a
    /// multiple-of-32 lower bound for the scalar tail to finish.
    ///
    /// # Safety
    /// `lhs` and `rhs` must be valid for reads of `max` bytes, and the CPU
    /// must support AVX2 (the kernel selector checks this).
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[target_feature(enable = "avx2")]
    unsafe fn prefix_len_simd_avx2(lhs: *const u8, rhs: *const u8, max: usize) -> usize {
        let mut off = 0usize;
        while off + 32 <= max {
            let a: __m256i = unsafe { _mm256_loadu_si256(lhs.add(off).cast::<__m256i>()) };
            let b: __m256i = unsafe { _mm256_loadu_si256(rhs.add(off).cast::<__m256i>()) };
            let eq = _mm256_cmpeq_epi8(a, b);
            let mask = _mm256_movemask_epi8(eq) as u32;
            if mask != u32::MAX {
                return off + (!mask).trailing_zeros() as usize;
            }
            off += 32;
        }
        off
    }

    /// Compare up to `max` bytes 16 at a time with NEON, inspecting the
    /// equality vector as two 64-bit lanes. Returns the exact mismatch
    /// position inside a full block, otherwise a multiple-of-16 lower bound.
    ///
    /// # Safety
    /// `lhs` and `rhs` must be valid for reads of `max` bytes, and the CPU
    /// must support NEON (the kernel selector checks this).
    #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
    #[target_feature(enable = "neon")]
    unsafe fn prefix_len_simd_neon(lhs: *const u8, rhs: *const u8, max: usize) -> usize {
        let mut off = 0usize;
        while off + 16 <= max {
            let a: uint8x16_t = unsafe { vld1q_u8(lhs.add(off)) };
            let b: uint8x16_t = unsafe { vld1q_u8(rhs.add(off)) };
            let eq = vceqq_u8(a, b);
            let lanes = vreinterpretq_u64_u8(eq);
            let low = vgetq_lane_u64(lanes, 0);
            if low != u64::MAX {
                let diff = low ^ u64::MAX;
                return off + Self::mismatch_byte_index(diff as usize);
            }
            let high = vgetq_lane_u64(lanes, 1);
            if high != u64::MAX {
                let diff = high ^ u64::MAX;
                return off + 8 + Self::mismatch_byte_index(diff as usize);
            }
            off += 16;
        }
        off
    }

    /// Process bytes and add the suffixes to the suffix store up to a specific index
    #[inline(always)]
    fn add_suffixes_till(&mut self, idx: usize, fill_step: usize) {
        let start = self.suffix_idx;
        let last_entry = self.window.last_mut().unwrap();
        // Entries shorter than a key cannot produce any suffix at all.
        if last_entry.data.len() < MIN_MATCH_LEN {
            return;
        }
        // Positions closer than MIN_MATCH_LEN-1 to `idx` would need bytes
        // past `idx` for their key, so stop the bulk fill before them.
        let insert_limit = idx.saturating_sub(MIN_MATCH_LEN - 1);
        if insert_limit > start {
            let data = last_entry.data.as_slice();
            let suffixes = &mut last_entry.suffixes;
            if fill_step == FAST_HASH_FILL_STEP {
                Self::add_suffixes_interleaved_fast(data, suffixes, start, insert_limit);
            } else {
                let mut pos = start;
                while pos < insert_limit {
                    Self::insert_suffix_if_absent(data, suffixes, pos);
                    pos += fill_step;
                }
            }
        }

        // Additionally register the last full key ending exactly at `idx`.
        if idx >= start + MIN_MATCH_LEN {
            let tail_start = idx - MIN_MATCH_LEN;
            let tail_key = &last_entry.data[tail_start..tail_start + MIN_MATCH_LEN];
            if !last_entry.suffixes.contains_key(tail_key) {
                last_entry.suffixes.insert(tail_key, tail_start);
            }
        }
    }

    /// Insert the suffix starting at `pos` unless its slot is already
    /// occupied (existing entries are kept, not overwritten).
    #[inline(always)]
    fn insert_suffix_if_absent(data: &[u8], suffixes: &mut SuffixStore, pos: usize) {
        debug_assert!(
            pos + MIN_MATCH_LEN <= data.len(),
            "insert_suffix_if_absent: pos {} + MIN_MATCH_LEN {} exceeds data.len() {}",
            pos,
            MIN_MATCH_LEN,
            data.len()
        );
        let key = &data[pos..pos + MIN_MATCH_LEN];
        if !suffixes.contains_key(key) {
            suffixes.insert(key, pos);
        }
    }

    /// Bulk-insert suffixes at stride `FAST_HASH_FILL_STEP`, unrolled four
    /// positions per iteration.
    #[inline(always)]
    fn add_suffixes_interleaved_fast(
        data: &[u8],
        suffixes: &mut SuffixStore,
        start: usize,
        insert_limit: usize,
    ) {
        let lane = FAST_HASH_FILL_STEP;
        let mut pos = start;

        // Pipeline-ish fill: compute and retire several hash positions per loop
        // so the fastest path keeps multiple independent hash lookups in flight.
        while pos + lane * 3 < insert_limit {
            let p0 = pos;
            let p1 = pos + lane;
            let p2 = pos + lane * 2;
            let p3 = pos + lane * 3;

            Self::insert_suffix_if_absent(data, suffixes, p0);
            Self::insert_suffix_if_absent(data, suffixes, p1);
            Self::insert_suffix_if_absent(data, suffixes, p2);
            Self::insert_suffix_if_absent(data, suffixes, p3);

            pos += lane * 4;
        }

        // Remainder loop for positions the unrolled body did not cover.
        while pos < insert_limit {
            Self::insert_suffix_if_absent(data, suffixes, pos);
            pos += lane;
        }
    }

    /// Probe the repcode offsets for a match at the current position.
    /// Only applies when no literals are pending (`literals_len == 0`);
    /// candidates are offset_hist[1], offset_hist[2] and offset_hist[0]-1.
    fn repcode_candidate(&self, data_slice: &[u8], literals_len: usize) -> Option<(usize, usize)> {
        if literals_len != 0 {
            return None;
        }

        let reps = [
            Some(self.offset_hist[1] as usize),
            Some(self.offset_hist[2] as usize),
            (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
        ];

        let mut best: Option<(usize, usize)> = None;
        // Track probed offsets so duplicated history entries are checked once.
        let mut seen = [0usize; 3];
        let mut seen_len = 0usize;
        for offset in reps.into_iter().flatten() {
            if offset == 0 {
                continue;
            }
            if seen[..seen_len].contains(&offset) {
                continue;
            }
            seen[seen_len] = offset;
            seen_len += 1;

            let Some(match_len) = self.offset_match_len(offset, data_slice) else {
                continue;
            };
            if match_len < MIN_MATCH_LEN {
                continue;
            }

            // Longer matches win; equal lengths prefer the smaller offset.
            if best.is_none_or(|(old_offset, old_len)| {
                match_len > old_len || (match_len == old_len && offset < old_offset)
            }) {
                best = Some((offset, match_len));
            }
        }
        best
    }

    /// Length of the match found `offset` bytes behind the current position.
    /// Walks backwards across window entries to locate the match start and
    /// returns `None` when the offset points outside the searchable history.
    /// The returned length is limited to the entry holding the match start.
    fn offset_match_len(&self, offset: usize, data_slice: &[u8]) -> Option<usize> {
        if offset == 0 {
            return None;
        }

        let last_idx = self.window.len().checked_sub(1)?;
        let last_entry = &self.window[last_idx];
        // Bytes at or past suffix_idx have not been committed yet and are
        // not valid match targets.
        let searchable_prefix = self.window_size - (last_entry.data.len() - self.suffix_idx);
        if offset > searchable_prefix {
            return None;
        }

        let mut remaining = offset;
        let (entry_idx, match_index) = if remaining <= self.suffix_idx {
            (last_idx, self.suffix_idx - remaining)
        } else {
            // The offset reaches into an older entry; walk backwards until
            // the remaining distance fits inside one.
            remaining -= self.suffix_idx;
            let mut found = None;
            for entry_idx in (0..last_idx).rev() {
                let len = self.window[entry_idx].data.len();
                if remaining <= len {
                    found = Some((entry_idx, len - remaining));
                    break;
                }
                remaining -= len;
            }
            found?
        };

        let match_entry = &self.window[entry_idx];
        let match_slice = &match_entry.data[match_index..];

        Some(Self::common_prefix_len(match_slice, data_slice))
    }

    /// Skip matching for the whole current window entry
    fn skip_matching(&mut self) {
        let len = self.window.last().unwrap().data.len();
        self.add_suffixes_till(len, 1);
        self.suffix_idx = len;
        self.last_idx_in_sequence = len;
    }

    /// Add a new window entry. Will panic if the last window entry hasn't been processed properly.
    /// If any resources are released by pushing the new entry they are returned via the callback
    fn add_data(
        &mut self,
        data: Vec<u8>,
        suffixes: SuffixStore,
        reuse_space: impl FnMut(Vec<u8>, SuffixStore),
    ) {
        assert!(
            self.window.is_empty() || self.suffix_idx == self.window.last().unwrap().data.len()
        );
        self.reserve(data.len(), reuse_space);
        #[cfg(debug_assertions)]
        self.concat_window.extend_from_slice(&data);

        // Every existing entry moves further away from the new tail by the
        // length of the (previously) newest entry.
        if let Some(last_len) = self.window.last().map(|last| last.data.len()) {
            for entry in self.window.iter_mut() {
                entry.base_offset += last_len;
            }
        }

        let len = data.len();
        self.window.push(WindowEntry {
            data,
            suffixes,
            base_offset: 0,
        });
        self.window_size += len;
        self.suffix_idx = 0;
        self.last_idx_in_sequence = 0;
    }

    /// Reserve space for a new window entry
    /// If any resources are released by pushing the new entry they are returned via the callback
    fn reserve(&mut self, amount: usize, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
        // A single entry larger than the whole window can never fit.
        assert!(self.max_window_size >= amount);
        while self.window_size + amount > self.max_window_size {
            let removed = self.window.remove(0);
            self.window_size -= removed.data.len();
            #[cfg(debug_assertions)]
            self.concat_window.drain(0..removed.data.len());

            let WindowEntry {
                suffixes,
                data: leaked_vec,
                base_offset: _,
            } = removed;
            reuse_space(leaked_vec, suffixes);
        }
    }
}
1407
struct DfastMatchGenerator {
    /// Maximum number of history bytes retained for matching
    max_window_size: usize,
    /// Data buffers currently held by the window
    window: VecDeque<Vec<u8>>,
    /// Total number of bytes accounted to `window`
    window_size: usize,
    // We keep a contiguous searchable history to avoid rebuilding and reseeding
    // the matcher state from disjoint block buffers on every block.
    history: Vec<u8>,
    // NOTE(review): presumably the index into `history` where searchable
    // bytes begin after trimming — confirm against the trimming code.
    history_start: usize,
    /// Absolute stream position of `history[0]`; used to translate absolute
    /// positions into `history` indices (see `extend_backwards_shared`).
    history_abs_start: usize,
    /// Recent offsets used to probe for repcode matches
    offset_hist: [u32; 3],
    /// Up to DFAST_SEARCH_DEPTH candidate positions per short-hash bucket
    short_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
    /// Up to DFAST_SEARCH_DEPTH candidate positions per long-hash bucket
    long_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
    // NOTE(review): presumably the number of bits used to index the hash
    // tables — confirm against the hashing code.
    hash_bits: usize,
    // Lazy match lookahead depth (internal tuning parameter).
    lazy_depth: u8,
}
1424
/// A match located in the history: where it starts in the absolute stream,
/// how far back it points, and how many bytes it covers.
#[derive(Copy, Clone)]
struct MatchCandidate {
    start: usize,
    offset: usize,
    match_len: usize,
}

/// Pick the better of two optional candidates.
///
/// A longer match always wins; at equal length the smaller offset wins.
/// If both fields tie, the left-hand candidate is kept. A missing side
/// yields the other one unchanged.
fn best_len_offset_candidate(
    lhs: Option<MatchCandidate>,
    rhs: Option<MatchCandidate>,
) -> Option<MatchCandidate> {
    let Some(left) = lhs else { return rhs };
    let Some(right) = rhs else { return lhs };
    let right_wins = right.match_len > left.match_len
        || (right.match_len == left.match_len && right.offset < left.offset);
    if right_wins { Some(right) } else { Some(left) }
}
1449
1450fn extend_backwards_shared(
1451    concat: &[u8],
1452    history_abs_start: usize,
1453    mut candidate_pos: usize,
1454    mut abs_pos: usize,
1455    mut match_len: usize,
1456    lit_len: usize,
1457) -> MatchCandidate {
1458    let min_abs_pos = abs_pos - lit_len;
1459    while abs_pos > min_abs_pos
1460        && candidate_pos > history_abs_start
1461        && concat[candidate_pos - history_abs_start - 1] == concat[abs_pos - history_abs_start - 1]
1462    {
1463        candidate_pos -= 1;
1464        abs_pos -= 1;
1465        match_len += 1;
1466    }
1467    MatchCandidate {
1468        start: abs_pos,
1469        offset: abs_pos - candidate_pos,
1470        match_len,
1471    }
1472}
1473
/// Find the best match at `abs_pos` that reuses one of the three most recent
/// offsets in `offset_hist`.
///
/// Mirrors the zstd repeat-offset convention: when there are no pending
/// literals (`lit_len == 0`) the candidate set shifts to `rep1`, `rep2` and
/// `rep0 - 1` (the last skipped when it would be zero); otherwise the plain
/// `rep0`/`rep1`/`rep2` history is tried. Returns `None` when no repeat
/// offset yields a match of at least `min_match_len`.
fn repcode_candidate_shared(
    concat: &[u8],
    history_abs_start: usize,
    offset_hist: [u32; 3],
    abs_pos: usize,
    lit_len: usize,
    min_match_len: usize,
) -> Option<MatchCandidate> {
    let reps = if lit_len == 0 {
        [
            Some(offset_hist[1] as usize),
            Some(offset_hist[2] as usize),
            (offset_hist[0] > 1).then_some((offset_hist[0] - 1) as usize),
        ]
    } else {
        [
            Some(offset_hist[0] as usize),
            Some(offset_hist[1] as usize),
            Some(offset_hist[2] as usize),
        ]
    };

    let current_idx = abs_pos - history_abs_start;
    // Too close to the end of the history to form a minimum-length match.
    if current_idx + min_match_len > concat.len() {
        return None;
    }

    let mut best = None;
    for rep in reps.into_iter().flatten() {
        // An offset of zero, or one reaching before position 0, is invalid.
        if rep == 0 || rep > abs_pos {
            continue;
        }
        let candidate_pos = abs_pos - rep;
        // The referenced bytes must still be inside the live history.
        if candidate_pos < history_abs_start {
            continue;
        }
        let candidate_idx = candidate_pos - history_abs_start;
        let match_len =
            MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
        if match_len >= min_match_len {
            // Extend into the pending literals before comparing against the
            // other repeat offsets.
            let candidate = extend_backwards_shared(
                concat,
                history_abs_start,
                candidate_pos,
                abs_pos,
                match_len,
                lit_len,
            );
            best = best_len_offset_candidate(best, Some(candidate));
        }
    }
    best
}
1527
/// Tuning knobs for `pick_lazy_match_shared`.
#[derive(Copy, Clone)]
struct LazyMatchConfig {
    /// A match at least this long is accepted immediately, without lookahead.
    target_len: usize,
    /// Minimum match length of the matcher using this config; also bounds
    /// how close to the end of history a lookahead probe is still possible.
    min_match_len: usize,
    /// Lookahead depth: the position two bytes ahead is only probed when
    /// this is >= 2.
    lazy_depth: u8,
    /// Absolute position one past the last searchable history byte.
    history_abs_end: usize,
}
1535
/// Shared lazy-matching policy: decide whether to emit `best` at `abs_pos`
/// or defer.
///
/// Returns `Some(best)` to emit the match now, or `None` to tell the caller
/// to emit one literal and re-search at the next position because a better
/// match begins there. `best` is accepted unconditionally once it reaches
/// `config.target_len`, or when too little history remains to probe ahead.
fn pick_lazy_match_shared(
    abs_pos: usize,
    lit_len: usize,
    best: Option<MatchCandidate>,
    config: LazyMatchConfig,
    mut best_match_at: impl FnMut(usize, usize) -> Option<MatchCandidate>,
) -> Option<MatchCandidate> {
    let best = best?;
    if best.match_len >= config.target_len
        || abs_pos + 1 + config.min_match_len > config.history_abs_end
    {
        return Some(best);
    }

    // Defer if the match one byte ahead is longer, or equally long with a
    // smaller offset.
    let next = best_match_at(abs_pos + 1, lit_len + 1);
    if let Some(next) = next
        && (next.match_len > best.match_len
            || (next.match_len == best.match_len && next.offset < best.offset))
    {
        return None;
    }

    // With depth >= 2, also defer when the match two bytes ahead is longer
    // by more than one byte.
    if config.lazy_depth >= 2 && abs_pos + 2 + config.min_match_len <= config.history_abs_end {
        let next2 = best_match_at(abs_pos + 2, lit_len + 2);
        if let Some(next2) = next2
            && next2.match_len > best.match_len + 1
        {
            return None;
        }
    }

    Some(best)
}
1569
impl DfastMatchGenerator {
    /// New matcher with an empty window and history. The hash tables are
    /// allocated lazily on first use (see `ensure_hash_tables`).
    fn new(max_window_size: usize) -> Self {
        Self {
            max_window_size,
            window: VecDeque::new(),
            window_size: 0,
            history: Vec::new(),
            history_start: 0,
            history_abs_start: 0,
            // Initial repeat-offset history; kept up to date through
            // `encode_offset_with_history` as sequences are emitted.
            offset_hist: [1, 4, 8],
            short_hash: Vec::new(),
            long_hash: Vec::new(),
            hash_bits: DFAST_HASH_BITS,
            lazy_depth: 1,
        }
    }

    /// Clamp `bits` to `[MIN_WINDOW_LOG, DFAST_HASH_BITS]` and, when the
    /// value changes, drop both hash tables so `ensure_hash_tables`
    /// re-allocates them at the new size.
    fn set_hash_bits(&mut self, bits: usize) {
        let clamped = bits.clamp(MIN_WINDOW_LOG as usize, DFAST_HASH_BITS);
        if self.hash_bits != clamped {
            self.hash_bits = clamped;
            self.short_hash = Vec::new();
            self.long_hash = Vec::new();
        }
    }

    /// Clear all matcher state. Window buffers are resized back to their full
    /// capacity and handed to `reuse_space` so callers can recycle them.
    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
        self.window_size = 0;
        self.history.clear();
        self.history_start = 0;
        self.history_abs_start = 0;
        self.offset_hist = [1, 4, 8];
        if !self.short_hash.is_empty() {
            self.short_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
            self.long_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
        }
        for mut data in self.window.drain(..) {
            data.resize(data.capacity(), 0);
            reuse_space(data);
        }
    }

    /// The most recently added block buffer.
    ///
    /// Panics if no block has been added since construction/reset.
    fn get_last_space(&self) -> &[u8] {
        self.window.back().unwrap().as_slice()
    }

    /// Append a block, evicting the oldest blocks until the new total fits
    /// `max_window_size`. Evicted buffers go to `reuse_space`; the contiguous
    /// `history` and the absolute-position bookkeeping are kept in sync.
    fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
        assert!(data.len() <= self.max_window_size);
        while self.window_size + data.len() > self.max_window_size {
            let removed = self.window.pop_front().unwrap();
            self.window_size -= removed.len();
            self.history_start += removed.len();
            self.history_abs_start += removed.len();
            reuse_space(removed);
        }
        self.compact_history();
        self.history.extend_from_slice(&data);
        self.window_size += data.len();
        self.window.push_back(data);
    }

    /// Evict oldest blocks while the window exceeds `max_window_size`.
    fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
        while self.window_size > self.max_window_size {
            let removed = self.window.pop_front().unwrap();
            self.window_size -= removed.len();
            self.history_start += removed.len();
            self.history_abs_start += removed.len();
            reuse_space(removed);
        }
    }

    /// Seed the hash tables with every position of the newest block without
    /// emitting any sequences.
    fn skip_matching(&mut self) {
        self.ensure_hash_tables();
        let current_len = self.window.back().unwrap().len();
        let current_abs_start = self.history_abs_start + self.window_size - current_len;
        self.insert_positions(current_abs_start, current_abs_start + current_len);
    }

    /// Run match finding over the newest block, emitting `Sequence::Triple`
    /// and a trailing `Sequence::Literals` through `handle_sequence`.
    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
        self.ensure_hash_tables();

        let current_len = self.window.back().unwrap().len();
        if current_len == 0 {
            return;
        }

        let current_abs_start = self.history_abs_start + self.window_size - current_len;

        let mut pos = 0usize;
        let mut literals_start = 0usize;
        while pos + DFAST_MIN_MATCH_LEN <= current_len {
            let abs_pos = current_abs_start + pos;
            let lit_len = pos - literals_start;

            let best = self.best_match(abs_pos, lit_len);
            if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
                // Index every position covered by the match so later searches
                // can find candidates inside it.
                self.insert_positions(abs_pos, candidate.start + candidate.match_len);
                let current = self.window.back().unwrap().as_slice();
                // `candidate.start` may be before `abs_pos` after backwards
                // extension, but never before `literals_start`.
                let start = candidate.start - current_abs_start;
                let literals = &current[literals_start..start];
                handle_sequence(Sequence::Triple {
                    literals,
                    offset: candidate.offset,
                    match_len: candidate.match_len,
                });
                // The encoded offset value is irrelevant here; we only need the
                // side effect on offset history for future rep-code matching.
                let _ = encode_offset_with_history(
                    candidate.offset as u32,
                    literals.len() as u32,
                    &mut self.offset_hist,
                );
                pos = start + candidate.match_len;
                literals_start = pos;
            } else {
                // No accepted match: index this position and retry one byte
                // further on, growing the pending literal run.
                self.insert_position(abs_pos);
                pos += 1;
            }
        }

        if literals_start < current_len {
            // We stop inserting once fewer than DFAST_MIN_MATCH_LEN bytes remain.
            // The last boundary-spanning start that can seed a dfast hash is
            // still inserted by the loop above; `dfast_inserts_tail_positions_
            // for_next_block_matching()` asserts that the next block can match
            // immediately at the boundary without eagerly seeding the final
            // DFAST_MIN_MATCH_LEN - 1 bytes here.
            let current = self.window.back().unwrap().as_slice();
            handle_sequence(Sequence::Literals {
                literals: &current[literals_start..],
            });
        }
    }

    /// Allocate both hash tables at the current `hash_bits` size when they
    /// are missing or sized for a different configuration.
    fn ensure_hash_tables(&mut self) {
        let table_len = 1usize << self.hash_bits;
        if self.short_hash.len() != table_len {
            // This is intentionally lazy so Fastest/Uncompressed never pay the
            // ~dfast-level memory cost. The current size tracks the issue's
            // zstd level-3 style parameters rather than a generic low-memory preset.
            self.short_hash = alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; table_len];
            self.long_hash = alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; table_len];
        }
    }

    /// Physically drop evicted bytes from the front of `history`, but only
    /// once the dead prefix reaches a full window or half the buffer, so the
    /// O(n) drain stays amortized.
    fn compact_history(&mut self) {
        if self.history_start == 0 {
            return;
        }
        if self.history_start >= self.max_window_size
            || self.history_start * 2 >= self.history.len()
        {
            self.history.drain(..self.history_start);
            self.history_start = 0;
        }
    }

    /// The still-searchable suffix of the history buffer.
    fn live_history(&self) -> &[u8] {
        &self.history[self.history_start..]
    }

    /// Absolute position one past the last live history byte.
    fn history_abs_end(&self) -> usize {
        self.history_abs_start + self.live_history().len()
    }

    /// Best candidate at `abs_pos`: repeat-offset and hash-table candidates
    /// combined (longer match wins; ties prefer the smaller offset).
    fn best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        let rep = self.repcode_candidate(abs_pos, lit_len);
        let hash = self.hash_candidate(abs_pos, lit_len);
        best_len_offset_candidate(rep, hash)
    }

    /// Apply the shared lazy-match policy with this matcher's parameters.
    /// `None` means: defer, a better match starts a byte or two later.
    fn pick_lazy_match(
        &self,
        abs_pos: usize,
        lit_len: usize,
        best: Option<MatchCandidate>,
    ) -> Option<MatchCandidate> {
        pick_lazy_match_shared(
            abs_pos,
            lit_len,
            best,
            LazyMatchConfig {
                target_len: DFAST_TARGET_LEN,
                min_match_len: DFAST_MIN_MATCH_LEN,
                lazy_depth: self.lazy_depth,
                history_abs_end: self.history_abs_end(),
            },
            |next_pos, next_lit_len| self.best_match(next_pos, next_lit_len),
        )
    }

    /// Repeat-offset candidate via the shared helper.
    fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        repcode_candidate_shared(
            self.live_history(),
            self.history_abs_start,
            self.offset_hist,
            abs_pos,
            lit_len,
            DFAST_MIN_MATCH_LEN,
        )
    }

    /// Best candidate from the hash tables. The long (8-byte) table is probed
    /// before the short (4-byte) one; both probes return early once a match
    /// reaches `DFAST_TARGET_LEN`.
    fn hash_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        let concat = self.live_history();
        let current_idx = abs_pos - self.history_abs_start;
        let mut best = None;
        for candidate_pos in self.long_candidates(abs_pos) {
            // Skip stale entries pointing before the live history or at/after
            // the probe position.
            if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos {
                continue;
            }
            let candidate_idx = candidate_pos - self.history_abs_start;
            let match_len =
                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
            if match_len >= DFAST_MIN_MATCH_LEN {
                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
                best = best_len_offset_candidate(best, Some(candidate));
                if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) {
                    return best;
                }
            }
        }

        for candidate_pos in self.short_candidates(abs_pos) {
            if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos {
                continue;
            }
            let candidate_idx = candidate_pos - self.history_abs_start;
            let match_len =
                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
            if match_len >= DFAST_MIN_MATCH_LEN {
                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
                best = best_len_offset_candidate(best, Some(candidate));
                if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) {
                    return best;
                }
            }
        }
        best
    }

    /// Backwards extension over this matcher's live history (see
    /// `extend_backwards_shared`).
    fn extend_backwards(
        &self,
        candidate_pos: usize,
        abs_pos: usize,
        match_len: usize,
        lit_len: usize,
    ) -> MatchCandidate {
        extend_backwards_shared(
            self.live_history(),
            self.history_abs_start,
            candidate_pos,
            abs_pos,
            match_len,
            lit_len,
        )
    }

    /// Insert every absolute position in `start..end` into the hash tables.
    fn insert_positions(&mut self, start: usize, end: usize) {
        for pos in start..end {
            self.insert_position(pos);
        }
    }

    /// Insert `pos` into the short (4-byte key) and long (8-byte key) tables.
    /// Buckets are most-recent-first; a position already at the front is not
    /// re-inserted. Positions too close to the end of history are skipped per
    /// table.
    fn insert_position(&mut self, pos: usize) {
        let idx = pos - self.history_abs_start;
        let short = {
            let concat = self.live_history();
            (idx + 4 <= concat.len()).then(|| self.hash4(&concat[idx..]))
        };
        if let Some(short) = short {
            let bucket = &mut self.short_hash[short];
            if bucket[0] != pos {
                // Shift the bucket down one slot, dropping the oldest entry.
                bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
                bucket[0] = pos;
            }
        }

        let long = {
            let concat = self.live_history();
            (idx + 8 <= concat.len()).then(|| self.hash8(&concat[idx..]))
        };
        if let Some(long) = long {
            let bucket = &mut self.long_hash[long];
            if bucket[0] != pos {
                bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
                bucket[0] = pos;
            }
        }
    }

    /// Candidates from the short table for `pos`. Empty slots are filtered
    /// out; yields nothing when fewer than 4 bytes remain at `pos`.
    fn short_candidates(&self, pos: usize) -> impl Iterator<Item = usize> + '_ {
        let concat = self.live_history();
        let idx = pos - self.history_abs_start;
        (idx + 4 <= concat.len())
            .then(|| self.short_hash[self.hash4(&concat[idx..])])
            .into_iter()
            .flatten()
            .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
    }

    /// Candidates from the long table for `pos` (8-byte key variant of
    /// `short_candidates`).
    fn long_candidates(&self, pos: usize) -> impl Iterator<Item = usize> + '_ {
        let concat = self.live_history();
        let idx = pos - self.history_abs_start;
        (idx + 8 <= concat.len())
            .then(|| self.long_hash[self.hash8(&concat[idx..])])
            .into_iter()
            .flatten()
            .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
    }

    /// Hash of the first 4 bytes of `data` (little-endian read).
    fn hash4(&self, data: &[u8]) -> usize {
        let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64;
        self.hash_index(value)
    }

    /// Hash of the first 8 bytes of `data` (little-endian read).
    fn hash8(&self, data: &[u8]) -> usize {
        let value = u64::from_le_bytes(data[..8].try_into().unwrap());
        self.hash_index(value)
    }

    /// Multiplicative hash: keep the top `hash_bits` bits of the product.
    fn hash_index(&self, value: u64) -> usize {
        const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
        ((value.wrapping_mul(PRIME)) >> (64 - self.hash_bits)) as usize
    }
}
1895
/// Row-based matcher: candidate positions are grouped into fixed-size rows
/// selected by hash, each slot guarded by a one-byte tag for cheap filtering;
/// combined with repeat-offset candidates and a shallow lazy lookahead.
struct RowMatchGenerator {
    /// Upper bound on the total number of bytes kept in `window`.
    max_window_size: usize,
    /// Block buffers still inside the search window, oldest first.
    window: VecDeque<Vec<u8>>,
    /// Sum of the lengths of all buffers in `window`.
    window_size: usize,
    /// Contiguous copy of the window for O(1) slicing during match finding.
    history: Vec<u8>,
    /// Offset into `history` of the first still-live byte.
    history_start: usize,
    /// Absolute stream position of the first live history byte.
    history_abs_start: usize,
    /// Repeat-offset history, updated via `encode_offset_with_history`.
    offset_hist: [u32; 3],
    /// log2 of the number of rows.
    row_hash_log: usize,
    /// log2 of the entries per row (clamped to 4..=6 in `configure`).
    row_log: usize,
    /// Maximum slots probed per row lookup.
    search_depth: usize,
    /// A match at least this long is accepted without further search.
    target_len: usize,
    /// Lazy match lookahead depth (internal tuning parameter).
    lazy_depth: u8,
    /// Per-row ring-buffer head index (at most 64 entries, fits a u8).
    row_heads: Vec<u8>,
    /// Flattened rows of candidate absolute positions.
    row_positions: Vec<usize>,
    /// One tag byte per slot, compared before `row_positions` is touched.
    row_tags: Vec<u8>,
}
1913
impl RowMatchGenerator {
    /// New matcher with an empty window and history. The row tables are
    /// allocated lazily by `ensure_tables`.
    fn new(max_window_size: usize) -> Self {
        Self {
            max_window_size,
            window: VecDeque::new(),
            window_size: 0,
            history: Vec::new(),
            history_start: 0,
            history_abs_start: 0,
            // Initial repeat-offset history, updated through
            // `encode_offset_with_history` as sequences are emitted.
            offset_hist: [1, 4, 8],
            row_hash_log: ROW_HASH_BITS - ROW_LOG,
            row_log: ROW_LOG,
            search_depth: ROW_SEARCH_DEPTH,
            target_len: ROW_TARGET_LEN,
            lazy_depth: 1,
            row_heads: Vec::new(),
            row_positions: Vec::new(),
            row_tags: Vec::new(),
        }
    }

    /// Clamp the total hash bits and derive the row count (`row_hash_log`).
    /// When it changes, the tables are cleared so `ensure_tables`
    /// re-allocates them at the new size.
    fn set_hash_bits(&mut self, bits: usize) {
        let clamped = bits.clamp(self.row_log + 1, ROW_HASH_BITS);
        let row_hash_log = clamped.saturating_sub(self.row_log);
        if self.row_hash_log != row_hash_log {
            self.row_hash_log = row_hash_log;
            self.row_heads.clear();
            self.row_positions.clear();
            self.row_tags.clear();
        }
    }

    /// Apply a tuning preset. `row_log` is clamped to 4..=6 (16-64 entries
    /// per row, so head indices fit a `u8`); hash bits are kept large enough
    /// to address at least two rows.
    fn configure(&mut self, config: RowConfig) {
        self.row_log = config.row_log.clamp(4, 6);
        self.search_depth = config.search_depth;
        self.target_len = config.target_len;
        self.set_hash_bits(config.hash_bits.max(self.row_log + 1));
    }

    /// Clear all matcher state. Window buffers are resized to full capacity
    /// and handed to `reuse_space` for recycling.
    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
        self.window_size = 0;
        self.history.clear();
        self.history_start = 0;
        self.history_abs_start = 0;
        self.offset_hist = [1, 4, 8];
        self.row_heads.fill(0);
        self.row_positions.fill(ROW_EMPTY_SLOT);
        self.row_tags.fill(0);
        for mut data in self.window.drain(..) {
            data.resize(data.capacity(), 0);
            reuse_space(data);
        }
    }

    /// The most recently added block buffer.
    ///
    /// Panics if no block has been added since construction/reset.
    fn get_last_space(&self) -> &[u8] {
        self.window.back().unwrap().as_slice()
    }

    /// Append a block, evicting the oldest blocks until the new total fits
    /// `max_window_size`; keeps the contiguous `history` in sync.
    fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
        assert!(data.len() <= self.max_window_size);
        while self.window_size + data.len() > self.max_window_size {
            let removed = self.window.pop_front().unwrap();
            self.window_size -= removed.len();
            self.history_start += removed.len();
            self.history_abs_start += removed.len();
            reuse_space(removed);
        }
        self.compact_history();
        self.history.extend_from_slice(&data);
        self.window_size += data.len();
        self.window.push_back(data);
    }

    /// Evict oldest blocks while the window exceeds `max_window_size`.
    fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
        while self.window_size > self.max_window_size {
            let removed = self.window.pop_front().unwrap();
            self.window_size -= removed.len();
            self.history_start += removed.len();
            self.history_abs_start += removed.len();
            reuse_space(removed);
        }
    }

    /// Seed the row tables with the newest block's positions without emitting
    /// sequences. Positions just before the block whose hash key spans the
    /// boundary are back-filled first.
    fn skip_matching(&mut self) {
        self.ensure_tables();
        let current_len = self.window.back().unwrap().len();
        let current_abs_start = self.history_abs_start + self.window_size - current_len;
        let backfill_start = self.backfill_start(current_abs_start);
        if backfill_start < current_abs_start {
            self.insert_positions(backfill_start, current_abs_start);
        }
        self.insert_positions(current_abs_start, current_abs_start + current_len);
    }

    /// Run match finding over the newest block, emitting `Sequence::Triple`
    /// and a trailing `Sequence::Literals` through `handle_sequence`.
    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
        self.ensure_tables();

        let current_len = self.window.back().unwrap().len();
        if current_len == 0 {
            return;
        }
        let current_abs_start = self.history_abs_start + self.window_size - current_len;
        // Index boundary-spanning starts from the previous block first.
        let backfill_start = self.backfill_start(current_abs_start);
        if backfill_start < current_abs_start {
            self.insert_positions(backfill_start, current_abs_start);
        }

        let mut pos = 0usize;
        let mut literals_start = 0usize;
        while pos + ROW_MIN_MATCH_LEN <= current_len {
            let abs_pos = current_abs_start + pos;
            let lit_len = pos - literals_start;

            let best = self.best_match(abs_pos, lit_len);
            if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
                // Index every position covered by the match so later searches
                // can find candidates inside it.
                self.insert_positions(abs_pos, candidate.start + candidate.match_len);
                let current = self.window.back().unwrap().as_slice();
                // `candidate.start` may precede `abs_pos` after backwards
                // extension, but never `literals_start`.
                let start = candidate.start - current_abs_start;
                let literals = &current[literals_start..start];
                handle_sequence(Sequence::Triple {
                    literals,
                    offset: candidate.offset,
                    match_len: candidate.match_len,
                });
                // Only the offset-history side effect matters here; the
                // encoded value itself is discarded.
                let _ = encode_offset_with_history(
                    candidate.offset as u32,
                    literals.len() as u32,
                    &mut self.offset_hist,
                );
                pos = start + candidate.match_len;
                literals_start = pos;
            } else {
                self.insert_position(abs_pos);
                pos += 1;
            }
        }

        // Seed the remaining tail positions (every start that still has a
        // full hash key) so the next block can match across the boundary.
        while pos + ROW_HASH_KEY_LEN <= current_len {
            self.insert_position(current_abs_start + pos);
            pos += 1;
        }

        if literals_start < current_len {
            let current = self.window.back().unwrap().as_slice();
            handle_sequence(Sequence::Literals {
                literals: &current[literals_start..],
            });
        }
    }

    /// (Re-)allocate the row tables when their total size does not match the
    /// current `row_hash_log`/`row_log` configuration.
    fn ensure_tables(&mut self) {
        let row_count = 1usize << self.row_hash_log;
        let row_entries = 1usize << self.row_log;
        let total = row_count * row_entries;
        if self.row_positions.len() != total {
            self.row_heads = alloc::vec![0; row_count];
            self.row_positions = alloc::vec![ROW_EMPTY_SLOT; total];
            self.row_tags = alloc::vec![0; total];
        }
    }

    /// Physically drop evicted bytes from the front of `history`, but only
    /// once the dead prefix reaches a full window or half the buffer, so the
    /// O(n) drain stays amortized.
    fn compact_history(&mut self) {
        if self.history_start == 0 {
            return;
        }
        if self.history_start >= self.max_window_size
            || self.history_start * 2 >= self.history.len()
        {
            self.history.drain(..self.history_start);
            self.history_start = 0;
        }
    }

    /// The still-searchable suffix of the history buffer.
    fn live_history(&self) -> &[u8] {
        &self.history[self.history_start..]
    }

    /// Absolute position one past the last live history byte.
    fn history_abs_end(&self) -> usize {
        self.history_abs_start + self.live_history().len()
    }

    /// Row index and tag byte for the `ROW_HASH_KEY_LEN` bytes at `abs_pos`,
    /// both carved out of the top bits of a multiplicative hash. Returns
    /// `None` when fewer than `ROW_HASH_KEY_LEN` bytes remain.
    fn hash_and_row(&self, abs_pos: usize) -> Option<(usize, u8)> {
        let idx = abs_pos - self.history_abs_start;
        let concat = self.live_history();
        if idx + ROW_HASH_KEY_LEN > concat.len() {
            return None;
        }
        let value =
            u32::from_le_bytes(concat[idx..idx + ROW_HASH_KEY_LEN].try_into().unwrap()) as u64;
        const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
        let hash = value.wrapping_mul(PRIME);
        // Take row bits and tag bits together from the top of the hash, then
        // split: high part selects the row, low part becomes the tag.
        let total_bits = self.row_hash_log + ROW_TAG_BITS;
        let combined = hash >> (u64::BITS as usize - total_bits);
        let row_mask = (1usize << self.row_hash_log) - 1;
        let row = ((combined >> ROW_TAG_BITS) as usize) & row_mask;
        let tag = combined as u8;
        Some((row, tag))
    }

    /// First absolute position — at most `ROW_HASH_KEY_LEN - 1` before the
    /// block start, clamped to the live history — that still needs indexing
    /// because its hash key spans the block boundary.
    fn backfill_start(&self, current_abs_start: usize) -> usize {
        current_abs_start
            .saturating_sub(ROW_HASH_KEY_LEN - 1)
            .max(self.history_abs_start)
    }

    /// Best candidate at `abs_pos`: repeat-offset and row-table candidates
    /// combined (longer match wins; ties prefer the smaller offset).
    fn best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        let rep = self.repcode_candidate(abs_pos, lit_len);
        let row = self.row_candidate(abs_pos, lit_len);
        best_len_offset_candidate(rep, row)
    }

    /// Apply the shared lazy-match policy with this matcher's parameters.
    /// `None` means: defer, a better match starts a byte or two later.
    fn pick_lazy_match(
        &self,
        abs_pos: usize,
        lit_len: usize,
        best: Option<MatchCandidate>,
    ) -> Option<MatchCandidate> {
        pick_lazy_match_shared(
            abs_pos,
            lit_len,
            best,
            LazyMatchConfig {
                target_len: self.target_len,
                min_match_len: ROW_MIN_MATCH_LEN,
                lazy_depth: self.lazy_depth,
                history_abs_end: self.history_abs_end(),
            },
            |next_pos, next_lit_len| self.best_match(next_pos, next_lit_len),
        )
    }

    /// Repeat-offset candidate via the shared helper.
    fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        repcode_candidate_shared(
            self.live_history(),
            self.history_abs_start,
            self.offset_hist,
            abs_pos,
            lit_len,
            ROW_MIN_MATCH_LEN,
        )
    }

    /// Best candidate from the row table. Walks at most `search_depth` slots
    /// starting at the row head (newest first), filtering by tag byte before
    /// touching positions; returns early at `target_len`.
    fn row_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        let concat = self.live_history();
        let current_idx = abs_pos - self.history_abs_start;
        if current_idx + ROW_MIN_MATCH_LEN > concat.len() {
            return None;
        }

        let (row, tag) = self.hash_and_row(abs_pos)?;
        let row_entries = 1usize << self.row_log;
        let row_mask = row_entries - 1;
        let row_base = row << self.row_log;
        let head = self.row_heads[row] as usize;
        let max_walk = self.search_depth.min(row_entries);

        let mut best = None;
        for i in 0..max_walk {
            let slot = (head + i) & row_mask;
            let idx = row_base + slot;
            // Cheap tag compare first; only matching tags cost a position
            // lookup and byte comparison.
            if self.row_tags[idx] != tag {
                continue;
            }
            let candidate_pos = self.row_positions[idx];
            // Skip empty slots and stale entries outside the live history or
            // at/after the probe position.
            if candidate_pos == ROW_EMPTY_SLOT
                || candidate_pos < self.history_abs_start
                || candidate_pos >= abs_pos
            {
                continue;
            }
            let candidate_idx = candidate_pos - self.history_abs_start;
            let match_len =
                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
            if match_len >= ROW_MIN_MATCH_LEN {
                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
                best = best_len_offset_candidate(best, Some(candidate));
                if best.is_some_and(|best| best.match_len >= self.target_len) {
                    return best;
                }
            }
        }
        best
    }

    /// Backwards extension over this matcher's live history (see
    /// `extend_backwards_shared`).
    fn extend_backwards(
        &self,
        candidate_pos: usize,
        abs_pos: usize,
        match_len: usize,
        lit_len: usize,
    ) -> MatchCandidate {
        extend_backwards_shared(
            self.live_history(),
            self.history_abs_start,
            candidate_pos,
            abs_pos,
            match_len,
            lit_len,
        )
    }

    /// Insert every absolute position in `start..end` into the row table.
    fn insert_positions(&mut self, start: usize, end: usize) {
        for pos in start..end {
            self.insert_position(pos);
        }
    }

    /// Insert `abs_pos` into its row: step the ring-buffer head back one slot
    /// and overwrite it, so rows always hold the most recent positions and
    /// lookups starting at the head see newest entries first.
    fn insert_position(&mut self, abs_pos: usize) {
        let Some((row, tag)) = self.hash_and_row(abs_pos) else {
            return;
        };
        let row_entries = 1usize << self.row_log;
        let row_mask = row_entries - 1;
        let row_base = row << self.row_log;
        let head = self.row_heads[row] as usize;
        let next = head.wrapping_sub(1) & row_mask;
        self.row_heads[row] = next as u8;
        self.row_tags[row_base + next] = tag;
        self.row_positions[row_base + next] = abs_pos;
    }
}
2235
/// Hash-chain matcher state. The chain-walking search itself lives further
/// down in this file; the names and `configure`/`reset` handling suggest a
/// zstd-style hash table plus chain table layout.
struct HcMatchGenerator {
    /// Upper bound on the total number of bytes kept in `window`.
    max_window_size: usize,
    /// Block buffers still inside the search window, oldest first.
    window: VecDeque<Vec<u8>>,
    /// Sum of the lengths of all buffers in `window`.
    window_size: usize,
    /// Contiguous copy of the window for O(1) slicing during match finding.
    history: Vec<u8>,
    /// Offset into `history` of the first still-live byte.
    history_start: usize,
    /// Absolute stream position of the first live history byte.
    history_abs_start: usize,
    // NOTE(review): semantics not visible in this chunk — presumably the
    // base that the u32 table entries are relative to; confirm at use sites.
    position_base: usize,
    /// Repeat-offset history, updated via `encode_offset_with_history`.
    offset_hist: [u32; 3],
    /// Hash table entries; `HC_EMPTY` marks an unused slot (see `reset`).
    hash_table: Vec<u32>,
    /// Chain table entries; `HC_EMPTY` marks an unused slot.
    chain_table: Vec<u32>,
    /// Lazy match lookahead depth (internal tuning parameter).
    lazy_depth: u8,
    /// log2 of the hash table size.
    hash_log: usize,
    /// log2 of the chain table size.
    chain_log: usize,
    /// Search depth bound, capped at `MAX_HC_SEARCH_DEPTH` in `configure`.
    search_depth: usize,
    /// A match at least this long is accepted without further search.
    target_len: usize,
}
2253
2254impl HcMatchGenerator {
    /// New matcher with an empty window and history. The hash and chain
    /// tables are allocated lazily; cleared tables are rebuilt at the
    /// configured size on next use.
    fn new(max_window_size: usize) -> Self {
        Self {
            max_window_size,
            window: VecDeque::new(),
            window_size: 0,
            history: Vec::new(),
            history_start: 0,
            history_abs_start: 0,
            position_base: 0,
            // Initial repeat-offset history, updated through
            // `encode_offset_with_history` as sequences are emitted.
            offset_hist: [1, 4, 8],
            hash_table: Vec::new(),
            chain_table: Vec::new(),
            // Deeper lookahead than the dfast/row matchers (depth 2).
            lazy_depth: 2,
            hash_log: HC_HASH_LOG,
            chain_log: HC_CHAIN_LOG,
            search_depth: HC_SEARCH_DEPTH,
            target_len: HC_TARGET_LEN,
        }
    }
2274
2275    fn configure(&mut self, config: HcConfig) {
2276        let resize = self.hash_log != config.hash_log || self.chain_log != config.chain_log;
2277        self.hash_log = config.hash_log;
2278        self.chain_log = config.chain_log;
2279        self.search_depth = config.search_depth.min(MAX_HC_SEARCH_DEPTH);
2280        self.target_len = config.target_len;
2281        if resize && !self.hash_table.is_empty() {
2282            // Force reallocation on next ensure_tables() call.
2283            self.hash_table.clear();
2284            self.chain_table.clear();
2285        }
2286    }
2287
2288    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
2289        self.window_size = 0;
2290        self.history.clear();
2291        self.history_start = 0;
2292        self.history_abs_start = 0;
2293        self.position_base = 0;
2294        self.offset_hist = [1, 4, 8];
2295        if !self.hash_table.is_empty() {
2296            self.hash_table.fill(HC_EMPTY);
2297            self.chain_table.fill(HC_EMPTY);
2298        }
2299        for mut data in self.window.drain(..) {
2300            data.resize(data.capacity(), 0);
2301            reuse_space(data);
2302        }
2303    }
2304
2305    fn get_last_space(&self) -> &[u8] {
2306        self.window.back().unwrap().as_slice()
2307    }
2308
2309    // History duplicates window data for O(1) contiguous access during match
2310    // finding (common_prefix_len, extend_backwards). Same pattern as
2311    // DfastMatchGenerator. Peak: ~2x window size for data buffers + 6 MB tables.
2312    fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
2313        assert!(data.len() <= self.max_window_size);
2314        while self.window_size + data.len() > self.max_window_size {
2315            let removed = self.window.pop_front().unwrap();
2316            self.window_size -= removed.len();
2317            self.history_start += removed.len();
2318            self.history_abs_start += removed.len();
2319            reuse_space(removed);
2320        }
2321        self.compact_history();
2322        self.history.extend_from_slice(&data);
2323        self.window_size += data.len();
2324        self.window.push_back(data);
2325    }
2326
2327    fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
2328        while self.window_size > self.max_window_size {
2329            let removed = self.window.pop_front().unwrap();
2330            self.window_size -= removed.len();
2331            self.history_start += removed.len();
2332            self.history_abs_start += removed.len();
2333            reuse_space(removed);
2334        }
2335    }
2336
2337    /// Backfill positions from the tail of the previous slice that couldn't be
2338    /// hashed at the time (insert_position needs 4 bytes of lookahead).
2339    fn backfill_boundary_positions(&mut self, current_abs_start: usize) {
2340        let backfill_start = current_abs_start
2341            .saturating_sub(3)
2342            .max(self.history_abs_start);
2343        if backfill_start < current_abs_start {
2344            self.insert_positions(backfill_start, current_abs_start);
2345        }
2346    }
2347
2348    fn skip_matching(&mut self) {
2349        self.ensure_tables();
2350        let current_len = self.window.back().unwrap().len();
2351        let current_abs_start = self.history_abs_start + self.window_size - current_len;
2352        self.backfill_boundary_positions(current_abs_start);
2353        self.insert_positions(current_abs_start, current_abs_start + current_len);
2354    }
2355
2356    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
2357        self.ensure_tables();
2358
2359        let current_len = self.window.back().unwrap().len();
2360        if current_len == 0 {
2361            return;
2362        }
2363
2364        let current_abs_start = self.history_abs_start + self.window_size - current_len;
2365        self.backfill_boundary_positions(current_abs_start);
2366
2367        let mut pos = 0usize;
2368        let mut literals_start = 0usize;
2369        while pos + HC_MIN_MATCH_LEN <= current_len {
2370            let abs_pos = current_abs_start + pos;
2371            let lit_len = pos - literals_start;
2372
2373            let best = self.find_best_match(abs_pos, lit_len);
2374            if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
2375                self.insert_positions(abs_pos, candidate.start + candidate.match_len);
2376                let current = self.window.back().unwrap().as_slice();
2377                let start = candidate.start - current_abs_start;
2378                let literals = &current[literals_start..start];
2379                handle_sequence(Sequence::Triple {
2380                    literals,
2381                    offset: candidate.offset,
2382                    match_len: candidate.match_len,
2383                });
2384                let _ = encode_offset_with_history(
2385                    candidate.offset as u32,
2386                    literals.len() as u32,
2387                    &mut self.offset_hist,
2388                );
2389                pos = start + candidate.match_len;
2390                literals_start = pos;
2391            } else {
2392                self.insert_position(abs_pos);
2393                pos += 1;
2394            }
2395        }
2396
2397        // Insert remaining hashable positions in the tail (the matching loop
2398        // stops at HC_MIN_MATCH_LEN but insert_position only needs 4 bytes).
2399        while pos + 4 <= current_len {
2400            self.insert_position(current_abs_start + pos);
2401            pos += 1;
2402        }
2403
2404        if literals_start < current_len {
2405            let current = self.window.back().unwrap().as_slice();
2406            handle_sequence(Sequence::Literals {
2407                literals: &current[literals_start..],
2408            });
2409        }
2410    }
2411
2412    fn ensure_tables(&mut self) {
2413        if self.hash_table.is_empty() {
2414            self.hash_table = alloc::vec![HC_EMPTY; 1 << self.hash_log];
2415            self.chain_table = alloc::vec![HC_EMPTY; 1 << self.chain_log];
2416        }
2417    }
2418
2419    fn compact_history(&mut self) {
2420        if self.history_start == 0 {
2421            return;
2422        }
2423        if self.history_start >= self.max_window_size
2424            || self.history_start * 2 >= self.history.len()
2425        {
2426            self.history.drain(..self.history_start);
2427            self.history_start = 0;
2428        }
2429    }
2430
2431    fn live_history(&self) -> &[u8] {
2432        &self.history[self.history_start..]
2433    }
2434
2435    fn history_abs_end(&self) -> usize {
2436        self.history_abs_start + self.live_history().len()
2437    }
2438
2439    fn hash_position(&self, data: &[u8]) -> usize {
2440        let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64;
2441        const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
2442        ((value.wrapping_mul(PRIME)) >> (64 - self.hash_log)) as usize
2443    }
2444
2445    fn relative_position(&self, abs_pos: usize) -> Option<u32> {
2446        let rel = abs_pos.checked_sub(self.position_base)?;
2447        let rel_u32 = u32::try_from(rel).ok()?;
2448        // Positions are stored as (relative_pos + 1), with 0 reserved as the
2449        // empty sentinel. So the raw relative position itself must stay
2450        // strictly below u32::MAX.
2451        (rel_u32 < u32::MAX).then_some(rel_u32)
2452    }
2453
2454    fn maybe_rebase_positions(&mut self, abs_pos: usize) {
2455        let needs_rebase = self
2456            .relative_position(abs_pos)
2457            .is_none_or(|relative| relative >= u32::MAX - 1);
2458        if !needs_rebase {
2459            return;
2460        }
2461
2462        // Keep all live history addressable after rebase.
2463        self.position_base = self.history_abs_start;
2464        self.hash_table.fill(HC_EMPTY);
2465        self.chain_table.fill(HC_EMPTY);
2466
2467        let history_start = self.history_abs_start;
2468        // Rebuild only the already-inserted prefix. The caller inserts abs_pos
2469        // immediately after this, and later positions are added in-order.
2470        for pos in history_start..abs_pos {
2471            self.insert_position_no_rebase(pos);
2472        }
2473    }
2474
2475    fn insert_position(&mut self, abs_pos: usize) {
2476        self.maybe_rebase_positions(abs_pos);
2477        self.insert_position_no_rebase(abs_pos);
2478    }
2479
2480    fn insert_position_no_rebase(&mut self, abs_pos: usize) {
2481        let idx = abs_pos - self.history_abs_start;
2482        let concat = self.live_history();
2483        if idx + 4 > concat.len() {
2484            return;
2485        }
2486        let hash = self.hash_position(&concat[idx..]);
2487        let Some(relative_pos) = self.relative_position(abs_pos) else {
2488            return;
2489        };
2490        let stored = relative_pos + 1;
2491        let chain_idx = relative_pos as usize & ((1 << self.chain_log) - 1);
2492        let prev = self.hash_table[hash];
2493        self.chain_table[chain_idx] = prev;
2494        self.hash_table[hash] = stored;
2495    }
2496
2497    fn insert_positions(&mut self, start: usize, end: usize) {
2498        for pos in start..end {
2499            self.insert_position(pos);
2500        }
2501    }
2502
2503    // Fixed-size stack array is intentional: it avoids heap allocation on
2504    // the hot path and the sentinel loop exits at self.search_depth.
2505    fn chain_candidates(&self, abs_pos: usize) -> [usize; MAX_HC_SEARCH_DEPTH] {
2506        let mut buf = [usize::MAX; MAX_HC_SEARCH_DEPTH];
2507        let idx = abs_pos - self.history_abs_start;
2508        let concat = self.live_history();
2509        if idx + 4 > concat.len() {
2510            return buf;
2511        }
2512        let hash = self.hash_position(&concat[idx..]);
2513        let chain_mask = (1 << self.chain_log) - 1;
2514
2515        let mut cur = self.hash_table[hash];
2516        let mut filled = 0;
2517        // Follow chain up to search_depth valid candidates, skipping stale
2518        // entries (evicted from window) instead of stopping at them.
2519        // Stored values are (relative_pos + 1); decode with wrapping_sub(1)
2520        // and recover absolute position via position_base + relative.
2521        // Break on self-loops (masked chain_idx collision at periodicity).
2522        // Cap total steps at 4x search depth to bound time spent skipping
2523        // stale entries while still finding valid candidates deeper in chain.
2524        let mut steps = 0;
2525        let max_chain_steps = self.search_depth * 4;
2526        while filled < self.search_depth && steps < max_chain_steps {
2527            if cur == HC_EMPTY {
2528                break;
2529            }
2530            let candidate_rel = cur.wrapping_sub(1) as usize;
2531            let candidate_abs = self.position_base + candidate_rel;
2532            let next = self.chain_table[candidate_rel & chain_mask];
2533            steps += 1;
2534            if next == cur {
2535                // Self-loop: two positions share chain_idx, stop to avoid
2536                // spinning on the same candidate forever.
2537                if candidate_abs >= self.history_abs_start && candidate_abs < abs_pos {
2538                    buf[filled] = candidate_abs;
2539                }
2540                break;
2541            }
2542            cur = next;
2543            if candidate_abs < self.history_abs_start || candidate_abs >= abs_pos {
2544                continue;
2545            }
2546            buf[filled] = candidate_abs;
2547            filled += 1;
2548        }
2549        buf
2550    }
2551
2552    fn find_best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
2553        let rep = self.repcode_candidate(abs_pos, lit_len);
2554        let hash = self.hash_chain_candidate(abs_pos, lit_len);
2555        Self::better_candidate(rep, hash)
2556    }
2557
2558    fn hash_chain_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
2559        let concat = self.live_history();
2560        let current_idx = abs_pos - self.history_abs_start;
2561        if current_idx + HC_MIN_MATCH_LEN > concat.len() {
2562            return None;
2563        }
2564
2565        let mut best: Option<MatchCandidate> = None;
2566        for candidate_abs in self.chain_candidates(abs_pos) {
2567            if candidate_abs == usize::MAX {
2568                break;
2569            }
2570            let candidate_idx = candidate_abs - self.history_abs_start;
2571            let match_len =
2572                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
2573            if match_len >= HC_MIN_MATCH_LEN {
2574                let candidate = self.extend_backwards(candidate_abs, abs_pos, match_len, lit_len);
2575                best = Self::better_candidate(best, Some(candidate));
2576                if best.is_some_and(|b| b.match_len >= self.target_len) {
2577                    return best;
2578                }
2579            }
2580        }
2581        best
2582    }
2583
2584    fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
2585        let reps = if lit_len == 0 {
2586            [
2587                Some(self.offset_hist[1] as usize),
2588                Some(self.offset_hist[2] as usize),
2589                (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
2590            ]
2591        } else {
2592            [
2593                Some(self.offset_hist[0] as usize),
2594                Some(self.offset_hist[1] as usize),
2595                Some(self.offset_hist[2] as usize),
2596            ]
2597        };
2598
2599        let concat = self.live_history();
2600        let current_idx = abs_pos - self.history_abs_start;
2601        if current_idx + HC_MIN_MATCH_LEN > concat.len() {
2602            return None;
2603        }
2604
2605        let mut best = None;
2606        for rep in reps.into_iter().flatten() {
2607            if rep == 0 || rep > abs_pos {
2608                continue;
2609            }
2610            let candidate_pos = abs_pos - rep;
2611            if candidate_pos < self.history_abs_start {
2612                continue;
2613            }
2614            let candidate_idx = candidate_pos - self.history_abs_start;
2615            let match_len =
2616                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
2617            if match_len >= HC_MIN_MATCH_LEN {
2618                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
2619                best = Self::better_candidate(best, Some(candidate));
2620            }
2621        }
2622        best
2623    }
2624
2625    fn extend_backwards(
2626        &self,
2627        mut candidate_pos: usize,
2628        mut abs_pos: usize,
2629        mut match_len: usize,
2630        lit_len: usize,
2631    ) -> MatchCandidate {
2632        let concat = self.live_history();
2633        let min_abs_pos = abs_pos - lit_len;
2634        while abs_pos > min_abs_pos
2635            && candidate_pos > self.history_abs_start
2636            && concat[candidate_pos - self.history_abs_start - 1]
2637                == concat[abs_pos - self.history_abs_start - 1]
2638        {
2639            candidate_pos -= 1;
2640            abs_pos -= 1;
2641            match_len += 1;
2642        }
2643        MatchCandidate {
2644            start: abs_pos,
2645            offset: abs_pos - candidate_pos,
2646            match_len,
2647        }
2648    }
2649
2650    fn better_candidate(
2651        lhs: Option<MatchCandidate>,
2652        rhs: Option<MatchCandidate>,
2653    ) -> Option<MatchCandidate> {
2654        match (lhs, rhs) {
2655            (None, other) | (other, None) => other,
2656            (Some(lhs), Some(rhs)) => {
2657                let lhs_gain = Self::match_gain(lhs.match_len, lhs.offset);
2658                let rhs_gain = Self::match_gain(rhs.match_len, rhs.offset);
2659                if rhs_gain > lhs_gain {
2660                    Some(rhs)
2661                } else {
2662                    Some(lhs)
2663                }
2664            }
2665        }
2666    }
2667
2668    fn match_gain(match_len: usize, offset: usize) -> i32 {
2669        debug_assert!(
2670            offset > 0,
2671            "zstd offsets are 1-indexed, offset=0 is invalid"
2672        );
2673        let offset_bits = 32 - (offset as u32).leading_zeros() as i32;
2674        (match_len as i32) * 4 - offset_bits
2675    }
2676
2677    // Lazy lookahead queries pos+1/pos+2 before they are inserted into hash
2678    // tables — matching C zstd behavior. Seeding before comparing would let a
2679    // position match against itself, changing semantics.
2680    fn pick_lazy_match(
2681        &self,
2682        abs_pos: usize,
2683        lit_len: usize,
2684        best: Option<MatchCandidate>,
2685    ) -> Option<MatchCandidate> {
2686        let best = best?;
2687        if best.match_len >= self.target_len
2688            || abs_pos + 1 + HC_MIN_MATCH_LEN > self.history_abs_end()
2689        {
2690            return Some(best);
2691        }
2692
2693        let current_gain = Self::match_gain(best.match_len, best.offset) + 4;
2694
2695        // Lazy check: evaluate pos+1
2696        let next = self.find_best_match(abs_pos + 1, lit_len + 1);
2697        if let Some(next) = next {
2698            let next_gain = Self::match_gain(next.match_len, next.offset);
2699            if next_gain > current_gain {
2700                return None;
2701            }
2702        }
2703
2704        // Lazy2 check: also evaluate pos+2
2705        if self.lazy_depth >= 2 && abs_pos + 2 + HC_MIN_MATCH_LEN <= self.history_abs_end() {
2706            let next2 = self.find_best_match(abs_pos + 2, lit_len + 2);
2707            if let Some(next2) = next2 {
2708                let next2_gain = Self::match_gain(next2.match_len, next2.offset);
2709                // Must beat current gain + extra literal cost
2710                if next2_gain > current_gain + 4 {
2711                    return None;
2712                }
2713            }
2714        }
2715
2716        Some(best)
2717    }
2718}
2719
#[test]
fn matches() {
    let mut matcher = MatchGenerator::new(1000);
    let mut original_data = Vec::new();
    let mut reconstructed = Vec::new();

    // Decode one sequence into `out`, re-checking the encoder's invariants:
    // non-empty literal runs, 1-indexed offsets, minimum match length.
    let replay = |out: &mut Vec<u8>, seq: Sequence<'_>| match seq {
        Sequence::Literals { literals } => {
            assert!(!literals.is_empty());
            out.extend_from_slice(literals);
        }
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            assert!(offset > 0);
            assert!(match_len >= MIN_MATCH_LEN);
            out.extend_from_slice(literals);
            assert!(offset <= out.len());
            // Byte-at-a-time copy so overlapping matches replicate correctly.
            for _ in 0..match_len {
                let byte = out[out.len() - offset];
                out.push(byte);
            }
        }
    };

    // Feed one block into the matcher and mirror it into the reference data.
    let feed = |m: &mut _, orig: &mut Vec<u8>, bytes: &[u8]| {
        MatchGenerator::add_data(m, bytes.to_vec(), SuffixStore::with_capacity(100), |_, _| {});
        orig.extend_from_slice(bytes);
    };

    feed(&mut matcher, &mut original_data, &[0; 10]);
    matcher.next_sequence(|seq| replay(&mut reconstructed, seq));
    assert!(!matcher.next_sequence(|_| {}));

    feed(
        &mut matcher,
        &mut original_data,
        &[1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0],
    );
    matcher.next_sequence(|seq| replay(&mut reconstructed, seq));
    matcher.next_sequence(|seq| replay(&mut reconstructed, seq));
    matcher.next_sequence(|seq| replay(&mut reconstructed, seq));
    assert!(!matcher.next_sequence(|_| {}));

    feed(
        &mut matcher,
        &mut original_data,
        &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0],
    );
    matcher.next_sequence(|seq| replay(&mut reconstructed, seq));
    matcher.next_sequence(|seq| replay(&mut reconstructed, seq));
    assert!(!matcher.next_sequence(|_| {}));

    feed(&mut matcher, &mut original_data, &[0, 0, 0, 0, 0]);
    matcher.next_sequence(|seq| replay(&mut reconstructed, seq));
    assert!(!matcher.next_sequence(|_| {}));

    feed(&mut matcher, &mut original_data, &[7, 8, 9, 10, 11]);
    matcher.next_sequence(|seq| replay(&mut reconstructed, seq));
    assert!(!matcher.next_sequence(|_| {}));

    // Skipped block: bytes enter the window but no sequences are produced,
    // so the decoder side is updated by hand.
    feed(&mut matcher, &mut original_data, &[1, 3, 5, 7, 9]);
    matcher.skip_matching();
    reconstructed.extend_from_slice(&[1, 3, 5, 7, 9]);
    assert!(!matcher.next_sequence(|_| {}));

    // The same bytes again must now match against the skipped block.
    feed(&mut matcher, &mut original_data, &[1, 3, 5, 7, 9]);
    matcher.next_sequence(|seq| replay(&mut reconstructed, seq));
    assert!(!matcher.next_sequence(|_| {}));

    feed(
        &mut matcher,
        &mut original_data,
        &[0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23],
    );
    matcher.next_sequence(|seq| replay(&mut reconstructed, seq));
    matcher.next_sequence(|seq| replay(&mut reconstructed, seq));
    assert!(!matcher.next_sequence(|_| {}));

    assert_eq!(reconstructed, original_data);
}
2839
#[test]
fn dfast_matches_roundtrip_multi_block_pattern() {
    let pattern = [9, 21, 44, 184, 19, 96, 171, 109, 141, 251];
    let block_len = 128 * 1024;
    let first_block: Vec<u8> = pattern.iter().copied().cycle().take(block_len).collect();
    // Both blocks carry identical bytes.
    let second_block = first_block.clone();

    let mut matcher = DfastMatchGenerator::new(1 << 22);
    // Rebuild the original bytes from the emitted sequences.
    let replay = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            decoded.extend_from_slice(literals);
            // Byte-wise copy handles overlapping matches.
            for _ in 0..match_len {
                let byte = decoded[decoded.len() - offset];
                decoded.push(byte);
            }
        }
    };

    let mut history = Vec::new();
    matcher.add_data(first_block.clone(), |_| {});
    matcher.start_matching(|seq| replay(&mut history, seq));
    assert_eq!(history, first_block);

    let prefix_len = history.len();
    matcher.add_data(second_block.clone(), |_| {});
    matcher.start_matching(|seq| replay(&mut history, seq));

    assert_eq!(&history[prefix_len..], second_block.as_slice());
}
2874
#[test]
fn driver_switches_backends_and_initializes_dfast_via_reset() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Default level selects the dfast backend with a 4 MiB window.
    driver.reset(CompressionLevel::Default);
    assert_eq!(driver.active_backend, MatcherBackend::Dfast);
    assert_eq!(driver.window_size(), (1u64 << 22));

    // Fill and commit one block through the driver's space-reuse API.
    let commit = |d: &mut _, bytes: &[u8]| {
        let mut space = MatchGeneratorDriver::get_next_space(d);
        space[..bytes.len()].copy_from_slice(bytes);
        space.truncate(bytes.len());
        d.commit_space(space);
    };

    commit(&mut driver, b"abcabcabcabc");
    assert_eq!(driver.get_last_space(), b"abcabcabcabc");
    driver.skip_matching();

    commit(&mut driver, b"abcabcabcabc");

    let mut reconstructed = b"abcabcabcabc".to_vec();
    driver.start_matching(|seq| match seq {
        Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            reconstructed.extend_from_slice(literals);
            for _ in 0..match_len {
                let byte = reconstructed[reconstructed.len() - offset];
                reconstructed.push(byte);
            }
        }
    });
    assert_eq!(reconstructed, b"abcabcabcabcabcabcabcabc");

    // Fastest level switches to the small 128 KiB window.
    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.window_size(), (1u64 << 17));
}
2916
#[test]
fn driver_level4_selects_row_backend() {
    // Compression level 4 must map to the row-based matcher backend.
    let mut generator = MatchGeneratorDriver::new(32, 2);
    generator.reset(CompressionLevel::Level(4));
    assert_eq!(generator.active_backend, MatcherBackend::Row);
}
2923
#[test]
fn driver_small_source_hint_shrinks_dfast_hash_tables() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Push one committed block through the driver so tables get allocated.
    let commit_and_skip = |d: &mut _, bytes: &[u8]| {
        let mut space = MatchGeneratorDriver::get_next_space(d);
        space[..bytes.len()].copy_from_slice(bytes);
        space.truncate(bytes.len());
        d.commit_space(space);
        d.skip_matching();
    };

    // Without a size hint the full-size table is used.
    driver.reset(CompressionLevel::Level(2));
    commit_and_skip(&mut driver, b"abcabcabcabc");
    let full_tables = driver.dfast_matcher().short_hash.len();
    assert_eq!(full_tables, 1 << DFAST_HASH_BITS);

    // A tiny hint shrinks both the window and the hash table.
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Level(2));
    commit_and_skip(&mut driver, b"xyzxyzxyzxyz");
    let hinted_tables = driver.dfast_matcher().short_hash.len();

    assert_eq!(driver.window_size(), 1 << MIN_HINTED_WINDOW_LOG);
    assert_eq!(hinted_tables, 1 << MIN_HINTED_WINDOW_LOG);
    assert!(
        hinted_tables < full_tables,
        "tiny source hint should reduce dfast table footprint"
    );
}
2953
#[test]
fn driver_small_source_hint_shrinks_row_hash_tables() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Push one committed block through the driver so tables get allocated.
    let commit_and_skip = |d: &mut _, bytes: &[u8]| {
        let mut space = MatchGeneratorDriver::get_next_space(d);
        space[..bytes.len()].copy_from_slice(bytes);
        space.truncate(bytes.len());
        d.commit_space(space);
        d.skip_matching();
    };

    // Without a size hint the full row table is used.
    driver.reset(CompressionLevel::Level(4));
    commit_and_skip(&mut driver, b"abcabcabcabc");
    let full_rows = driver.row_matcher().row_heads.len();
    assert_eq!(full_rows, 1 << (ROW_HASH_BITS - ROW_LOG));

    // A tiny hint shrinks both the window and the row table.
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Level(4));
    commit_and_skip(&mut driver, b"xyzxyzxyzxyz");
    let hinted_rows = driver.row_matcher().row_heads.len();

    assert_eq!(driver.window_size(), 1 << MIN_HINTED_WINDOW_LOG);
    assert_eq!(
        hinted_rows,
        1 << ((MIN_HINTED_WINDOW_LOG as usize) - ROW_LOG)
    );
    assert!(
        hinted_rows < full_rows,
        "tiny source hint should reduce row hash table footprint"
    );
}
2986
#[test]
fn row_matches_roundtrip_multi_block_pattern() {
    let pattern = [7, 13, 44, 184, 19, 96, 171, 109, 141, 251];
    let block_len = 128 * 1024;
    let first_block: Vec<u8> = pattern.iter().copied().cycle().take(block_len).collect();
    // Both blocks carry identical bytes.
    let second_block = first_block.clone();

    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.ensure_tables();
    // Rebuild the original bytes from the emitted sequences.
    let replay = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            decoded.extend_from_slice(literals);
            // Byte-wise copy handles overlapping matches.
            for _ in 0..match_len {
                let byte = decoded[decoded.len() - offset];
                decoded.push(byte);
            }
        }
    };

    let mut history = Vec::new();
    matcher.add_data(first_block.clone(), |_| {});
    matcher.start_matching(|seq| replay(&mut history, seq));
    assert_eq!(history, first_block);

    let prefix_len = history.len();
    matcher.add_data(second_block.clone(), |_| {});
    matcher.start_matching(|seq| replay(&mut history, seq));

    assert_eq!(&history[prefix_len..], second_block.as_slice());

    // Force a literals-only pass so the Sequence::Literals arm is exercised.
    let third_block: Vec<u8> = (0u8..=255).collect();
    let third_prefix = history.len();
    matcher.add_data(third_block.clone(), |_| {});
    matcher.start_matching(|seq| replay(&mut history, seq));
    assert_eq!(&history[third_prefix..], third_block.as_slice());
}
3030
#[test]
fn row_short_block_emits_literals_only() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);

    matcher.add_data(b"abcde".to_vec(), |_| {});

    // A 5-byte block cannot contain a row-backend match, so only the
    // literals arm should fire.
    let mut saw_triple = false;
    let mut reconstructed = Vec::new();
    matcher.start_matching(|seq| match seq {
        Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
        Sequence::Triple { .. } => saw_triple = true,
    });

    assert!(
        !saw_triple,
        "row backend must not emit triples for short blocks"
    );
    assert_eq!(reconstructed, b"abcde");

    // Then feed a clearly matchable block and ensure the Triple arm is reachable.
    saw_triple = false;
    matcher.add_data(b"abcdeabcde".to_vec(), |_| {});
    matcher.start_matching(|seq| {
        saw_triple |= matches!(seq, Sequence::Triple { .. });
    });
    assert!(
        saw_triple,
        "row backend should emit triples on repeated data"
    );
}
3064
#[test]
fn row_pick_lazy_returns_best_when_lookahead_is_out_of_bounds() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.add_data(b"abcabc".to_vec(), |_| {});

    // Lookahead at pos+1 would run past the 6-byte buffer, so the lazy
    // check must return the supplied candidate unchanged.
    let best = MatchCandidate {
        start: 0,
        offset: 1,
        match_len: ROW_MIN_MATCH_LEN,
    };
    let picked = matcher
        .pick_lazy_match(0, 0, Some(best))
        .expect("best candidate must survive");

    assert_eq!(
        (picked.start, picked.offset, picked.match_len),
        (best.start, best.offset, best.match_len)
    );
}
3084
#[test]
fn row_backfills_previous_block_tail_for_cross_boundary_match() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);

    // The first block ends in "XYZ" and the second begins with "XYZ", so a
    // match with offset 3 can only come from the previous block's tail.
    let mut first_block = alloc::vec![0xA5; 64];
    first_block.extend_from_slice(b"XYZ");
    let second_block = b"XYZXYZtail".to_vec();

    // Minimal sequence replayer: literals are appended verbatim; a triple
    // copies `match_len` bytes starting `offset` bytes back, one byte at a
    // time so overlapping copies behave like the zstd decoder.
    let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            decoded.extend_from_slice(literals);
            let start = decoded.len() - offset;
            for i in 0..match_len {
                let byte = decoded[start + i];
                decoded.push(byte);
            }
        }
    };

    matcher.add_data(first_block.clone(), |_| {});
    let mut reconstructed = Vec::new();
    matcher.start_matching(|seq| replay_sequence(&mut reconstructed, seq));
    assert_eq!(reconstructed, first_block);

    matcher.add_data(second_block.clone(), |_| {});
    let mut saw_cross_boundary = false;
    let prefix_len = reconstructed.len();
    matcher.start_matching(|seq| {
        // A literal-free triple at offset 3 proves the matcher reached back
        // into the previous block's 3-byte "XYZ" tail.
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 3
            && match_len >= ROW_MIN_MATCH_LEN
        {
            saw_cross_boundary = true;
        }
        replay_sequence(&mut reconstructed, seq);
    });

    assert!(
        saw_cross_boundary,
        "row matcher should reuse the 3-byte previous-block tail"
    );
    // Round-trip check: the emitted sequences must decode back to the input.
    assert_eq!(&reconstructed[prefix_len..], second_block.as_slice());
}
3139
#[test]
fn driver_unhinted_level2_keeps_default_dfast_hash_table_size() {
    // Without a source-size hint, Level(2) must keep the default dfast
    // short-hash table size rather than shrinking it.
    let mut driver = MatchGeneratorDriver::new(32, 2);
    driver.reset(CompressionLevel::Level(2));

    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"abcabcabcabc");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching();

    assert_eq!(
        driver.dfast_matcher().short_hash.len(),
        1 << DFAST_HASH_BITS,
        "unhinted Level(2) should keep default dfast table size"
    );
}
3158
#[test]
fn simple_backend_rejects_undersized_pooled_suffix_store() {
    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
    driver.reset(CompressionLevel::Fastest);

    // Seed the pool with a store that is too small for the 4096-byte block
    // committed below.
    driver.suffix_pool.push(SuffixStore::with_capacity(1024));

    let mut space = driver.get_next_space();
    space.clear();
    space.resize(4096, 0xAB);
    driver.commit_space(space);

    // Inspect the suffix table that ended up backing the committed block.
    let last_suffix_slots = driver
        .match_generator
        .window
        .last()
        .expect("window entry must exist after commit")
        .suffixes
        .slots
        .len();
    assert!(
        last_suffix_slots >= 4096,
        "undersized pooled suffix store must not be reused for larger blocks"
    );
}
3184
#[test]
fn source_hint_clamps_driver_slice_size_to_window() {
    // A tiny source-size hint clamps the window to the hinted-window floor,
    // and the driver's slice size must follow it.
    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Default);

    let expected = 1usize << MIN_HINTED_WINDOW_LOG;
    assert_eq!(driver.window_size() as usize, expected);
    assert_eq!(driver.slice_size, expected);

    // Freshly handed-out space must also be sized to the clamped window.
    let space = driver.get_next_space();
    assert_eq!(space.len(), expected);
    driver.commit_space(space);
}
3199
#[test]
fn pooled_space_keeps_capacity_when_slice_size_shrinks() {
    // Shrinking the slice size via a hint must not shrink the capacity of
    // buffers already held in the pool.
    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
    driver.reset(CompressionLevel::Default);

    let big = driver.get_next_space();
    let big_capacity = big.capacity();
    assert!(big_capacity >= 128 * 1024);
    driver.commit_space(big);

    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Default);

    let shrunk = driver.get_next_space();
    assert_eq!(shrunk.len(), 1 << MIN_HINTED_WINDOW_LOG);
    assert!(
        shrunk.capacity() >= big_capacity,
        "pooled buffer capacity should be preserved to avoid shrink/grow churn"
    );
}
3220
/// Downgrading from Best to Fastest must free the large hash-chain tables.
#[test]
fn driver_best_to_fastest_releases_oversized_hc_tables() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Initialize at Best — allocates large HC tables (2M hash, 1M chain).
    driver.reset(CompressionLevel::Best);
    assert_eq!(driver.window_size(), (1u64 << 24));

    // Feed data so tables are actually allocated via ensure_tables().
    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"abcabcabcabc");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching();

    // Switch to Fastest — must release HC tables.
    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.window_size(), (1u64 << 17));

    // HC matcher should have empty tables after backend switch.
    let hc = driver.hc_match_generator.as_ref().unwrap();
    assert!(
        hc.hash_table.is_empty(),
        "HC hash_table should be released after switching away from Best"
    );
    assert!(
        hc.chain_table.is_empty(),
        "HC chain_table should be released after switching away from Best"
    );
}
3251
/// Upgrading from Better to Best must grow the hash-chain tables in place.
#[test]
fn driver_better_to_best_resizes_hc_tables() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Initialize at Better — allocates small HC tables (1M hash, 512K chain).
    driver.reset(CompressionLevel::Better);
    assert_eq!(driver.window_size(), (1u64 << 23));

    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"abcabcabcabc");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching();

    // Record the Better-level table sizes for comparison after the switch.
    let hc = driver.hc_match_generator.as_ref().unwrap();
    let better_hash_len = hc.hash_table.len();
    let better_chain_len = hc.chain_table.len();

    // Switch to Best — must resize to larger tables.
    driver.reset(CompressionLevel::Best);
    assert_eq!(driver.window_size(), (1u64 << 24));

    // Feed data to trigger ensure_tables with new sizes.
    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"xyzxyzxyzxyz");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching();

    let hc = driver.hc_match_generator.as_ref().unwrap();
    assert!(
        hc.hash_table.len() > better_hash_len,
        "Best hash_table ({}) should be larger than Better ({})",
        hc.hash_table.len(),
        better_hash_len
    );
    assert!(
        hc.chain_table.len() > better_chain_len,
        "Best chain_table ({}) should be larger than Better ({})",
        hc.chain_table.len(),
        better_chain_len
    );
}
3295
#[test]
fn prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    // The first real block repeats the dictionary content exactly, so the
    // matcher should find it 8 bytes back in primed history.
    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    let mut saw_match = false;
    driver.start_matching(|seq| {
        // offset == 8 means the match points back at the dictionary bytes.
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 8
            && match_len >= MIN_MATCH_LEN
        {
            saw_match = true;
        }
    });

    assert!(
        saw_match,
        "first full block should still match dictionary-primed history"
    );
}
3328
#[test]
fn prime_with_large_dictionary_preserves_early_history_until_first_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    // 24-byte dictionary; the first real block repeats its first 8 bytes.
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);

    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    let mut saw_match = false;
    driver.start_matching(|seq| {
        // offset == 24 reaches back to the very start of the dictionary,
        // proving the earliest primed bytes are still addressable.
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 24
            && match_len >= MIN_MATCH_LEN
        {
            saw_match = true;
        }
    });

    assert!(
        saw_match,
        "dictionary bytes should remain addressable until frame output exceeds the live window"
    );
}
3361
#[test]
fn prime_with_dictionary_applies_offset_history_even_when_content_is_empty() {
    // Even a content-free dictionary must still install its repcode history.
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    let hist = [11, 7, 3];
    driver.prime_with_dictionary(&[], hist);

    assert_eq!(driver.match_generator.offset_hist, hist);
}
3371
#[test]
fn dfast_prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(2));

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    // The first block repeats the dictionary exactly; the dfast backend
    // should find the 8-byte match pointing back into primed history.
    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    let mut saw_match = false;
    driver.start_matching(|seq| {
        // offset == 8 means the match points back at the dictionary bytes.
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 8
            && match_len >= DFAST_MIN_MATCH_LEN
        {
            saw_match = true;
        }
    });

    assert!(
        saw_match,
        "dfast backend should match dictionary-primed history in first full block"
    );
}
3404
#[test]
fn prime_with_dictionary_does_not_inflate_reported_window_size() {
    // Dictionary retention is an internal budget; the window size advertised
    // in the frame header must stay unchanged.
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    let window_before = driver.window_size();
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);

    assert_eq!(
        driver.window_size(),
        window_before,
        "dictionary retention budget must not change reported frame window size"
    );
}
3419
#[test]
fn prime_with_dictionary_does_not_reuse_tiny_suffix_store() {
    let mut driver = MatchGeneratorDriver::new(8, 2);
    driver.reset(CompressionLevel::Fastest);

    // This dictionary leaves a 1-byte tail chunk (capacity=1 suffix table),
    // which should never be committed to the matcher window.
    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);

    let all_long_enough = driver
        .match_generator
        .window
        .iter()
        .all(|entry| entry.data.len() >= MIN_MATCH_LEN);
    assert!(
        all_long_enough,
        "dictionary priming must not commit tails shorter than MIN_MATCH_LEN"
    );
}
3438
#[test]
fn prime_with_dictionary_counts_only_committed_tail_budget() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    let budget_before = driver.match_generator.max_window_size;
    // One full slice plus a 1-byte tail that cannot be committed.
    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);

    // Only the 8 committed bytes count; the uncommittable tail byte does not.
    assert_eq!(
        driver.match_generator.max_window_size,
        budget_before + 8,
        "retention budget must account only for dictionary bytes actually committed to history"
    );
}
3454
#[test]
fn dfast_prime_with_dictionary_counts_four_byte_tail_budget() {
    // Dfast can still use a 4-byte dictionary tail through short-hash
    // overlap into the next block, so all 12 bytes count toward the budget.
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(2));

    let budget_before = driver.dfast_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghijkl", [1, 4, 8]);

    assert_eq!(
        driver.dfast_matcher().max_window_size,
        budget_before + 12,
        "dfast retention budget should include 4-byte dictionary tails"
    );
}
3471
#[test]
fn row_prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    // The first block repeats the dictionary exactly; the row backend
    // should find the 8-byte match pointing back into primed history.
    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    let mut saw_match = false;
    driver.start_matching(|seq| {
        // offset == 8 means the match points back at the dictionary bytes.
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 8
            && match_len >= ROW_MIN_MATCH_LEN
        {
            saw_match = true;
        }
    });

    assert!(
        saw_match,
        "row backend should match dictionary-primed history in first full block"
    );
}
3504
#[test]
fn row_prime_with_dictionary_subtracts_uncommitted_tail_budget() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));

    let window_before = driver.row_matcher().max_window_size;
    // Slice size is 8. The trailing byte cannot be committed (<4 tail),
    // so it must be subtracted from retained budget.
    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);

    assert_eq!(
        driver.row_matcher().max_window_size,
        window_before + 8,
        "row retained window must exclude uncommitted 1-byte tail"
    );
}
3521
#[test]
fn prime_with_dictionary_budget_shrinks_after_row_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));
    // Keep live window tiny so dictionary-primed slices are evicted quickly.
    driver.row_matcher_mut().max_window_size = 8;
    driver.reported_window_size = 8;

    let base_window = driver.row_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    // Priming 24 dictionary bytes temporarily extends the retained window.
    assert_eq!(driver.row_matcher().max_window_size, base_window + 24);

    // Two 8-byte blocks push the primed slices out of the tiny window.
    for block in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut space = driver.get_next_space();
        space.clear();
        space.extend_from_slice(block);
        driver.commit_space(space);
        driver.skip_matching();
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.row_matcher().max_window_size,
        base_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
3552
#[test]
fn row_get_last_space_and_reset_to_fastest_clears_window() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));

    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"row-data");
    driver.commit_space(space);

    // get_last_space must expose the bytes of the most recent commit.
    assert_eq!(driver.get_last_space(), b"row-data");

    // Dropping to Fastest switches backends and must clear row history.
    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.active_backend, MatcherBackend::Simple);
    assert!(driver.row_matcher().window.is_empty());
}
3569
/// Ensures switching from Row to Simple returns pooled buffers and row tables.
#[test]
fn driver_reset_from_row_backend_reclaims_row_buffer_pool() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));
    assert_eq!(driver.active_backend, MatcherBackend::Row);

    // Ensure the row matcher option is initialized so reset() executes
    // the Row backend retirement path.
    let _ = driver.row_matcher();
    let mut space = driver.get_next_space();
    // NOTE(review): unlike sibling tests, `space` is not cleared before the
    // extend — presumably intentional since only pool recycling is asserted
    // here, not the committed contents; confirm if content ever matters.
    space.extend_from_slice(b"row-data-to-recycle");
    driver.commit_space(space);

    let before_pool = driver.vec_pool.len();
    driver.reset(CompressionLevel::Fastest);

    assert_eq!(driver.active_backend, MatcherBackend::Simple);
    // The row matcher stays allocated but its tables must be emptied.
    let row = driver
        .row_match_generator
        .as_ref()
        .expect("row matcher should remain allocated after switch");
    assert!(row.row_heads.is_empty());
    assert!(row.row_positions.is_empty());
    assert!(row.row_tags.is_empty());
    assert!(
        driver.vec_pool.len() >= before_pool,
        "row reset should recycle row history buffers"
    );
}
3600
/// Guards the optional row backend retirement path when no row matcher was allocated.
#[test]
fn driver_reset_from_row_backend_tolerates_missing_row_matcher() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    // Force the Row backend without ever allocating its matcher.
    driver.active_backend = MatcherBackend::Row;
    driver.row_match_generator = None;

    driver.reset(CompressionLevel::Fastest);

    // reset() must not panic and must land on the Simple backend.
    assert_eq!(driver.active_backend, MatcherBackend::Simple);
}
3612
#[test]
fn adjust_params_for_zero_source_size_uses_min_hinted_window_floor() {
    // A zero source size must clamp the window log down to the hinted floor.
    let mut params = resolve_level_params(CompressionLevel::Level(4), None);
    params.window_log = 22;
    assert_eq!(
        adjust_params_for_source_size(params, 0).window_log,
        MIN_HINTED_WINDOW_LOG
    );
}
3620
#[test]
fn common_prefix_len_matches_scalar_reference_across_offsets() {
    // Scalar ground truth: count leading positions where both slices agree.
    fn scalar_reference(a: &[u8], b: &[u8]) -> usize {
        a.iter()
            .zip(b.iter())
            .take_while(|(lhs, rhs)| lhs == rhs)
            .count()
    }

    // Lengths deliberately straddle 16/32-byte lane boundaries so both the
    // SIMD main loop and its scalar tail are exercised.
    for total_len in [
        0usize, 1, 5, 15, 16, 17, 31, 32, 33, 64, 65, 127, 191, 257, 320,
    ] {
        let base: Vec<u8> = (0..total_len)
            .map(|i| ((i * 13 + 7) & 0xFF) as u8)
            .collect();

        // Offsets shift slice alignment so unaligned starts are covered too.
        for start in [0usize, 1, 3] {
            if start > total_len {
                continue;
            }
            let a = &base[start..];
            let b = a.to_vec();
            assert_eq!(
                MatchGenerator::common_prefix_len(a, &b),
                scalar_reference(a, &b),
                "equal slices total_len={total_len} start={start}"
            );

            // Flip one byte at various positions and re-check agreement.
            let len = a.len();
            for mismatch in [0usize, 1, 7, 15, 16, 31, 32, 47, 63, 95, 127, 128, 129, 191] {
                if mismatch >= len {
                    continue;
                }
                let mut altered = b.clone();
                altered[mismatch] ^= 0x5A;
                assert_eq!(
                    MatchGenerator::common_prefix_len(a, &altered),
                    scalar_reference(a, &altered),
                    "total_len={total_len} start={start} mismatch={mismatch}"
                );
            }

            // Mismatch in the very last byte stresses the tail handling.
            if len > 0 {
                let mismatch = len - 1;
                let mut altered = b.clone();
                altered[mismatch] ^= 0xA5;
                assert_eq!(
                    MatchGenerator::common_prefix_len(a, &altered),
                    scalar_reference(a, &altered),
                    "tail mismatch total_len={total_len} start={start} mismatch={mismatch}"
                );
            }
        }
    }

    // Unequal lengths: the result must be capped by the shorter slice.
    let long = alloc::vec![0xAB; 320];
    let shorter = alloc::vec![0xAB; 137];
    assert_eq!(
        MatchGenerator::common_prefix_len(&long, &shorter),
        scalar_reference(&long, &shorter)
    );
}
3683
#[test]
fn row_pick_lazy_returns_none_when_next_is_better() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    // A uniform run of 'a' means the position after `abs_pos` can always
    // match at least as long, so the lazy heuristic should defer emission.
    matcher.add_data(alloc::vec![b'a'; 64], |_| {});
    matcher.ensure_tables();

    let abs_pos = matcher.history_abs_start + 16;
    // Hand the picker a minimal-length candidate; the lookahead should win.
    let best = MatchCandidate {
        start: abs_pos,
        offset: 8,
        match_len: ROW_MIN_MATCH_LEN,
    };
    assert!(
        matcher.pick_lazy_match(abs_pos, 0, Some(best)).is_none(),
        "lazy picker should defer when next position is clearly better"
    );
}
3702
#[test]
fn row_pick_lazy_depth2_returns_none_when_next2_significantly_better() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    // Depth-2 lazy matching with no extra search depth keeps the candidate
    // set deterministic; the seeded repcode history supplies offset 9.
    matcher.lazy_depth = 2;
    matcher.search_depth = 0;
    matcher.offset_hist = [6, 9, 1];

    // Crafted payload: the match starting two positions later is more than
    // one byte longer than the baseline, which must trigger the lazy skip.
    let mut data = alloc::vec![b'x'; 40];
    data[11..30].copy_from_slice(b"EFABCABCAEFABCAEFAB");
    matcher.add_data(data, |_| {});
    matcher.ensure_tables();

    let abs_pos = matcher.history_abs_start + 20;
    let best = matcher
        .best_match(abs_pos, 0)
        .expect("expected baseline repcode match");
    assert_eq!(best.offset, 9);
    assert_eq!(best.match_len, ROW_MIN_MATCH_LEN);

    // The +1 lookahead must not already beat the baseline, otherwise the
    // depth-2 path would not be the one making the decision.
    if let Some(next) = matcher.best_match(abs_pos + 1, 1) {
        assert!(next.match_len <= best.match_len);
    }

    let next2 = matcher
        .best_match(abs_pos + 2, 2)
        .expect("expected +2 candidate");
    assert!(
        next2.match_len > best.match_len + 1,
        "+2 candidate must be significantly better for depth-2 lazy skip"
    );
    assert!(
        matcher.pick_lazy_match(abs_pos, 0, Some(best)).is_none(),
        "lazy picker should defer when +2 candidate is significantly better"
    );
}
3739
#[test]
fn row_pick_lazy_depth2_keeps_best_when_next2_is_only_one_byte_better() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    // Same setup as the significantly-better test, but the payload ends in
    // 'Z' so the +2 candidate is exactly one byte longer — below the
    // depth-2 skip threshold.
    matcher.lazy_depth = 2;
    matcher.search_depth = 0;
    matcher.offset_hist = [6, 9, 1];

    let mut data = alloc::vec![b'x'; 40];
    data[11..30].copy_from_slice(b"EFABCABCAEFABCAEFAZ");
    matcher.add_data(data, |_| {});
    matcher.ensure_tables();

    let abs_pos = matcher.history_abs_start + 20;
    let best = matcher
        .best_match(abs_pos, 0)
        .expect("expected baseline repcode match");
    assert_eq!(best.offset, 9);
    assert_eq!(best.match_len, ROW_MIN_MATCH_LEN);

    let next2 = matcher
        .best_match(abs_pos + 2, 2)
        .expect("expected +2 candidate");
    // A one-byte improvement is not enough to abandon the current match.
    assert_eq!(next2.match_len, best.match_len + 1);
    let chosen = matcher
        .pick_lazy_match(abs_pos, 0, Some(best))
        .expect("lazy picker should keep current best");
    assert_eq!(chosen.start, best.start);
    assert_eq!(chosen.offset, best.offset);
    assert_eq!(chosen.match_len, best.match_len);
}
3771
/// Verifies row/tag extraction uses the high bits of the multiplicative hash.
#[test]
fn row_hash_and_row_extracts_high_bits() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.add_data(
        alloc::vec![
            0xAA, 0xBB, 0xCC, 0x11, 0x10, 0x20, 0x30, 0x40, 0xAA, 0xBB, 0xCC, 0x22, 0x50, 0x60,
            0x70, 0x80,
        ],
        |_| {},
    );
    matcher.ensure_tables();

    let pos = matcher.history_abs_start + 8;
    let (row, tag) = matcher
        .hash_and_row(pos)
        .expect("row hash should be available");

    // Recompute the expected row/tag independently: multiply the 4-byte
    // little-endian key by the 64-bit multiplier, then take the top
    // (row_hash_log + ROW_TAG_BITS) bits — row bits above tag bits.
    let idx = pos - matcher.history_abs_start;
    let concat = matcher.live_history();
    let value = u32::from_le_bytes(concat[idx..idx + ROW_HASH_KEY_LEN].try_into().unwrap()) as u64;
    const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
    let hash = value.wrapping_mul(PRIME);
    let total_bits = matcher.row_hash_log + ROW_TAG_BITS;
    let combined = hash >> (u64::BITS as usize - total_bits);
    let expected_row =
        ((combined >> ROW_TAG_BITS) as usize) & ((1usize << matcher.row_hash_log) - 1);
    let expected_tag = combined as u8;

    assert_eq!(row, expected_row);
    assert_eq!(tag, expected_tag);
}
3805
#[test]
fn row_repcode_skips_candidate_before_history_start() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    // History starts at absolute position 10; repcode offset 3 from absolute
    // position 12 would point at position 9 — before retained history.
    matcher.history = alloc::vec![b'a'; 20];
    matcher.history_start = 0;
    matcher.history_abs_start = 10;
    matcher.offset_hist = [3, 0, 0];

    assert!(matcher.repcode_candidate(12, 1).is_none());
}
3817
#[test]
fn row_repcode_returns_none_when_position_too_close_to_history_end() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    // Only 5 bytes of history; position 4 leaves too few remaining bytes
    // for a repcode candidate, so None is expected.
    matcher.history = b"abcde".to_vec();
    matcher.history_start = 0;
    matcher.history_abs_start = 0;
    matcher.offset_hist = [1, 0, 0];

    assert!(matcher.repcode_candidate(4, 1).is_none());
}
3829
#[test]
fn row_candidate_returns_none_when_abs_pos_near_end_of_history() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    // 5 bytes of history is too short for a candidate at position 0, so the
    // lookup must return None rather than read past the end.
    matcher.history = b"abcde".to_vec();
    matcher.history_start = 0;
    matcher.history_abs_start = 0;

    assert!(matcher.row_candidate(0, 0).is_none());
}
3840
#[test]
fn hc_chain_candidates_returns_sentinels_for_short_suffix() {
    let mut hc = HcMatchGenerator::new(32);
    // Only 3 bytes of history: too short for a chain lookup, so every
    // returned slot must still hold the usize::MAX empty sentinel.
    hc.history = b"abc".to_vec();
    hc.history_start = 0;
    hc.history_abs_start = 0;
    hc.ensure_tables();

    let candidates = hc.chain_candidates(0);
    assert!(candidates.iter().all(|&pos| pos == usize::MAX));
}
3852
#[test]
fn hc_reset_refills_existing_tables_with_empty_sentinel() {
    let mut hc = HcMatchGenerator::new(32);
    hc.add_data(b"abcdeabcde".to_vec(), |_| {});
    hc.ensure_tables();
    assert!(!hc.hash_table.is_empty());
    assert!(!hc.chain_table.is_empty());
    // Poison both tables so the reset-refill is observable.
    hc.hash_table.fill(123);
    hc.chain_table.fill(456);

    hc.reset(|_| {});

    // reset() must keep the allocations but overwrite every slot with HC_EMPTY.
    assert!(hc.hash_table.iter().all(|&v| v == HC_EMPTY));
    assert!(hc.chain_table.iter().all(|&v| v == HC_EMPTY));
}
3868
#[test]
fn hc_start_matching_returns_early_for_empty_current_block() {
    // An empty current block must yield no sequences at all.
    let mut hc = HcMatchGenerator::new(32);
    hc.add_data(Vec::new(), |_| {});

    let mut emitted = false;
    hc.start_matching(|_| emitted = true);

    assert!(!emitted, "empty current block should not emit sequences");
}
3877
#[test]
fn hc_compact_history_drains_when_threshold_crossed() {
    let mut hc = HcMatchGenerator::new(8);
    hc.history = b"abcdefghijklmnopqrstuvwxyz".to_vec();
    // history_start is far past the window size, which should trigger a drain.
    hc.history_start = 16;
    hc.compact_history();
    // The consumed prefix is dropped and the start index rebases to zero.
    assert_eq!(hc.history_start, 0);
    assert_eq!(hc.history, b"qrstuvwxyz");
}
3887
#[test]
fn hc_insert_position_no_rebase_returns_when_relative_pos_unavailable() {
    let mut hc = HcMatchGenerator::new(32);
    hc.history = b"abcdefghijklmnop".to_vec();
    hc.history_abs_start = 0;
    // position_base ahead of the requested position makes the relative
    // position unrepresentable, so the insert must be a no-op.
    hc.position_base = 1;
    hc.ensure_tables();
    let before_hash = hc.hash_table.clone();
    let before_chain = hc.chain_table.clone();

    hc.insert_position_no_rebase(0);

    // Both tables must be untouched when no relative position exists.
    assert_eq!(hc.hash_table, before_hash);
    assert_eq!(hc.chain_table, before_chain);
}
3903
#[test]
fn prime_with_dictionary_budget_shrinks_after_simple_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);
    // Use a small live window so dictionary-primed slices are evicted
    // quickly and budget retirement can be asserted deterministically.
    driver.match_generator.max_window_size = 8;
    driver.reported_window_size = 8;

    let base_window = driver.match_generator.max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    // Priming 24 dictionary bytes temporarily extends the retained window.
    assert_eq!(driver.match_generator.max_window_size, base_window + 24);

    // Two 8-byte blocks push the primed slices out of the tiny window.
    for block in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut space = driver.get_next_space();
        space.clear();
        space.extend_from_slice(block);
        driver.commit_space(space);
        driver.skip_matching();
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.match_generator.max_window_size, base_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
3934
#[test]
fn prime_with_dictionary_budget_shrinks_after_dfast_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(2));
    // Use a small live window in this regression so dictionary-primed slices are
    // evicted quickly and budget retirement can be asserted deterministically.
    driver.dfast_matcher_mut().max_window_size = 8;
    driver.reported_window_size = 8;

    let base_window = driver.dfast_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    // Priming 24 dictionary bytes temporarily extends the retained window.
    assert_eq!(driver.dfast_matcher().max_window_size, base_window + 24);

    // Two 8-byte blocks push the primed slices out of the tiny window.
    for block in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut space = driver.get_next_space();
        space.clear();
        space.extend_from_slice(block);
        driver.commit_space(space);
        driver.skip_matching();
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.dfast_matcher().max_window_size,
        base_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
3966
#[test]
fn hc_prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Better);

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    let mut space = driver.get_next_space();
    space.clear();
    // Repeat the dictionary content so the HC matcher can find it.
    // HC_MIN_MATCH_LEN is 5, so an 8-byte match is well above threshold.
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    let mut saw_match = false;
    driver.start_matching(|seq| {
        // offset == 8 means the match points back at the dictionary bytes.
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 8
            && match_len >= HC_MIN_MATCH_LEN
        {
            saw_match = true;
        }
    });

    assert!(
        saw_match,
        "hash-chain backend should match dictionary-primed history in first full block"
    );
}
4001
#[test]
fn prime_with_dictionary_budget_shrinks_after_hc_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Better);
    // Use a small live window so dictionary-primed slices are evicted quickly.
    driver.hc_matcher_mut().max_window_size = 8;
    driver.reported_window_size = 8;

    // Window size before priming; the 24 dictionary bytes enlarge it temporarily.
    let baseline = driver.hc_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    assert_eq!(driver.hc_matcher().max_window_size, baseline + 24);

    // Push enough fresh data through the window to evict the dictionary bytes.
    for chunk in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut space = driver.get_next_space();
        space.clear();
        space.extend_from_slice(chunk);
        driver.commit_space(space);
        driver.skip_matching();
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.hc_matcher().max_window_size,
        baseline,
        "retired dictionary budget must not remain reusable for live history"
    );
}
4032
#[test]
fn hc_rebases_positions_after_u32_boundary() {
    // Regression: HC tables store u32-relative positions, so absolute stream
    // positions past u32::MAX need rebasing rather than dropping inserts.
    let mut matcher = HcMatchGenerator::new(64);
    matcher.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
    matcher.ensure_tables();
    matcher.position_base = 0;
    // On 32-bit targets usize cannot represent u32::MAX + 64; skip the test
    // there instead of panicking.
    let history_abs_start: usize = match (u64::from(u32::MAX) + 64).try_into() {
        Ok(value) => value,
        Err(_) => return,
    };
    // Simulate a long-running stream where absolute history positions crossed
    // the u32 range. Before #51 this disabled HC inserts entirely.
    matcher.history_abs_start = history_abs_start;
    matcher.skip_matching();
    assert_eq!(
        matcher.position_base, matcher.history_abs_start,
        "rebase should anchor to the oldest live absolute position"
    );

    assert!(
        matcher.hash_table.iter().any(|entry| *entry != HC_EMPTY),
        "HC hash table should still be populated after crossing u32 boundary"
    );

    // Verify rebasing preserves candidate lookup, not just table population.
    let abs_pos = matcher.history_abs_start + 10;
    let candidates = matcher.chain_candidates(abs_pos);
    assert!(
        candidates.iter().any(|candidate| *candidate != usize::MAX),
        "chain_candidates should return valid matches after rebase"
    );
}
4065
#[test]
fn hc_rebase_rebuilds_only_inserted_prefix() {
    // Rebasing rebuilds the HC tables from scratch; it must only re-insert
    // positions strictly before the current absolute position, never pre-seed
    // positions the matcher has not reached yet.
    let mut matcher = HcMatchGenerator::new(64);
    matcher.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
    matcher.ensure_tables();
    matcher.position_base = 0;
    // On 32-bit targets usize cannot represent u32::MAX + 64; skip the test there.
    let history_abs_start: usize = match (u64::from(u32::MAX) + 64).try_into() {
        Ok(value) => value,
        Err(_) => return,
    };
    matcher.history_abs_start = history_abs_start;
    let abs_pos = matcher.history_abs_start + 6;

    // Build the expected post-rebase state by hand: identical data, tables
    // cleared, then only positions [history_abs_start, abs_pos) re-inserted.
    let mut expected = HcMatchGenerator::new(64);
    expected.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
    expected.ensure_tables();
    expected.history_abs_start = history_abs_start;
    expected.position_base = expected.history_abs_start;
    expected.hash_table.fill(HC_EMPTY);
    expected.chain_table.fill(HC_EMPTY);
    for pos in expected.history_abs_start..abs_pos {
        expected.insert_position_no_rebase(pos);
    }

    matcher.maybe_rebase_positions(abs_pos);

    assert_eq!(
        matcher.position_base, matcher.history_abs_start,
        "rebase should still anchor to the oldest live absolute position"
    );
    assert_eq!(
        matcher.hash_table, expected.hash_table,
        "rebase must rebuild only positions already inserted before abs_pos"
    );
    assert_eq!(
        matcher.chain_table, expected.chain_table,
        "future positions must not be pre-seeded into HC chains during rebase"
    );
}
4105
#[test]
fn suffix_store_with_single_slot_does_not_panic_on_keying() {
    // A one-slot store forces every key into the same bucket; keying must not
    // panic and the inserted entry must stay retrievable.
    let mut store = SuffixStore::with_capacity(1);
    store.insert(b"abcde", 0);
    assert!(store.contains_key(b"abcde"));
    assert_eq!(store.get(b"abcde"), Some(0));
}
4113
#[test]
fn fastest_reset_uses_interleaved_hash_fill_step() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Uncompressed keeps the dense (every-position) hash fill.
    driver.reset(CompressionLevel::Uncompressed);
    assert_eq!(driver.match_generator.hash_fill_step, 1);

    // Fastest switches to the interleaved stride.
    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.match_generator.hash_fill_step, FAST_HASH_FILL_STEP);

    // Better uses the HashChain backend with lazy2; verify that the backend switch
    // happened and the lazy_depth is configured correctly.
    driver.reset(CompressionLevel::Better);
    assert_eq!(driver.active_backend, MatcherBackend::HashChain);
    assert_eq!(driver.window_size(), 1u64 << 23);
    assert_eq!(driver.hc_matcher().lazy_depth, 2);
}
4131
#[test]
fn simple_matcher_updates_offset_history_after_emitting_match() {
    let mut gen = MatchGenerator::new(64);
    gen.add_data(
        b"abcdeabcdeabcde".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    // First sequence: 5 leading literals followed by a 10-byte overlapping
    // match at distance 5.
    let emitted = gen.next_sequence(|seq| {
        let expected = Sequence::Triple {
            literals: b"abcde",
            offset: 5,
            match_len: 10,
        };
        assert_eq!(seq, expected);
    });
    assert!(emitted);
    // Emitting the match must rotate offset 5 into the repcode history.
    assert_eq!(gen.offset_hist, [5, 1, 4]);
}
4153
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep1_before_hash_lookup() {
    let mut gen = MatchGenerator::new(64);
    gen.add_data(
        b"abcdefghijabcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    // Position the matcher at the repeated half with no pending literals so
    // the zero-literal repcode path runs.
    gen.suffix_idx = 10;
    gen.last_idx_in_sequence = 10;
    // rep0 = 99 cannot match; rep1 = 10 does.
    gen.offset_hist = [99, 10, 4];

    let candidate = gen.repcode_candidate(&gen.window.last().unwrap().data[10..], 0);
    assert_eq!(candidate, Some((10, 10)));
}
4170
#[test]
fn simple_matcher_repcode_can_target_previous_window_entry() {
    let mut gen = MatchGenerator::new(64);
    // Two identical blocks: the repcode must be able to reach back across the
    // window-entry boundary into the first one.
    gen.add_data(
        b"abcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    gen.skip_matching();
    gen.add_data(
        b"abcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    // rep1 = 10 points exactly at the previous (identical) window entry.
    gen.offset_hist = [99, 10, 4];

    let candidate = gen.repcode_candidate(&gen.window.last().unwrap().data, 0);
    assert_eq!(candidate, Some((10, 10)));
}
4191
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep2() {
    let mut gen = MatchGenerator::new(64);
    gen.add_data(
        b"abcdefghijabcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    // Zero-literal position at the start of the repeated half.
    gen.suffix_idx = 10;
    gen.last_idx_in_sequence = 10;
    // rep1=4 does not match at idx 10, rep2=10 does.
    gen.offset_hist = [99, 4, 10];

    let candidate = gen.repcode_candidate(&gen.window.last().unwrap().data[10..], 0);
    assert_eq!(candidate, Some((10, 10)));
}
4208
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep0_minus1() {
    let mut gen = MatchGenerator::new(64);
    gen.add_data(
        b"abcdefghijabcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    // Zero-literal position at the start of the repeated half.
    gen.suffix_idx = 10;
    gen.last_idx_in_sequence = 10;
    // rep1=4 and rep2=99 do not match; rep0-1 == 10 does.
    gen.offset_hist = [11, 4, 99];

    let candidate = gen.repcode_candidate(&gen.window.last().unwrap().data[10..], 0);
    assert_eq!(candidate, Some((10, 10)));
}
4225
#[test]
fn simple_matcher_repcode_rejects_offsets_beyond_searchable_prefix() {
    let mut gen = MatchGenerator::new(64);
    gen.add_data(
        b"abcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    gen.skip_matching();
    gen.add_data(
        b"klmnopqrst".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    gen.suffix_idx = 3;

    // Only 10 history bytes + 3 bytes of the current block precede the suffix,
    // so an offset of 14 reaches beyond the searchable prefix and must be
    // rejected.
    let candidate = gen.offset_match_len(14, &gen.window.last().unwrap().data[3..]);
    assert_eq!(candidate, None);
}
4245
#[test]
fn simple_matcher_skip_matching_seeds_every_position_even_with_fast_step() {
    let mut gen = MatchGenerator::new(64);
    gen.hash_fill_step = FAST_HASH_FILL_STEP;
    gen.add_data(
        b"abcdefghijklmnop".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    // skip_matching must register every position — including the ones the
    // interleaved fill step would skip — so "bcdef" (offset 1) is findable.
    gen.skip_matching();
    gen.add_data(b"bcdef".to_vec(), SuffixStore::with_capacity(64), |_, _| {});

    let emitted = gen.next_sequence(|seq| {
        let expected = Sequence::Triple {
            literals: b"",
            offset: 15,
            match_len: 5,
        };
        assert_eq!(seq, expected);
    });
    assert!(emitted);
    // The single match consumed the whole second block; nothing remains.
    assert!(!gen.next_sequence(|_| {}));
}
4270
#[test]
fn simple_matcher_add_suffixes_till_backfills_last_searchable_anchor() {
    let mut gen = MatchGenerator::new(64);
    gen.hash_fill_step = FAST_HASH_FILL_STEP;
    gen.add_data(
        b"01234abcde".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    gen.add_suffixes_till(10, FAST_HASH_FILL_STEP);

    // Even with a stride of 3, the final searchable anchor (position 5, the
    // last index that still has a full MIN_MATCH_LEN key) must be backfilled.
    let entry = gen.window.last().unwrap();
    let anchor_key = &entry.data[5..10];
    assert_eq!(entry.suffixes.get(anchor_key), Some(5));
}
4286
#[test]
fn simple_matcher_add_suffixes_till_skips_when_idx_below_min_match_len() {
    let mut gen = MatchGenerator::new(128);
    gen.hash_fill_step = FAST_HASH_FILL_STEP;
    gen.add_data(
        b"abcdefghijklmnopqrstuvwxyz".to_vec(),
        SuffixStore::with_capacity(1 << 16),
        |_, _| {},
    );

    // Filling up to an index shorter than one full key registers nothing.
    gen.add_suffixes_till(MIN_MATCH_LEN - 1, FAST_HASH_FILL_STEP);

    let entry = gen.window.last().unwrap();
    assert_eq!(entry.suffixes.get(&entry.data[..MIN_MATCH_LEN]), None);
}
4303
#[test]
fn simple_matcher_add_suffixes_till_fast_step_registers_interleaved_positions() {
    let mut gen = MatchGenerator::new(128);
    gen.hash_fill_step = FAST_HASH_FILL_STEP;
    gen.add_data(
        b"abcdefghijklmnopqrstuvwxyz".to_vec(),
        SuffixStore::with_capacity(1 << 16),
        |_, _| {},
    );

    gen.add_suffixes_till(17, FAST_HASH_FILL_STEP);

    // With a stride of 3, positions 0, 3, 6, 9, 12 must all be registered.
    let entry = gen.window.last().unwrap();
    let mut pos = 0usize;
    while pos <= 12 {
        let key = &entry.data[pos..pos + MIN_MATCH_LEN];
        assert_eq!(
            entry.suffixes.get(key),
            Some(pos),
            "expected interleaved suffix registration at pos {pos}"
        );
        pos += 3;
    }
}
4326
#[test]
fn dfast_skip_matching_handles_window_eviction() {
    let mut matcher = DfastMatchGenerator::new(16);

    // Three 6-byte blocks against a 16-byte window force the oldest block out.
    matcher.add_data(alloc::vec![1, 2, 3, 4, 5, 6], |_| {});
    matcher.skip_matching();
    matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});
    matcher.skip_matching();
    matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});

    // Replay the emitted sequences against the surviving history and check
    // they decode back to the committed bytes.
    let mut decoded = alloc::vec![7, 8, 9, 10, 11, 12];
    matcher.start_matching(|seq| match seq {
        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            decoded.extend_from_slice(literals);
            // Byte-at-a-time copy so overlapping matches decode correctly.
            let mut src = decoded.len() - offset;
            for _ in 0..match_len {
                let byte = decoded[src];
                decoded.push(byte);
                src += 1;
            }
        }
    });

    assert_eq!(decoded, [7, 8, 9, 10, 11, 12, 7, 8, 9, 10, 11, 12]);
}
4356
#[test]
fn dfast_add_data_callback_reports_evicted_len_not_capacity() {
    let mut matcher = DfastMatchGenerator::new(8);

    // Both buffers deliberately over-allocate (capacity 64, length 8) so an
    // implementation that reports capacity instead of length is caught.
    let mut first = Vec::with_capacity(64);
    first.extend_from_slice(b"abcdefgh");
    matcher.add_data(first, |_| {});

    let mut second = Vec::with_capacity(64);
    second.extend_from_slice(b"ijklmnop");

    let mut evicted_len = None;
    matcher.add_data(second, |evicted| evicted_len = Some(evicted.len()));

    assert_eq!(
        evicted_len,
        Some(8),
        "eviction callback must report evicted byte length, not backing capacity"
    );
}
4379
#[test]
fn dfast_trim_to_window_callback_reports_evicted_len_not_capacity() {
    let mut matcher = DfastMatchGenerator::new(16);

    // Over-allocated buffers (capacity 64, length 8) catch a callback that
    // reports capacity rather than the evicted byte count.
    let mut first = Vec::with_capacity(64);
    first.extend_from_slice(b"abcdefgh");
    matcher.add_data(first, |_| {});

    let mut second = Vec::with_capacity(64);
    second.extend_from_slice(b"ijklmnop");
    matcher.add_data(second, |_| {});

    // Shrink the window after the fact so the trim evicts the first block.
    matcher.max_window_size = 8;

    let mut evicted_len = None;
    matcher.trim_to_window(|evicted| evicted_len = Some(evicted.len()));

    assert_eq!(
        evicted_len,
        Some(8),
        "trim callback must report evicted byte length, not backing capacity"
    );
}
4405
#[test]
fn dfast_inserts_tail_positions_for_next_block_matching() {
    // After a block is matched, dfast must also insert the block's tail
    // positions into its tables so the NEXT block can match against them
    // immediately, with zero leading literals.
    let mut matcher = DfastMatchGenerator::new(1 << 22);

    // First block: nothing to match against yet, so it must come out as
    // literals only.
    matcher.add_data(b"012345bcdea".to_vec(), |_| {});
    let mut history = Vec::new();
    matcher.start_matching(|seq| match seq {
        Sequence::Literals { literals } => history.extend_from_slice(literals),
        Sequence::Triple { .. } => unreachable!("first block should not match history"),
    });
    assert_eq!(history, b"012345bcdea");

    // Second block starts with "bcdea", which only exists at the TAIL of the
    // first block — findable only if tail positions were inserted.
    matcher.add_data(b"bcdeabcdeab".to_vec(), |_| {});
    let mut saw_first_sequence = false;
    matcher.start_matching(|seq| {
        assert!(!saw_first_sequence, "expected a single cross-block match");
        saw_first_sequence = true;
        match seq {
            Sequence::Literals { .. } => {
                panic!("expected tail-anchored cross-block match before any literals")
            }
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => {
                // Zero literals, distance 5, overlapping match covering the
                // entire 11-byte second block.
                assert_eq!(literals, b"");
                assert_eq!(offset, 5);
                assert_eq!(match_len, 11);
                // Byte-at-a-time decode so the overlapping copy is faithful.
                let start = history.len() - offset;
                for i in 0..match_len {
                    let byte = history[start + i];
                    history.push(byte);
                }
            }
        }
    });

    assert!(
        saw_first_sequence,
        "expected tail-anchored cross-block match"
    );
    // The replayed sequences must reconstruct both committed blocks exactly.
    assert_eq!(history, b"012345bcdeabcdeabcdeab");
}
4450
#[test]
fn fastest_hint_iteration_23_sequences_reconstruct_source() {
    // Deterministic pseudo-random bytes via a 64-bit LCG (Knuth/MMIX
    // constants); the high bits are used for better distribution.
    fn generate_data(seed: u64, len: usize) -> Vec<u8> {
        let mut state = seed;
        let mut data = Vec::with_capacity(len);
        for _ in 0..len {
            state = state
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407);
            data.push((state >> 33) as u8);
        }
        data
    }

    // Fixed iteration 23 of a former fuzz loop, kept as a regression fixture.
    let i = 23u64;
    let len = (i * 89 % 16384) as usize;
    let mut data = generate_data(i, len);
    // Append a repeated slice so the fixture deterministically exercises
    // the match path (Sequence::Triple) instead of only literals.
    let repeat = data[128..256].to_vec();
    data.extend_from_slice(&repeat);
    data.extend_from_slice(&repeat);

    // Set the size hint BEFORE reset so the Fastest configuration can use it.
    let mut driver = MatchGeneratorDriver::new(1024 * 128, 1);
    driver.set_source_size_hint(data.len() as u64);
    driver.reset(CompressionLevel::Fastest);
    let mut space = driver.get_next_space();
    space[..data.len()].copy_from_slice(&data);
    space.truncate(data.len());
    driver.commit_space(space);

    // Decode the emitted sequences and verify they are self-consistent:
    // every offset must point into already-produced output.
    let mut rebuilt = Vec::with_capacity(data.len());
    let mut saw_triple = false;
    driver.start_matching(|seq| match seq {
        Sequence::Literals { literals } => rebuilt.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            saw_triple = true;
            rebuilt.extend_from_slice(literals);
            assert!(offset > 0, "offset must be non-zero");
            assert!(
                offset <= rebuilt.len(),
                "offset must reference already-produced bytes: offset={} produced={}",
                offset,
                rebuilt.len()
            );
            // Byte-at-a-time copy handles overlapping (offset < match_len) runs.
            let start = rebuilt.len() - offset;
            for idx in 0..match_len {
                let b = rebuilt[start + idx];
                rebuilt.push(b);
            }
        }
    });

    assert!(saw_triple, "fixture must emit at least one match");
    // Round-trip: the sequences must reproduce the input byte-for-byte.
    assert_eq!(rebuilt, data);
}