1use alloc::collections::VecDeque;
9use alloc::vec::Vec;
10#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
11use core::arch::aarch64::{
12 __crc32d, uint8x16_t, vceqq_u8, vgetq_lane_u64, vld1q_u8, vreinterpretq_u64_u8,
13};
14#[cfg(target_arch = "x86")]
15use core::arch::x86::{
16 __m128i, __m256i, _mm_cmpeq_epi8, _mm_loadu_si128, _mm_movemask_epi8, _mm256_cmpeq_epi8,
17 _mm256_loadu_si256, _mm256_movemask_epi8,
18};
19#[cfg(target_arch = "x86_64")]
20use core::arch::x86_64::{
21 __m128i, __m256i, _mm_cmpeq_epi8, _mm_crc32_u64, _mm_loadu_si128, _mm_movemask_epi8,
22 _mm256_cmpeq_epi8, _mm256_loadu_si256, _mm256_movemask_epi8,
23};
24use core::convert::TryInto;
25use core::num::NonZeroUsize;
26
27use super::BETTER_WINDOW_LOG;
28use super::CompressionLevel;
29use super::Matcher;
30use super::Sequence;
31use super::blocks::encode_offset_with_history;
32use super::incompressible::{block_looks_incompressible, block_looks_incompressible_strict};
33#[cfg(all(feature = "std", target_arch = "aarch64", target_endian = "little"))]
34use std::arch::is_aarch64_feature_detected;
35#[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
36use std::arch::is_x86_feature_detected;
37#[cfg(feature = "std")]
38use std::sync::OnceLock;
39
// --- Greedy "simple" matcher tuning ---
// Shortest match the simple matcher will emit.
const MIN_MATCH_LEN: usize = 5;
// Hash every Nth position when back-filling skipped regions.
const FAST_HASH_FILL_STEP: usize = 3;
// Stride used while skating over data flagged as incompressible.
const INCOMPRESSIBLE_SKIP_STEP: usize = 8;

// --- Double-fast (dfast) matcher tuning ---
const DFAST_MIN_MATCH_LEN: usize = 6;
const DFAST_SHORT_HASH_LOOKAHEAD: usize = 4;
// Row matcher's minimum match length (interleaved here historically).
const ROW_MIN_MATCH_LEN: usize = 6;
// Stop searching once a match of this length is found.
const DFAST_TARGET_LEN: usize = 48;
// Default (maximum) dfast hash-table size in bits; shrunk per window.
const DFAST_HASH_BITS: usize = 20;
const DFAST_SEARCH_DEPTH: usize = 4;
// Sentinel marking an unused table slot.
const DFAST_EMPTY_SLOT: usize = usize::MAX;
// After 2^strength positions without a match, the skip step grows.
const DFAST_SKIP_SEARCH_STRENGTH: usize = 6;
const DFAST_SKIP_STEP_GROWTH_INTERVAL: usize = 1 << DFAST_SKIP_SEARCH_STRENGTH;
const DFAST_LOCAL_SKIP_TRIGGER: usize = 256;
const DFAST_MAX_SKIP_STEP: usize = 8;
const DFAST_INCOMPRESSIBLE_SKIP_STEP: usize = 16;

// --- Row-based matcher tuning ---
// Default (maximum) row hash-table size in bits; shrunk per window.
const ROW_HASH_BITS: usize = 20;
// log2 of the number of entries per row.
const ROW_LOG: usize = 5;
const ROW_SEARCH_DEPTH: usize = 16;
const ROW_TARGET_LEN: usize = 48;
const ROW_TAG_BITS: usize = 8;
const ROW_EMPTY_SLOT: usize = usize::MAX;
// Number of bytes hashed to form a row key.
const ROW_HASH_KEY_LEN: usize = 4;
// 64-bit odd multiplicative-hash constant (Fibonacci-style mixing).
const HASH_MIX_PRIME: u64 = 0x9E37_79B1_85EB_CA87;

// --- Hash-chain (hc) matcher tuning ---
const HC_HASH_LOG: usize = 20;
const HC_CHAIN_LOG: usize = 19;
const HC_SEARCH_DEPTH: usize = 16;
const HC_MIN_MATCH_LEN: usize = 5;
const HC_TARGET_LEN: usize = 48;
// Sentinel value for an empty hash-chain head.
const HC_EMPTY: u32 = 0;

// Hard upper bound on any configured hash-chain search depth.
const MAX_HC_SEARCH_DEPTH: usize = 32;
/// Dispatch tag for the 64-bit hash-mix routine; one variant per
/// hardware-accelerated implementation compiled for this target.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[repr(u8)]
enum HashMixKernel {
    /// Portable multiply-based mixer; always available.
    Scalar = 0,
    /// SSE4.2 CRC32-assisted mixer (x86_64 only).
    #[cfg(target_arch = "x86_64")]
    X86Sse42 = 1,
    /// CRC-extension-assisted mixer (little-endian aarch64 only).
    #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
    Aarch64Crc = 2,
}
88
/// Mixes `value` into a well-distributed 64-bit hash using the chosen
/// kernel. The cfg-gated arms mirror the cfg-gated enum variants, so a
/// kernel value always has a matching arm on the target it exists on.
#[inline(always)]
fn hash_mix_u64_with_kernel(value: u64, kernel: HashMixKernel) -> u64 {
    match kernel {
        // Portable fallback: single Fibonacci-style multiply.
        HashMixKernel::Scalar => value.wrapping_mul(HASH_MIX_PRIME),
        #[cfg(target_arch = "x86_64")]
        HashMixKernel::X86Sse42 => {
            // SAFETY: this variant is only produced by detect_hash_mix_kernel()
            // after SSE4.2 was runtime-detected or statically enabled.
            unsafe { hash_mix_u64_sse42(value) }
        }
        #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
        HashMixKernel::Aarch64Crc => {
            // SAFETY: only produced after the aarch64 CRC extension was
            // runtime-detected or statically enabled.
            unsafe { hash_mix_u64_crc(value) }
        }
    }
}
105
/// Selects the best hash-mix kernel for the running CPU.
///
/// With `std`, uses runtime feature detection; without `std`, falls back
/// to compile-time `target_feature` flags. Anything else gets the
/// portable scalar kernel.
#[inline(always)]
fn detect_hash_mix_kernel() -> HashMixKernel {
    #[cfg(all(feature = "std", target_arch = "x86_64"))]
    if is_x86_feature_detected!("sse4.2") {
        return HashMixKernel::X86Sse42;
    }

    #[cfg(all(feature = "std", target_arch = "aarch64", target_endian = "little"))]
    if is_aarch64_feature_detected!("crc") {
        return HashMixKernel::Aarch64Crc;
    }

    // no_std: we can only trust features enabled at compile time.
    #[cfg(all(not(feature = "std"), target_arch = "x86_64"))]
    if cfg!(target_feature = "sse4.2") {
        return HashMixKernel::X86Sse42;
    }

    #[cfg(all(
        not(feature = "std"),
        target_arch = "aarch64",
        target_endian = "little"
    ))]
    if cfg!(target_feature = "crc") {
        return HashMixKernel::Aarch64Crc;
    }

    HashMixKernel::Scalar
}
134
/// CRC32C-assisted 64-bit hash mix.
///
/// Combines the 32-bit CRC (shifted into the high half) with a rotation
/// of the original value so all 64 input bits influence the result, then
/// applies the multiplicative mix.
///
/// # Safety
/// The caller must ensure the CPU supports SSE4.2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse4.2")]
unsafe fn hash_mix_u64_sse42(value: u64) -> u64 {
    let crc = _mm_crc32_u64(0, value);
    ((crc << 32) ^ value.rotate_left(13)).wrapping_mul(HASH_MIX_PRIME)
}
141
/// aarch64 CRC-extension-assisted 64-bit hash mix; mirrors the SSE4.2
/// variant (different rotation constant, same structure).
///
/// # Safety
/// The caller must ensure the CPU supports the CRC extension.
#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
#[target_feature(enable = "crc")]
unsafe fn hash_mix_u64_crc(value: u64) -> u64 {
    let crc = __crc32d(0, value) as u64;
    ((crc << 32) ^ value.rotate_left(17)).wrapping_mul(HASH_MIX_PRIME)
}
151
/// Dispatch tag for the SIMD common-prefix comparison routine, selected
/// once per process from the available vector extensions.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
enum PrefixKernel {
    /// Portable word-at-a-time comparison; always available.
    Scalar,
    /// 16-byte SSE2 comparison (x86 / x86_64).
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    X86Sse2,
    /// 32-byte AVX2 comparison (x86 / x86_64).
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    X86Avx2,
    /// 16-byte NEON comparison (little-endian aarch64).
    #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
    Aarch64Neon,
}
162
/// Tuning knobs for the hash-chain matcher.
#[derive(Copy, Clone)]
struct HcConfig {
    // log2 of the hash-table size.
    hash_log: usize,
    // log2 of the chain-table size (window positions remembered).
    chain_log: usize,
    // Maximum chain links followed per search.
    search_depth: usize,
    // Stop searching once a match of this length is found.
    target_len: usize,
}
172
/// Tuning knobs for the row-based matcher.
#[derive(Copy, Clone)]
struct RowConfig {
    // Hash-table size in bits.
    hash_bits: usize,
    // log2 of the entries per row.
    row_log: usize,
    // Maximum candidates examined per search.
    search_depth: usize,
    // Stop searching once a match of this length is found.
    target_len: usize,
}
180
/// Default hash-chain configuration, assembled from the module constants.
const HC_CONFIG: HcConfig = HcConfig {
    hash_log: HC_HASH_LOG,
    chain_log: HC_CHAIN_LOG,
    search_depth: HC_SEARCH_DEPTH,
    target_len: HC_TARGET_LEN,
};

/// Stronger hash-chain configuration used by `CompressionLevel::Best`.
const BEST_HC_CONFIG: HcConfig = HcConfig {
    hash_log: 21,
    chain_log: 20,
    search_depth: 32,
    target_len: 128,
};

/// Default row-matcher configuration, assembled from the module constants.
const ROW_CONFIG: RowConfig = RowConfig {
    hash_bits: ROW_HASH_BITS,
    row_log: ROW_LOG,
    search_depth: ROW_SEARCH_DEPTH,
    target_len: ROW_TARGET_LEN,
};
202
/// Full parameter set for one compression level: which backend to run
/// and how to tune it. `hc`/`row` are always populated, but only the
/// one matching `backend` is actually used.
#[derive(Copy, Clone)]
struct LevelParams {
    backend: MatcherBackend,
    // log2 of the maximum window size in bytes.
    window_log: u8,
    // Stride for back-filling the hash table over skipped data.
    hash_fill_step: usize,
    // How many positions ahead lazy matching may look (0 = greedy).
    lazy_depth: u8,
    hc: HcConfig,
    row: RowConfig,
}
213
214fn dfast_hash_bits_for_window(max_window_size: usize) -> usize {
215 let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
216 window_log.clamp(MIN_WINDOW_LOG as usize, DFAST_HASH_BITS)
217}
218
219fn row_hash_bits_for_window(max_window_size: usize) -> usize {
220 let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
221 window_log.clamp(MIN_WINDOW_LOG as usize, ROW_HASH_BITS)
222}
223
/// Per-level tuning table: `LEVEL_TABLE[level - 1]` holds the parameters
/// for compression levels 1..=22. Levels 1–3 use the fast backends
/// (simple/dfast), 4 uses the row matcher, and 5+ use hash chains with
/// progressively larger tables, windows, and search budgets.
#[rustfmt::skip]
const LEVEL_TABLE: [LevelParams; 22] = [
    LevelParams { backend: MatcherBackend::Simple, window_log: 17, hash_fill_step: 3, lazy_depth: 0, hc: HC_CONFIG, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::Dfast, window_log: 19, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::Dfast, window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::Row, window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::HashChain, window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HcConfig { hash_log: 18, chain_log: 17, search_depth: 4, target_len: 32 }, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::HashChain, window_log: BETTER_WINDOW_LOG, hash_fill_step: 1, lazy_depth: 1, hc: HcConfig { hash_log: 19, chain_log: 18, search_depth: 8, target_len: 48 }, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::HashChain, window_log: BETTER_WINDOW_LOG, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 16, target_len: 48 }, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::HashChain, window_log: BETTER_WINDOW_LOG, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 24, target_len: 64 }, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::HashChain, window_log: BETTER_WINDOW_LOG, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 24, target_len: 64 }, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::HashChain, window_log: 24, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 28, target_len: 96 }, row: ROW_CONFIG },
    // Level 11 reuses the "Best" hash-chain configuration.
    LevelParams { backend: MatcherBackend::HashChain, window_log: 24, hash_fill_step: 1, lazy_depth: 2, hc: BEST_HC_CONFIG, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 128 }, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 160 }, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 22, search_depth: 32, target_len: 192 }, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 22, search_depth: 32, target_len: 192 }, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 22, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    // Levels 17..=22 share the maximum configuration.
    LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
    LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 }, row: ROW_CONFIG },
];
259
/// Smallest window log the matcher will ever use.
const MIN_WINDOW_LOG: u8 = 10;
/// Floor applied to the source-size hint so a tiny hint cannot shrink
/// the window (and derived table sizes) below a useful minimum.
const MIN_HINTED_WINDOW_LOG: u8 = 14;
268
269fn adjust_params_for_source_size(mut params: LevelParams, src_size: u64) -> LevelParams {
279 let src_log = if src_size == 0 {
284 MIN_WINDOW_LOG
285 } else {
286 (64 - (src_size - 1).leading_zeros()) as u8 };
288 let src_log = src_log.max(MIN_WINDOW_LOG).max(MIN_HINTED_WINDOW_LOG);
289 if src_log < params.window_log {
290 params.window_log = src_log;
291 }
292 if params.backend == MatcherBackend::HashChain {
296 if (src_log + 2) < params.hc.hash_log as u8 {
297 params.hc.hash_log = (src_log + 2) as usize;
298 }
299 if (src_log + 1) < params.hc.chain_log as u8 {
300 params.hc.chain_log = (src_log + 1) as usize;
301 }
302 } else if params.backend == MatcherBackend::Row {
303 let max_window_size = 1usize << params.window_log;
304 params.row.hash_bits = row_hash_bits_for_window(max_window_size);
305 }
306 params
307}
308
/// Maps a public `CompressionLevel` to concrete tuning parameters,
/// applying the source-size adjustment when a size is known.
///
/// NOTE(review): positive `Level(n)` indexes `LEVEL_TABLE[n - 1]`
/// clamped to `MAX_LEVEL`; this assumes `MAX_LEVEL <= 22` — confirm
/// against the `CompressionLevel` definition.
fn resolve_level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
    let params = match level {
        // "Uncompressed" still needs a matcher object; use the cheapest one.
        CompressionLevel::Uncompressed => LevelParams {
            backend: MatcherBackend::Simple,
            window_log: 17,
            hash_fill_step: 1,
            lazy_depth: 0,
            hc: HC_CONFIG,
            row: ROW_CONFIG,
        },
        CompressionLevel::Fastest => LEVEL_TABLE[0],
        CompressionLevel::Default => LEVEL_TABLE[2],
        CompressionLevel::Better => LEVEL_TABLE[6],
        CompressionLevel::Best => LEVEL_TABLE[10],
        CompressionLevel::Level(n) => {
            if n > 0 {
                let idx = (n as usize).min(CompressionLevel::MAX_LEVEL as usize) - 1;
                LEVEL_TABLE[idx]
            } else if n == 0 {
                // Level 0 aliases the default level.
                LEVEL_TABLE[CompressionLevel::DEFAULT_LEVEL as usize - 1]
            } else {
                // Negative levels trade ratio for speed: the simple
                // backend with an increasingly sparse hash fill.
                let acceleration =
                    (n.saturating_abs() as usize).min((-CompressionLevel::MIN_LEVEL) as usize);
                let step = (acceleration + 3).min(128);
                LevelParams {
                    backend: MatcherBackend::Simple,
                    window_log: 17,
                    hash_fill_step: step,
                    lazy_depth: 0,
                    hc: HC_CONFIG,
                    row: ROW_CONFIG,
                }
            }
        }
    };
    if let Some(size) = source_size {
        adjust_params_for_source_size(params, size)
    } else {
        params
    }
}
356
/// Which match-finding engine the driver is currently running.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum MatcherBackend {
    /// Greedy single-hash matcher (fastest, weakest).
    Simple,
    /// Double-hash "dfast" matcher.
    Dfast,
    /// Row-based tagged-hash matcher.
    Row,
    /// Hash-chain matcher (slowest, strongest).
    HashChain,
}
364
/// Front-end that owns all matcher backends, recycles their buffers, and
/// routes `Matcher` trait calls to whichever backend the current
/// compression level selected.
pub struct MatchGeneratorDriver {
    // Recycled data buffers shared by all backends.
    vec_pool: Vec<Vec<u8>>,
    // Recycled suffix stores (simple backend only).
    suffix_pool: Vec<SuffixStore>,
    // The simple backend is always present; the others are lazily built.
    match_generator: MatchGenerator,
    dfast_match_generator: Option<DfastMatchGenerator>,
    row_match_generator: Option<RowMatchGenerator>,
    hc_match_generator: Option<HcMatchGenerator>,
    active_backend: MatcherBackend,
    // Current slice size (base size capped by the window).
    slice_size: usize,
    base_slice_size: usize,
    // Window size advertised to the frame header.
    reported_window_size: usize,
    // Extra window bytes temporarily granted for primed dictionary data;
    // reclaimed as those bytes are evicted.
    dictionary_retained_budget: usize,
    // One-shot source-size hint consumed by the next reset().
    source_size_hint: Option<u64>,
}
385
impl MatchGeneratorDriver {
    /// Creates a driver whose window holds at most `max_slices_in_window`
    /// slices of `slice_size` bytes. Starts on the `Simple` backend;
    /// `reset()` picks the real backend for the chosen level.
    pub(crate) fn new(slice_size: usize, max_slices_in_window: usize) -> Self {
        let max_window_size = max_slices_in_window * slice_size;
        Self {
            vec_pool: Vec::new(),
            suffix_pool: Vec::new(),
            match_generator: MatchGenerator::new(max_window_size),
            dfast_match_generator: None,
            row_match_generator: None,
            hc_match_generator: None,
            active_backend: MatcherBackend::Simple,
            slice_size,
            base_slice_size: slice_size,
            reported_window_size: max_window_size,
            dictionary_retained_budget: 0,
            source_size_hint: None,
        }
    }

    /// Resolves tuning parameters for `level`, shrunk for the source
    /// size when a hint is available.
    fn level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
        resolve_level_params(level, source_size)
    }

    // The accessors below panic if the corresponding backend has not
    // been created yet; reset() is responsible for initializing it.

    fn dfast_matcher(&self) -> &DfastMatchGenerator {
        self.dfast_match_generator
            .as_ref()
            .expect("dfast backend must be initialized by reset() before use")
    }

    fn dfast_matcher_mut(&mut self) -> &mut DfastMatchGenerator {
        self.dfast_match_generator
            .as_mut()
            .expect("dfast backend must be initialized by reset() before use")
    }

    fn row_matcher(&self) -> &RowMatchGenerator {
        self.row_match_generator
            .as_ref()
            .expect("row backend must be initialized by reset() before use")
    }

    fn row_matcher_mut(&mut self) -> &mut RowMatchGenerator {
        self.row_match_generator
            .as_mut()
            .expect("row backend must be initialized by reset() before use")
    }

    fn hc_matcher(&self) -> &HcMatchGenerator {
        self.hc_match_generator
            .as_ref()
            .expect("hash chain backend must be initialized by reset() before use")
    }

    fn hc_matcher_mut(&mut self) -> &mut HcMatchGenerator {
        self.hc_match_generator
            .as_mut()
            .expect("hash chain backend must be initialized by reset() before use")
    }

    /// Returns up to `evicted_bytes` of the window budget that was
    /// granted for primed dictionary content, shrinking the active
    /// backend's window limit by the reclaimed amount.
    fn retire_dictionary_budget(&mut self, evicted_bytes: usize) {
        let reclaimed = evicted_bytes.min(self.dictionary_retained_budget);
        if reclaimed == 0 {
            return;
        }
        self.dictionary_retained_budget -= reclaimed;
        match self.active_backend {
            MatcherBackend::Simple => {
                self.match_generator.max_window_size = self
                    .match_generator
                    .max_window_size
                    .saturating_sub(reclaimed);
            }
            MatcherBackend::Dfast => {
                let matcher = self.dfast_matcher_mut();
                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
            }
            MatcherBackend::Row => {
                let matcher = self.row_matcher_mut();
                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
            }
            MatcherBackend::HashChain => {
                let matcher = self.hc_matcher_mut();
                matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
            }
        }
    }

    /// Evicts window entries until the (possibly shrunken) window limit
    /// is respected, returning evicted buffers to the pools. Loops
    /// because each eviction may retire more dictionary budget, which
    /// shrinks the limit further and can force another round.
    fn trim_after_budget_retire(&mut self) {
        loop {
            let mut evicted_bytes = 0usize;
            match self.active_backend {
                MatcherBackend::Simple => {
                    let vec_pool = &mut self.vec_pool;
                    let suffix_pool = &mut self.suffix_pool;
                    // reserve(0) appears to evict only entries that no
                    // longer fit the window — confirm against reserve().
                    self.match_generator.reserve(0, |mut data, mut suffixes| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                        suffixes.slots.clear();
                        suffixes.slots.resize(suffixes.slots.capacity(), None);
                        suffix_pool.push(suffixes);
                    });
                }
                MatcherBackend::Dfast => {
                    let mut retired = Vec::new();
                    self.dfast_matcher_mut().trim_to_window(|data| {
                        evicted_bytes += data.len();
                        retired.push(data);
                    });
                    for mut data in retired {
                        data.resize(data.capacity(), 0);
                        self.vec_pool.push(data);
                    }
                }
                MatcherBackend::Row => {
                    let mut retired = Vec::new();
                    self.row_matcher_mut().trim_to_window(|data| {
                        evicted_bytes += data.len();
                        retired.push(data);
                    });
                    for mut data in retired {
                        data.resize(data.capacity(), 0);
                        self.vec_pool.push(data);
                    }
                }
                MatcherBackend::HashChain => {
                    let mut retired = Vec::new();
                    self.hc_matcher_mut().trim_to_window(|data| {
                        evicted_bytes += data.len();
                        retired.push(data);
                    });
                    for mut data in retired {
                        data.resize(data.capacity(), 0);
                        self.vec_pool.push(data);
                    }
                }
            }
            // Fixed point: nothing was evicted, so the window fits.
            if evicted_bytes == 0 {
                break;
            }
            self.retire_dictionary_budget(evicted_bytes);
        }
    }

    /// Indexes the newest committed slice without emitting sequences,
    /// forcing the "not incompressible" hint so dictionary bytes are
    /// always fully hashed.
    fn skip_matching_for_dictionary_priming(&mut self) {
        match self.active_backend {
            MatcherBackend::Simple => self.match_generator.skip_matching_with_hint(Some(false)),
            MatcherBackend::Dfast => self.dfast_matcher_mut().skip_matching_dense(),
            MatcherBackend::Row => self.row_matcher_mut().skip_matching_with_hint(Some(false)),
            MatcherBackend::HashChain => self.hc_matcher_mut().skip_matching(Some(false)),
        }
    }
}
543
impl Matcher for MatchGeneratorDriver {
    /// All backends here can be primed with dictionary content.
    fn supports_dictionary_priming(&self) -> bool {
        true
    }

    /// Records a one-shot source-size hint; consumed by the next reset().
    fn set_source_size_hint(&mut self, size: u64) {
        self.source_size_hint = Some(size);
    }

    /// Reconfigures the driver for `level`: tears down the previous
    /// backend's state (recycling its buffers), then initializes and
    /// configures the backend the level selects.
    fn reset(&mut self, level: CompressionLevel) {
        let hint = self.source_size_hint.take();
        let hinted = hint.is_some();
        let params = Self::level_params(level, hint);
        let max_window_size = 1usize << params.window_log;
        self.dictionary_retained_budget = 0;
        // Switching backends: drain the old backend's window into the
        // buffer pools so the allocations are reused.
        if self.active_backend != params.backend {
            match self.active_backend {
                MatcherBackend::Simple => {
                    let vec_pool = &mut self.vec_pool;
                    let suffix_pool = &mut self.suffix_pool;
                    self.match_generator.reset(|mut data, mut suffixes| {
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                        suffixes.slots.clear();
                        suffixes.slots.resize(suffixes.slots.capacity(), None);
                        suffix_pool.push(suffixes);
                    });
                }
                MatcherBackend::Dfast => {
                    if let Some(dfast) = self.dfast_match_generator.as_mut() {
                        let vec_pool = &mut self.vec_pool;
                        dfast.reset(|mut data| {
                            data.resize(data.capacity(), 0);
                            vec_pool.push(data);
                        });
                    }
                }
                MatcherBackend::Row => {
                    if let Some(row) = self.row_match_generator.as_mut() {
                        // Drop the (large) tables outright; configure()
                        // rebuilds them if this backend is used again.
                        row.row_heads = Vec::new();
                        row.row_positions = Vec::new();
                        row.row_tags = Vec::new();
                        let vec_pool = &mut self.vec_pool;
                        row.reset(|mut data| {
                            data.resize(data.capacity(), 0);
                            vec_pool.push(data);
                        });
                    }
                }
                MatcherBackend::HashChain => {
                    if let Some(hc) = self.hc_match_generator.as_mut() {
                        // Drop the tables; configure() rebuilds them later.
                        hc.hash_table = Vec::new();
                        hc.chain_table = Vec::new();
                        let vec_pool = &mut self.vec_pool;
                        hc.reset(|mut data| {
                            data.resize(data.capacity(), 0);
                            vec_pool.push(data);
                        });
                    }
                }
            }
        }

        self.active_backend = params.backend;
        // A slice never exceeds the window it must fit into.
        self.slice_size = self.base_slice_size.min(max_window_size);
        self.reported_window_size = max_window_size;
        match self.active_backend {
            MatcherBackend::Simple => {
                let vec_pool = &mut self.vec_pool;
                let suffix_pool = &mut self.suffix_pool;
                self.match_generator.max_window_size = max_window_size;
                self.match_generator.hash_fill_step = params.hash_fill_step;
                self.match_generator.reset(|mut data, mut suffixes| {
                    data.resize(data.capacity(), 0);
                    vec_pool.push(data);
                    suffixes.slots.clear();
                    suffixes.slots.resize(suffixes.slots.capacity(), None);
                    suffix_pool.push(suffixes);
                });
            }
            MatcherBackend::Dfast => {
                let dfast = self
                    .dfast_match_generator
                    .get_or_insert_with(|| DfastMatchGenerator::new(max_window_size));
                dfast.max_window_size = max_window_size;
                dfast.lazy_depth = params.lazy_depth;
                // The fast loop is only enabled for the default level
                // (and its numeric aliases 0 and 3).
                dfast.use_fast_loop = matches!(
                    level,
                    CompressionLevel::Default
                        | CompressionLevel::Level(0)
                        | CompressionLevel::Level(3)
                );
                // Only shrink the table when the window was derived from
                // a real size hint.
                dfast.set_hash_bits(if hinted {
                    dfast_hash_bits_for_window(max_window_size)
                } else {
                    DFAST_HASH_BITS
                });
                let vec_pool = &mut self.vec_pool;
                dfast.reset(|mut data| {
                    data.resize(data.capacity(), 0);
                    vec_pool.push(data);
                });
            }
            MatcherBackend::Row => {
                let row = self
                    .row_match_generator
                    .get_or_insert_with(|| RowMatchGenerator::new(max_window_size));
                row.max_window_size = max_window_size;
                row.lazy_depth = params.lazy_depth;
                row.configure(params.row);
                if hinted {
                    row.set_hash_bits(row_hash_bits_for_window(max_window_size));
                }
                let vec_pool = &mut self.vec_pool;
                row.reset(|mut data| {
                    data.resize(data.capacity(), 0);
                    vec_pool.push(data);
                });
            }
            MatcherBackend::HashChain => {
                let hc = self
                    .hc_match_generator
                    .get_or_insert_with(|| HcMatchGenerator::new(max_window_size));
                hc.max_window_size = max_window_size;
                hc.lazy_depth = params.lazy_depth;
                hc.configure(params.hc);
                let vec_pool = &mut self.vec_pool;
                hc.reset(|mut data| {
                    data.resize(data.capacity(), 0);
                    vec_pool.push(data);
                });
            }
        }
    }

    /// Seeds the active backend with dictionary content and the
    /// dictionary's repeat-offset history. Dictionary bytes are committed
    /// as ordinary slices (indexed but never emitted), and the window
    /// limit is temporarily enlarged so they are not immediately evicted;
    /// that extra budget is retired as the bytes age out.
    fn prime_with_dictionary(&mut self, dict_content: &[u8], offset_hist: [u32; 3]) {
        match self.active_backend {
            MatcherBackend::Simple => self.match_generator.offset_hist = offset_hist,
            MatcherBackend::Dfast => self.dfast_matcher_mut().offset_hist = offset_hist,
            MatcherBackend::Row => self.row_matcher_mut().offset_hist = offset_hist,
            MatcherBackend::HashChain => self.hc_matcher_mut().offset_hist = offset_hist,
        }

        if dict_content.is_empty() {
            return;
        }

        // Grant the full dictionary size as extra window budget up front.
        let retained_dict_budget = dict_content.len();
        match self.active_backend {
            MatcherBackend::Simple => {
                self.match_generator.max_window_size = self
                    .match_generator
                    .max_window_size
                    .saturating_add(retained_dict_budget);
            }
            MatcherBackend::Dfast => {
                let matcher = self.dfast_matcher_mut();
                matcher.max_window_size =
                    matcher.max_window_size.saturating_add(retained_dict_budget);
            }
            MatcherBackend::Row => {
                let matcher = self.row_matcher_mut();
                matcher.max_window_size =
                    matcher.max_window_size.saturating_add(retained_dict_budget);
            }
            MatcherBackend::HashChain => {
                let matcher = self.hc_matcher_mut();
                matcher.max_window_size =
                    matcher.max_window_size.saturating_add(retained_dict_budget);
            }
        }

        let mut start = 0usize;
        let mut committed_dict_budget = 0usize;
        // A final fragment shorter than the minimum hashable length
        // cannot be indexed, so it is not committed at all.
        let min_primed_tail = match self.active_backend {
            MatcherBackend::Simple => MIN_MATCH_LEN,
            MatcherBackend::Dfast | MatcherBackend::Row | MatcherBackend::HashChain => 4,
        };
        while start < dict_content.len() {
            let end = (start + self.slice_size).min(dict_content.len());
            if end - start < min_primed_tail {
                break;
            }
            let mut space = self.get_next_space();
            space.clear();
            space.extend_from_slice(&dict_content[start..end]);
            self.commit_space(space);
            self.skip_matching_for_dictionary_priming();
            committed_dict_budget += end - start;
            start = end;
        }

        // Give back the budget for any tail that was never committed.
        let uncommitted_tail_budget = retained_dict_budget.saturating_sub(committed_dict_budget);
        if uncommitted_tail_budget > 0 {
            match self.active_backend {
                MatcherBackend::Simple => {
                    self.match_generator.max_window_size = self
                        .match_generator
                        .max_window_size
                        .saturating_sub(uncommitted_tail_budget);
                }
                MatcherBackend::Dfast => {
                    let matcher = self.dfast_matcher_mut();
                    matcher.max_window_size = matcher
                        .max_window_size
                        .saturating_sub(uncommitted_tail_budget);
                }
                MatcherBackend::Row => {
                    let matcher = self.row_matcher_mut();
                    matcher.max_window_size = matcher
                        .max_window_size
                        .saturating_sub(uncommitted_tail_budget);
                }
                MatcherBackend::HashChain => {
                    let matcher = self.hc_matcher_mut();
                    matcher.max_window_size = matcher
                        .max_window_size
                        .saturating_sub(uncommitted_tail_budget);
                }
            }
        }
        // Track only the budget actually committed; it is reclaimed via
        // retire_dictionary_budget() as the dictionary bytes are evicted.
        if committed_dict_budget > 0 {
            self.dictionary_retained_budget = self
                .dictionary_retained_budget
                .saturating_add(committed_dict_budget);
        }
    }

    /// Window size to advertise in the frame header (excludes any
    /// temporary dictionary budget).
    fn window_size(&self) -> u64 {
        self.reported_window_size as u64
    }

    /// Hands out a zero-initialized buffer of exactly `slice_size` bytes,
    /// reusing a pooled allocation when one is available.
    fn get_next_space(&mut self) -> Vec<u8> {
        if let Some(mut space) = self.vec_pool.pop() {
            if space.len() > self.slice_size {
                space.truncate(self.slice_size);
            }
            if space.len() < self.slice_size {
                space.resize(self.slice_size, 0);
            }
            return space;
        }
        alloc::vec![0; self.slice_size]
    }

    /// Borrows the most recently committed slice from the active backend.
    fn get_last_space(&mut self) -> &[u8] {
        match self.active_backend {
            MatcherBackend::Simple => self.match_generator.window.last().unwrap().data.as_slice(),
            MatcherBackend::Dfast => self.dfast_matcher().get_last_space(),
            MatcherBackend::Row => self.row_matcher().get_last_space(),
            MatcherBackend::HashChain => self.hc_matcher().get_last_space(),
        }
    }

    /// Appends a filled slice to the active backend's window, recycling
    /// any evicted buffers and retiring dictionary budget for the bytes
    /// that fell out of the window.
    fn commit_space(&mut self, space: Vec<u8>) {
        match self.active_backend {
            MatcherBackend::Simple => {
                let vec_pool = &mut self.vec_pool;
                let mut evicted_bytes = 0usize;
                // Reuse a pooled suffix store only if it is big enough.
                let suffixes = match self.suffix_pool.pop() {
                    Some(store) if store.slots.len() >= space.len() => store,
                    _ => SuffixStore::with_capacity(space.len()),
                };
                let suffix_pool = &mut self.suffix_pool;
                self.match_generator
                    .add_data(space, suffixes, |mut data, mut suffixes| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                        suffixes.slots.clear();
                        suffixes.slots.resize(suffixes.slots.capacity(), None);
                        suffix_pool.push(suffixes);
                    });
                self.retire_dictionary_budget(evicted_bytes);
                self.trim_after_budget_retire();
            }
            MatcherBackend::Dfast => {
                let vec_pool = &mut self.vec_pool;
                let mut evicted_bytes = 0usize;
                self.dfast_match_generator
                    .as_mut()
                    .expect("dfast backend must be initialized by reset() before use")
                    .add_data(space, |mut data| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                    });
                self.retire_dictionary_budget(evicted_bytes);
                self.trim_after_budget_retire();
            }
            MatcherBackend::Row => {
                let vec_pool = &mut self.vec_pool;
                let mut evicted_bytes = 0usize;
                self.row_match_generator
                    .as_mut()
                    .expect("row backend must be initialized by reset() before use")
                    .add_data(space, |mut data| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                    });
                self.retire_dictionary_budget(evicted_bytes);
                self.trim_after_budget_retire();
            }
            MatcherBackend::HashChain => {
                let vec_pool = &mut self.vec_pool;
                let mut evicted_bytes = 0usize;
                self.hc_match_generator
                    .as_mut()
                    .expect("hash chain backend must be initialized by reset() before use")
                    .add_data(space, |mut data| {
                        evicted_bytes += data.len();
                        data.resize(data.capacity(), 0);
                        vec_pool.push(data);
                    });
                self.retire_dictionary_budget(evicted_bytes);
                self.trim_after_budget_retire();
            }
        }
    }

    /// Runs match finding over the newest slice, emitting sequences via
    /// the callback.
    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
        match self.active_backend {
            MatcherBackend::Simple => {
                while self.match_generator.next_sequence(&mut handle_sequence) {}
            }
            MatcherBackend::Dfast => self
                .dfast_matcher_mut()
                .start_matching(&mut handle_sequence),
            MatcherBackend::Row => self.row_matcher_mut().start_matching(&mut handle_sequence),
            MatcherBackend::HashChain => self.hc_matcher_mut().start_matching(&mut handle_sequence),
        }
    }

    /// Indexes the newest slice without emitting sequences (no hint).
    fn skip_matching(&mut self) {
        self.skip_matching_with_hint(None);
    }

    /// Indexes the newest slice without emitting sequences; the hint lets
    /// backends skip dense hashing for incompressible data.
    fn skip_matching_with_hint(&mut self, incompressible_hint: Option<bool>) {
        match self.active_backend {
            MatcherBackend::Simple => self
                .match_generator
                .skip_matching_with_hint(incompressible_hint),
            MatcherBackend::Dfast => self.dfast_matcher_mut().skip_matching(incompressible_hint),
            MatcherBackend::Row => self
                .row_matcher_mut()
                .skip_matching_with_hint(incompressible_hint),
            MatcherBackend::HashChain => self.hc_matcher_mut().skip_matching(incompressible_hint),
        }
    }
}
902
/// Last-writer-wins hash map from a 5-byte suffix to a window position.
struct SuffixStore {
    // Each slot stores position + 1 (NonZeroUsize keeps Option free).
    slots: Vec<Option<NonZeroUsize>>,
    // floor(log2(capacity)); folds the 64-bit hash down to a slot index.
    len_log: u32,
}

impl SuffixStore {
    /// Creates a store with `capacity` slots. A zero capacity yields an
    /// empty store that never matches (previously `ilog2(0)` panicked).
    fn with_capacity(capacity: usize) -> Self {
        Self {
            slots: alloc::vec![None; capacity],
            len_log: capacity.checked_ilog2().unwrap_or(0),
        }
    }

    /// Records `idx` as the most recent position of `suffix`
    /// (at least 5 bytes). No-op on an empty store.
    #[inline(always)]
    fn insert(&mut self, suffix: &[u8], idx: usize) {
        if self.slots.is_empty() {
            return;
        }
        let key = self.key(suffix);
        self.slots[key] = Some(NonZeroUsize::new(idx + 1).unwrap());
    }

    /// True if some suffix hashing to the same slot was inserted
    /// (collisions can cause false positives; callers must verify).
    #[inline(always)]
    fn contains_key(&self, suffix: &[u8]) -> bool {
        if self.slots.is_empty() {
            return false;
        }
        let key = self.key(suffix);
        self.slots[key].is_some()
    }

    /// Returns the stored position for `suffix`'s slot, if any.
    #[inline(always)]
    fn get(&self, suffix: &[u8]) -> Option<usize> {
        if self.slots.is_empty() {
            return None;
        }
        let key = self.key(suffix);
        self.slots[key].map(|x| x.get() - 1)
    }

    /// Hashes the first 5 bytes of `suffix` into a slot index.
    #[inline(always)]
    fn key(&self, suffix: &[u8]) -> usize {
        // len_log == 0 would shift by 64 below; a 1-slot (or empty,
        // guarded by the callers) store always maps to slot 0.
        if self.len_log == 0 {
            return 0;
        }

        let s0 = suffix[0] as u64;
        let s1 = suffix[1] as u64;
        let s2 = suffix[2] as u64;
        let s3 = suffix[3] as u64;
        let s4 = suffix[4] as u64;

        const POLY: u64 = 0xCF3BCCDCABu64;

        // Spread each byte across the high bits with a multiplicative mix.
        let s0 = (s0 << 24).wrapping_mul(POLY);
        let s1 = (s1 << 32).wrapping_mul(POLY);
        let s2 = (s2 << 40).wrapping_mul(POLY);
        let s3 = (s3 << 48).wrapping_mul(POLY);
        let s4 = (s4 << 56).wrapping_mul(POLY);

        let index = s0 ^ s1 ^ s2 ^ s3 ^ s4;
        // Top len_log bits give an index < 2^len_log <= slots.len();
        // the modulo is a belt-and-braces guard for non-power-of-two sizes.
        let index = index >> (64 - self.len_log);
        index as usize % self.slots.len()
    }
}
965
/// One committed slice of the simple matcher's window, with its suffix
/// index and the absolute offset of its first byte.
struct WindowEntry {
    data: Vec<u8>,
    // Suffix-to-position index for `data`.
    suffixes: SuffixStore,
    // Absolute window offset of data[0].
    base_offset: usize,
}
975
/// The simple greedy matcher: a windowed list of slices, each with its
/// own suffix index, scanned sequentially for matches.
pub(crate) struct MatchGenerator {
    // Upper bound on total bytes kept across all window entries.
    max_window_size: usize,
    // Oldest-to-newest committed slices.
    window: Vec<WindowEntry>,
    // Total bytes currently held in `window`.
    window_size: usize,
    // Debug-only flat copy of the window for cross-checking matches.
    #[cfg(debug_assertions)]
    concat_window: Vec<u8>,
    // Current scan position within the newest slice.
    suffix_idx: usize,
    // Start of the pending literal run (first byte not yet emitted).
    last_idx_in_sequence: usize,
    // Stride for back-filling the suffix index over skipped regions.
    hash_fill_step: usize,
    // Repeat-offset history (most recent first).
    offset_hist: [u32; 3],
}
991
992impl MatchGenerator {
993 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
994 #[inline(always)]
995 const fn select_x86_prefix_kernel(has_avx2: bool, has_sse2: bool) -> PrefixKernel {
996 if has_avx2 {
997 return PrefixKernel::X86Avx2;
998 }
999 if has_sse2 {
1000 return PrefixKernel::X86Sse2;
1001 }
1002 PrefixKernel::Scalar
1003 }
1004
    /// Runtime-detects the best prefix-comparison kernel, caching the
    /// result process-wide in a `OnceLock` so detection runs only once.
    #[cfg(feature = "std")]
    #[inline(always)]
    fn detect_prefix_kernel() -> PrefixKernel {
        static KERNEL: OnceLock<PrefixKernel> = OnceLock::new();
        *KERNEL.get_or_init(|| {
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            {
                let kernel = Self::select_x86_prefix_kernel(
                    is_x86_feature_detected!("avx2"),
                    is_x86_feature_detected!("sse2"),
                );
                if kernel != PrefixKernel::Scalar {
                    return kernel;
                }
            }
            #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
            {
                if is_aarch64_feature_detected!("neon") {
                    return PrefixKernel::Aarch64Neon;
                }
            }
            PrefixKernel::Scalar
        })
    }
1029
    /// Compile-time kernel selection used without `std`: runtime feature
    /// detection is unavailable, so the choice is made from the target
    /// features the binary was built with.
    #[cfg(not(feature = "std"))]
    #[inline(always)]
    fn detect_prefix_kernel() -> PrefixKernel {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            let kernel = Self::select_x86_prefix_kernel(
                cfg!(target_feature = "avx2"),
                cfg!(target_feature = "sse2"),
            );
            if kernel != PrefixKernel::Scalar {
                return kernel;
            }
        }
        #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
        {
            if cfg!(target_feature = "neon") {
                return PrefixKernel::Aarch64Neon;
            }
        }
        PrefixKernel::Scalar
    }
1051
1052 #[inline(always)]
1053 #[cfg(target_endian = "little")]
1054 fn mismatch_byte_index(diff: usize) -> usize {
1055 diff.trailing_zeros() as usize / 8
1056 }
1057
1058 #[inline(always)]
1059 #[cfg(target_endian = "big")]
1060 fn mismatch_byte_index(diff: usize) -> usize {
1061 diff.leading_zeros() as usize / 8
1062 }
1063
1064 fn new(max_size: usize) -> Self {
1066 Self {
1067 max_window_size: max_size,
1068 window: Vec::new(),
1069 window_size: 0,
1070 #[cfg(debug_assertions)]
1071 concat_window: Vec::new(),
1072 suffix_idx: 0,
1073 last_idx_in_sequence: 0,
1074 hash_fill_step: 1,
1075 offset_hist: [1, 4, 8],
1076 }
1077 }
1078
1079 fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
1080 self.window_size = 0;
1081 #[cfg(debug_assertions)]
1082 self.concat_window.clear();
1083 self.suffix_idx = 0;
1084 self.last_idx_in_sequence = 0;
1085 self.offset_hist = [1, 4, 8];
1086 self.window.drain(..).for_each(|entry| {
1087 reuse_space(entry.data, entry.suffixes);
1088 });
1089 }
1090
    /// Produces the next `Sequence` from the unprocessed tail of the newest
    /// window entry, invoking `handle_sequence` exactly once per `true`
    /// return. Returns `false` once the entry is fully consumed and no
    /// pending literals remain.
    fn next_sequence(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) -> bool {
        loop {
            let last_entry = self.window.last().unwrap();
            let data_slice = &last_entry.data;

            // Past the end of the entry: flush pending literals once.
            if self.suffix_idx >= data_slice.len() {
                if self.last_idx_in_sequence != self.suffix_idx {
                    let literals = &data_slice[self.last_idx_in_sequence..];
                    self.last_idx_in_sequence = self.suffix_idx;
                    handle_sequence(Sequence::Literals { literals });
                    return true;
                } else {
                    return false;
                }
            }

            let data_slice = &data_slice[self.suffix_idx..];
            // Remainder too short to ever match: emit it all as literals.
            if data_slice.len() < MIN_MATCH_LEN {
                let last_idx_in_sequence = self.last_idx_in_sequence;
                self.last_idx_in_sequence = last_entry.data.len();
                self.suffix_idx = last_entry.data.len();
                handle_sequence(Sequence::Literals {
                    literals: &last_entry.data[last_idx_in_sequence..],
                });
                return true;
            }

            let key = &data_slice[..MIN_MATCH_LEN];
            let literals_len = self.suffix_idx - self.last_idx_in_sequence;

            // Start with a repeat-offset candidate, then probe every window
            // entry's suffix table with the current 5-byte key.
            let mut candidate = self.repcode_candidate(data_slice, literals_len);
            for match_entry in self.window.iter() {
                if let Some(match_index) = match_entry.suffixes.get(key) {
                    let match_slice = &match_entry.data[match_index..];

                    let match_len = Self::common_prefix_len(match_slice, data_slice);

                    if match_len >= MIN_MATCH_LEN {
                        let offset = match_entry.base_offset + self.suffix_idx - match_index;

                        // Cross-check the match against the flat debug copy
                        // of the whole window.
                        #[cfg(debug_assertions)]
                        {
                            let unprocessed = last_entry.data.len() - self.suffix_idx;
                            let start = self.concat_window.len() - unprocessed - offset;
                            let end = start + match_len;
                            let check_slice = &self.concat_window[start..end];
                            debug_assert_eq!(check_slice, &match_slice[..match_len]);
                        }

                        // Longer match wins; on ties prefer the smaller offset.
                        if let Some((old_offset, old_match_len)) = candidate {
                            if match_len > old_match_len
                                || (match_len == old_match_len && offset < old_offset)
                            {
                                candidate = Some((offset, match_len));
                            }
                        } else {
                            candidate = Some((offset, match_len));
                        }
                    }
                }
            }

            if let Some((offset, match_len)) = candidate {
                // Index the span we are about to jump over before advancing.
                self.add_suffixes_till(self.suffix_idx + match_len, self.hash_fill_step);

                let last_entry = self.window.last().unwrap();
                let literals = &last_entry.data[self.last_idx_in_sequence..self.suffix_idx];

                self.suffix_idx += match_len;
                self.last_idx_in_sequence = self.suffix_idx;
                // Keep the repeat-offset history in sync with the emission.
                let _ = encode_offset_with_history(
                    offset as u32,
                    literals.len() as u32,
                    &mut self.offset_hist,
                );
                handle_sequence(Sequence::Triple {
                    literals,
                    offset,
                    match_len,
                });

                return true;
            }

            // No match at this position: index it and advance by one byte.
            let last_entry = self.window.last_mut().unwrap();
            let key = &last_entry.data[self.suffix_idx..self.suffix_idx + MIN_MATCH_LEN];
            if !last_entry.suffixes.contains_key(key) {
                last_entry.suffixes.insert(key, self.suffix_idx);
            }
            self.suffix_idx += 1;
        }
    }
1198
    /// Length of the common prefix of `a` and `b`.
    ///
    /// Runs the detected SIMD kernel first (which stops at its last full
    /// vector), then lets the scalar routine finish from that offset.
    #[inline(always)]
    fn common_prefix_len(a: &[u8], b: &[u8]) -> usize {
        let max = a.len().min(b.len());
        let mut off = 0usize;
        let lhs = a.as_ptr();
        let rhs = b.as_ptr();

        match Self::detect_prefix_kernel() {
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            PrefixKernel::X86Avx2 => {
                // SAFETY: both pointers are readable for `max` bytes and the
                // kernel was selected only after AVX2 was detected/enabled.
                off = unsafe { Self::prefix_len_simd_avx2(lhs, rhs, max) };
            }
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            PrefixKernel::X86Sse2 => {
                // SAFETY: as above, with SSE2 availability.
                off = unsafe { Self::prefix_len_simd_sse2(lhs, rhs, max) };
            }
            #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
            PrefixKernel::Aarch64Neon => {
                // SAFETY: as above, with NEON availability.
                off = unsafe { Self::prefix_len_simd_neon(lhs, rhs, max) };
            }
            PrefixKernel::Scalar => {}
        }

        Self::common_prefix_len_scalar(a, b, off, max)
    }
1225
1226 #[inline(always)]
1227 fn common_prefix_len_scalar(a: &[u8], b: &[u8], mut off: usize, max: usize) -> usize {
1228 let chunk = core::mem::size_of::<usize>();
1229 let lhs = a.as_ptr();
1230 let rhs = b.as_ptr();
1231 while off + chunk <= max {
1232 let lhs_word = unsafe { core::ptr::read_unaligned(lhs.add(off) as *const usize) };
1233 let rhs_word = unsafe { core::ptr::read_unaligned(rhs.add(off) as *const usize) };
1234 let diff = lhs_word ^ rhs_word;
1235 if diff != 0 {
1236 return off + Self::mismatch_byte_index(diff);
1237 }
1238 off += chunk;
1239 }
1240 off + core::iter::zip(&a[off..max], &b[off..max])
1241 .take_while(|(x, y)| x == y)
1242 .count()
1243 }
1244
    /// SSE2 prefix comparison, 16 bytes per step.
    ///
    /// Returns either the index of the first mismatching byte or the largest
    /// multiple of 16 that compared equal (the scalar tail finishes the
    /// rest). Caller must guarantee both pointers are readable for `max`
    /// bytes and that SSE2 is available.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[target_feature(enable = "sse2")]
    unsafe fn prefix_len_simd_sse2(lhs: *const u8, rhs: *const u8, max: usize) -> usize {
        let mut off = 0usize;
        while off + 16 <= max {
            // SAFETY: off + 16 <= max and the caller guarantees readability.
            let a: __m128i = unsafe { _mm_loadu_si128(lhs.add(off).cast::<__m128i>()) };
            let b: __m128i = unsafe { _mm_loadu_si128(rhs.add(off).cast::<__m128i>()) };
            let eq = _mm_cmpeq_epi8(a, b);
            let mask = _mm_movemask_epi8(eq) as u32;
            if mask != 0xFFFF {
                // First zero bit of the equality mask = first differing byte.
                return off + (!mask).trailing_zeros() as usize;
            }
            off += 16;
        }
        off
    }
1261
    /// AVX2 prefix comparison, 32 bytes per step; same contract as the SSE2
    /// variant. Caller must guarantee readability for `max` bytes and AVX2
    /// availability.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[target_feature(enable = "avx2")]
    unsafe fn prefix_len_simd_avx2(lhs: *const u8, rhs: *const u8, max: usize) -> usize {
        let mut off = 0usize;
        while off + 32 <= max {
            // SAFETY: off + 32 <= max and the caller guarantees readability.
            let a: __m256i = unsafe { _mm256_loadu_si256(lhs.add(off).cast::<__m256i>()) };
            let b: __m256i = unsafe { _mm256_loadu_si256(rhs.add(off).cast::<__m256i>()) };
            let eq = _mm256_cmpeq_epi8(a, b);
            let mask = _mm256_movemask_epi8(eq) as u32;
            if mask != u32::MAX {
                // First zero bit of the equality mask = first differing byte.
                return off + (!mask).trailing_zeros() as usize;
            }
            off += 32;
        }
        off
    }
1278
    /// NEON prefix comparison, 16 bytes per step (little-endian aarch64
    /// only). The equality vector is inspected as two u64 lanes; a lane
    /// different from all-ones contains the first mismatch. Caller must
    /// guarantee readability for `max` bytes and NEON availability.
    #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
    #[target_feature(enable = "neon")]
    unsafe fn prefix_len_simd_neon(lhs: *const u8, rhs: *const u8, max: usize) -> usize {
        let mut off = 0usize;
        while off + 16 <= max {
            // SAFETY: off + 16 <= max and the caller guarantees readability.
            let a: uint8x16_t = unsafe { vld1q_u8(lhs.add(off)) };
            let b: uint8x16_t = unsafe { vld1q_u8(rhs.add(off)) };
            let eq = vceqq_u8(a, b);
            let lanes = vreinterpretq_u64_u8(eq);
            let low = vgetq_lane_u64(lanes, 0);
            if low != u64::MAX {
                // Inverted equality bits pinpoint the differing byte.
                let diff = low ^ u64::MAX;
                return off + Self::mismatch_byte_index(diff as usize);
            }
            let high = vgetq_lane_u64(lanes, 1);
            if high != u64::MAX {
                let diff = high ^ u64::MAX;
                return off + 8 + Self::mismatch_byte_index(diff as usize);
            }
            off += 16;
        }
        off
    }
1302
    /// Indexes suffix-table keys from the current `suffix_idx` up to `idx`,
    /// striding by `fill_step`, and additionally makes sure the last full
    /// key ending at `idx` is present even if the stride skipped it.
    #[inline(always)]
    fn add_suffixes_till(&mut self, idx: usize, fill_step: usize) {
        let start = self.suffix_idx;
        let last_entry = self.window.last_mut().unwrap();
        // Entries shorter than one key can never be indexed.
        if last_entry.data.len() < MIN_MATCH_LEN {
            return;
        }
        // Last position whose full MIN_MATCH_LEN key fits before `idx`.
        let insert_limit = idx.saturating_sub(MIN_MATCH_LEN - 1);
        if insert_limit > start {
            let data = last_entry.data.as_slice();
            let suffixes = &mut last_entry.suffixes;
            if fill_step == FAST_HASH_FILL_STEP {
                // Dedicated unrolled path for the common fill step.
                Self::add_suffixes_interleaved_fast(data, suffixes, start, insert_limit);
            } else {
                let mut pos = start;
                while pos < insert_limit {
                    Self::insert_suffix_if_absent(data, suffixes, pos);
                    pos += fill_step;
                }
            }
        }

        // Tail key: the key ending exactly at `idx` must be findable so a
        // later position can match right up to the processed boundary.
        if idx >= start + MIN_MATCH_LEN {
            let tail_start = idx - MIN_MATCH_LEN;
            let tail_key = &last_entry.data[tail_start..tail_start + MIN_MATCH_LEN];
            if !last_entry.suffixes.contains_key(tail_key) {
                last_entry.suffixes.insert(tail_key, tail_start);
            }
        }
    }
1334
1335 #[inline(always)]
1336 fn insert_suffix_if_absent(data: &[u8], suffixes: &mut SuffixStore, pos: usize) {
1337 debug_assert!(
1338 pos + MIN_MATCH_LEN <= data.len(),
1339 "insert_suffix_if_absent: pos {} + MIN_MATCH_LEN {} exceeds data.len() {}",
1340 pos,
1341 MIN_MATCH_LEN,
1342 data.len()
1343 );
1344 let key = &data[pos..pos + MIN_MATCH_LEN];
1345 if !suffixes.contains_key(key) {
1346 suffixes.insert(key, pos);
1347 }
1348 }
1349
1350 #[inline(always)]
1351 fn add_suffixes_interleaved_fast(
1352 data: &[u8],
1353 suffixes: &mut SuffixStore,
1354 start: usize,
1355 insert_limit: usize,
1356 ) {
1357 let lane = FAST_HASH_FILL_STEP;
1358 let mut pos = start;
1359
1360 while pos + lane * 3 < insert_limit {
1363 let p0 = pos;
1364 let p1 = pos + lane;
1365 let p2 = pos + lane * 2;
1366 let p3 = pos + lane * 3;
1367
1368 Self::insert_suffix_if_absent(data, suffixes, p0);
1369 Self::insert_suffix_if_absent(data, suffixes, p1);
1370 Self::insert_suffix_if_absent(data, suffixes, p2);
1371 Self::insert_suffix_if_absent(data, suffixes, p3);
1372
1373 pos += lane * 4;
1374 }
1375
1376 while pos < insert_limit {
1377 Self::insert_suffix_if_absent(data, suffixes, pos);
1378 pos += lane;
1379 }
1380 }
1381
    /// Finds the best match reachable through the repeat-offset history.
    ///
    /// Only probed when no literals are pending; the candidate offsets are
    /// then rep1, rep2 and rep0-1. Returns `(offset, match_len)` of the best
    /// candidate reaching `MIN_MATCH_LEN`, preferring longer matches and,
    /// on ties, smaller offsets.
    fn repcode_candidate(&self, data_slice: &[u8], literals_len: usize) -> Option<(usize, usize)> {
        if literals_len != 0 {
            return None;
        }

        let reps = [
            Some(self.offset_hist[1] as usize),
            Some(self.offset_hist[2] as usize),
            (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
        ];

        let mut best: Option<(usize, usize)> = None;
        // Deduplicate so the same distance is not probed twice.
        let mut seen = [0usize; 3];
        let mut seen_len = 0usize;
        for offset in reps.into_iter().flatten() {
            if offset == 0 {
                continue;
            }
            if seen[..seen_len].contains(&offset) {
                continue;
            }
            seen[seen_len] = offset;
            seen_len += 1;

            let Some(match_len) = self.offset_match_len(offset, data_slice) else {
                continue;
            };
            if match_len < MIN_MATCH_LEN {
                continue;
            }

            if best.is_none_or(|(old_offset, old_len)| {
                match_len > old_len || (match_len == old_len && offset < old_offset)
            }) {
                best = Some((offset, match_len));
            }
        }
        best
    }
1421
    /// Length of the match found `offset` bytes behind the current position,
    /// or `None` when the offset reaches outside the searchable window.
    fn offset_match_len(&self, offset: usize, data_slice: &[u8]) -> Option<usize> {
        if offset == 0 {
            return None;
        }

        let last_idx = self.window.len().checked_sub(1)?;
        let last_entry = &self.window[last_idx];
        // Bytes of window history strictly before the current position.
        let searchable_prefix = self.window_size - (last_entry.data.len() - self.suffix_idx);
        if offset > searchable_prefix {
            return None;
        }

        // Locate the entry containing the byte `offset` positions back,
        // walking entries from newest to oldest.
        let mut remaining = offset;
        let (entry_idx, match_index) = if remaining <= self.suffix_idx {
            (last_idx, self.suffix_idx - remaining)
        } else {
            remaining -= self.suffix_idx;
            let mut found = None;
            for entry_idx in (0..last_idx).rev() {
                let len = self.window[entry_idx].data.len();
                if remaining <= len {
                    found = Some((entry_idx, len - remaining));
                    break;
                }
                remaining -= len;
            }
            found?
        };

        let match_entry = &self.window[entry_idx];
        let match_slice = &match_entry.data[match_index..];

        Some(Self::common_prefix_len(match_slice, data_slice))
    }
1456
    /// Consumes the rest of the newest entry without emitting sequences,
    /// only populating the suffix table.
    ///
    /// With `Some(true)` (data judged incompressible) the bulk is indexed
    /// sparsely and only the tail densely, so a following block can still
    /// match across the boundary; otherwise everything is indexed densely.
    fn skip_matching_with_hint(&mut self, incompressible_hint: Option<bool>) {
        let len = self.window.last().unwrap().data.len();
        if incompressible_hint == Some(true) {
            let dense_tail = MIN_MATCH_LEN + INCOMPRESSIBLE_SKIP_STEP;
            let sparse_end = len.saturating_sub(dense_tail);
            self.add_suffixes_till(sparse_end, INCOMPRESSIBLE_SKIP_STEP);
            // Advance the cursor so the dense pass starts at the tail.
            self.suffix_idx = sparse_end;
            self.add_suffixes_till(len, 1);
        } else {
            self.add_suffixes_till(len, 1);
        }
        self.suffix_idx = len;
        self.last_idx_in_sequence = len;
    }
1476
    /// Test-only convenience wrapper: skip the newest entry without an
    /// incompressibility hint.
    #[cfg(test)]
    fn skip_matching(&mut self) {
        self.skip_matching_with_hint(None);
    }
1482
    /// Appends a new block (with its pre-built suffix table) to the window.
    ///
    /// Must only be called once the previous block is fully processed
    /// (asserted). Old entries are evicted through `reserve` first; every
    /// surviving entry's `base_offset` is then shifted by the previous
    /// block's length so offsets stay relative to the new entry's start.
    fn add_data(
        &mut self,
        data: Vec<u8>,
        suffixes: SuffixStore,
        reuse_space: impl FnMut(Vec<u8>, SuffixStore),
    ) {
        assert!(
            self.window.is_empty() || self.suffix_idx == self.window.last().unwrap().data.len()
        );
        self.reserve(data.len(), reuse_space);
        #[cfg(debug_assertions)]
        self.concat_window.extend_from_slice(&data);

        if let Some(last_len) = self.window.last().map(|last| last.data.len()) {
            for entry in self.window.iter_mut() {
                entry.base_offset += last_len;
            }
        }

        let len = data.len();
        self.window.push(WindowEntry {
            data,
            suffixes,
            // The newest entry is its own reference point.
            base_offset: 0,
        });
        self.window_size += len;
        self.suffix_idx = 0;
        self.last_idx_in_sequence = 0;
    }
1514
    /// Evicts entries from the front of the window until `amount` more
    /// bytes fit within `max_window_size`, returning each evicted buffer
    /// through `reuse_space`. Panics if `amount` alone exceeds the budget.
    ///
    /// NOTE(review): `Vec::remove(0)` is O(n) in the entry count — fine
    /// while the window holds only a handful of blocks.
    fn reserve(&mut self, amount: usize, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
        assert!(self.max_window_size >= amount);
        while self.window_size + amount > self.max_window_size {
            let removed = self.window.remove(0);
            self.window_size -= removed.data.len();
            #[cfg(debug_assertions)]
            self.concat_window.drain(0..removed.data.len());

            let WindowEntry {
                suffixes,
                data: leaked_vec,
                base_offset: _,
            } = removed;
            reuse_space(leaked_vec, suffixes);
        }
    }
1533}
1534
/// Double-fast match finder: a flat `history` buffer indexed by two hash
/// tables (short and long keys) with several candidate slots per bucket.
struct DfastMatchGenerator {
    // Upper bound on the total bytes kept in `window`.
    max_window_size: usize,
    // Blocks currently inside the window, oldest first.
    window: VecDeque<Vec<u8>>,
    // Sum of the lengths of all blocks in `window`.
    window_size: usize,
    // Concatenation of the window blocks; may carry a dead prefix until
    // `compact_history` trims it.
    history: Vec<u8>,
    // Index into `history` where live data begins.
    history_start: usize,
    // Absolute stream position of the first live byte.
    history_abs_start: usize,
    // Repeat-offset history consulted for repcode candidates.
    offset_hist: [u32; 3],
    // Candidate tables of absolute positions (DFAST_EMPTY_SLOT = free).
    short_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
    long_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
    // log2 of the table size; adjusted via `set_hash_bits`.
    hash_bits: usize,
    // Selected hash-mixing implementation (see `detect_hash_mix_kernel`).
    hash_mix_kernel: HashMixKernel,
    // Use the unrolled fast loop instead of the lazy general loop.
    use_fast_loop: bool,
    // Lazy-matching depth; >= 2 enables the two-ahead probe.
    lazy_depth: u8,
}
1553
/// A match located at absolute position `start`, reaching `offset` bytes
/// back for `match_len` bytes.
#[derive(Copy, Clone)]
struct MatchCandidate {
    // Absolute position where the match begins; may be earlier than the
    // probed position after backwards extension.
    start: usize,
    offset: usize,
    match_len: usize,
}
1560
1561fn best_len_offset_candidate(
1562 lhs: Option<MatchCandidate>,
1563 rhs: Option<MatchCandidate>,
1564) -> Option<MatchCandidate> {
1565 match (lhs, rhs) {
1566 (None, other) | (other, None) => other,
1567 (Some(lhs), Some(rhs)) => {
1568 if rhs.match_len > lhs.match_len
1569 || (rhs.match_len == lhs.match_len && rhs.offset < lhs.offset)
1570 {
1571 Some(rhs)
1572 } else {
1573 Some(lhs)
1574 }
1575 }
1576 }
1577}
1578
/// Grows a match backwards over bytes preceding the probed position, as
/// long as they equal the bytes before the candidate and the walk stays
/// inside both the pending literal run and the live history.
///
/// `concat` is the live history whose first byte has absolute position
/// `history_abs_start`; `lit_len` bounds how far back the extension may go
/// without swallowing already-emitted output.
fn extend_backwards_shared(
    concat: &[u8],
    history_abs_start: usize,
    mut candidate_pos: usize,
    mut abs_pos: usize,
    mut match_len: usize,
    lit_len: usize,
) -> MatchCandidate {
    let min_abs_pos = abs_pos - lit_len;
    while abs_pos > min_abs_pos
        && candidate_pos > history_abs_start
        && concat[candidate_pos - history_abs_start - 1] == concat[abs_pos - history_abs_start - 1]
    {
        candidate_pos -= 1;
        abs_pos -= 1;
        match_len += 1;
    }
    MatchCandidate {
        start: abs_pos,
        offset: abs_pos - candidate_pos,
        match_len,
    }
}
1602
/// Repeat-offset probe shared between matchers.
///
/// With no pending literals the candidate offsets are rep1, rep2 and
/// rep0-1; with pending literals they are rep0, rep1 and rep2. Each viable
/// candidate is verified with a real byte comparison, extended backwards,
/// and the best (longest, then smallest-offset) is returned.
fn repcode_candidate_shared(
    concat: &[u8],
    history_abs_start: usize,
    offset_hist: [u32; 3],
    abs_pos: usize,
    lit_len: usize,
    min_match_len: usize,
) -> Option<MatchCandidate> {
    let reps = if lit_len == 0 {
        [
            Some(offset_hist[1] as usize),
            Some(offset_hist[2] as usize),
            (offset_hist[0] > 1).then_some((offset_hist[0] - 1) as usize),
        ]
    } else {
        [
            Some(offset_hist[0] as usize),
            Some(offset_hist[1] as usize),
            Some(offset_hist[2] as usize),
        ]
    };

    let current_idx = abs_pos - history_abs_start;
    // Not enough bytes left for even a minimal match.
    if current_idx + min_match_len > concat.len() {
        return None;
    }

    let mut best = None;
    for rep in reps.into_iter().flatten() {
        // Skip offsets reaching before the start of the stream…
        if rep == 0 || rep > abs_pos {
            continue;
        }
        let candidate_pos = abs_pos - rep;
        // …or before the retained history.
        if candidate_pos < history_abs_start {
            continue;
        }
        let candidate_idx = candidate_pos - history_abs_start;
        let match_len =
            MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
        if match_len >= min_match_len {
            let candidate = extend_backwards_shared(
                concat,
                history_abs_start,
                candidate_pos,
                abs_pos,
                match_len,
                lit_len,
            );
            best = best_len_offset_candidate(best, Some(candidate));
        }
    }
    best
}
1656
/// Parameters steering `pick_lazy_match_shared`, bundled so multiple
/// matchers can share the lazy-evaluation logic.
#[derive(Copy, Clone)]
struct LazyMatchConfig {
    // A match at least this long is accepted immediately (no lazy probe).
    target_len: usize,
    min_match_len: usize,
    // >= 2 additionally probes two positions ahead.
    lazy_depth: u8,
    // Absolute end of the searchable data; probes must stay inside it.
    history_abs_end: usize,
}
1664
/// Lazy-match arbitration: decides whether to emit `best` now or defer in
/// favor of a better match starting one (or two) bytes later.
///
/// Returns `Some(best)` to emit, or `None` to tell the caller to advance
/// one byte and retry. Long matches (`>= target_len`) and positions too
/// close to the end are accepted without probing.
fn pick_lazy_match_shared(
    abs_pos: usize,
    lit_len: usize,
    best: Option<MatchCandidate>,
    config: LazyMatchConfig,
    mut best_match_at: impl FnMut(usize, usize) -> Option<MatchCandidate>,
) -> Option<MatchCandidate> {
    let best = best?;
    if best.match_len >= config.target_len
        || abs_pos + 1 + config.min_match_len > config.history_abs_end
    {
        return Some(best);
    }

    // One byte ahead: a longer match, or an equal-length match at a closer
    // offset, beats the current candidate.
    let next = best_match_at(abs_pos + 1, lit_len + 1);
    if let Some(next) = next
        && (next.match_len > best.match_len
            || (next.match_len == best.match_len && next.offset < best.offset))
    {
        return None;
    }

    // Optional second probe: must beat the current match by more than one
    // byte to justify skipping two positions.
    if config.lazy_depth >= 2 && abs_pos + 2 + config.min_match_len <= config.history_abs_end {
        let next2 = best_match_at(abs_pos + 2, lit_len + 2);
        if let Some(next2) = next2
            && next2.match_len > best.match_len + 1
        {
            return None;
        }
    }

    Some(best)
}
1698
1699impl DfastMatchGenerator {
    /// Length of the densely-indexed region kept around block boundaries so
    /// matches spanning a boundary remain findable.
    const BOUNDARY_DENSE_TAIL_LEN: usize = DFAST_MIN_MATCH_LEN + 3;
1707
1708 fn new(max_window_size: usize) -> Self {
1709 Self {
1710 max_window_size,
1711 window: VecDeque::new(),
1712 window_size: 0,
1713 history: Vec::new(),
1714 history_start: 0,
1715 history_abs_start: 0,
1716 offset_hist: [1, 4, 8],
1717 short_hash: Vec::new(),
1718 long_hash: Vec::new(),
1719 hash_bits: DFAST_HASH_BITS,
1720 hash_mix_kernel: detect_hash_mix_kernel(),
1721 use_fast_loop: false,
1722 lazy_depth: 1,
1723 }
1724 }
1725
1726 fn set_hash_bits(&mut self, bits: usize) {
1727 let clamped = bits.clamp(MIN_WINDOW_LOG as usize, DFAST_HASH_BITS);
1728 if self.hash_bits != clamped {
1729 self.hash_bits = clamped;
1730 self.short_hash = Vec::new();
1731 self.long_hash = Vec::new();
1732 }
1733 }
1734
    /// Clears all state back to a fresh stream, zeroing the hash tables in
    /// place (cheaper than reallocating) and recycling window buffers.
    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
        self.window_size = 0;
        self.history.clear();
        self.history_start = 0;
        self.history_abs_start = 0;
        self.offset_hist = [1, 4, 8];
        if !self.short_hash.is_empty() {
            self.short_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
            self.long_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]);
        }
        for mut data in self.window.drain(..) {
            // Re-expand to full capacity before recycling — presumably the
            // consumer expects full-size buffers; TODO(review) confirm.
            data.resize(data.capacity(), 0);
            reuse_space(data);
        }
    }
1750
    /// Borrow of the most recently added window block.
    fn get_last_space(&self) -> &[u8] {
        self.window.back().unwrap().as_slice()
    }
1754
1755 fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
1756 assert!(data.len() <= self.max_window_size);
1757 while self.window_size + data.len() > self.max_window_size {
1758 let removed = self.window.pop_front().unwrap();
1759 self.window_size -= removed.len();
1760 self.history_start += removed.len();
1761 self.history_abs_start += removed.len();
1762 reuse_space(removed);
1763 }
1764 self.compact_history();
1765 self.history.extend_from_slice(&data);
1766 self.window_size += data.len();
1767 self.window.push_back(data);
1768 }
1769
1770 fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
1771 while self.window_size > self.max_window_size {
1772 let removed = self.window.pop_front().unwrap();
1773 self.window_size -= removed.len();
1774 self.history_start += removed.len();
1775 self.history_abs_start += removed.len();
1776 reuse_space(removed);
1777 }
1778 }
1779
    /// Indexes the newest block without emitting sequences.
    ///
    /// Incompressible blocks (per hint or heuristic) are indexed sparsely
    /// with a densely-indexed tail; the tail of the previous block is also
    /// indexed densely so cross-block matches remain findable.
    fn skip_matching(&mut self, incompressible_hint: Option<bool>) {
        self.ensure_hash_tables();
        let current_len = self.window.back().unwrap().len();
        let current_abs_start = self.history_abs_start + self.window_size - current_len;
        let current_abs_end = current_abs_start + current_len;
        // Backfill the boundary tail of the previous block.
        let tail_start = current_abs_start.saturating_sub(Self::BOUNDARY_DENSE_TAIL_LEN);
        if tail_start < current_abs_start {
            self.insert_positions(tail_start, current_abs_start);
        }

        let used_sparse = incompressible_hint
            .unwrap_or_else(|| self.block_looks_incompressible(current_abs_start, current_abs_end));
        if used_sparse {
            self.insert_positions_with_step(
                current_abs_start,
                current_abs_end,
                DFAST_INCOMPRESSIBLE_SKIP_STEP,
            );
        } else {
            self.insert_positions(current_abs_start, current_abs_end);
        }

        if used_sparse {
            // Densify the final stretch so the next block can match into it.
            let tail_start = current_abs_end
                .saturating_sub(Self::BOUNDARY_DENSE_TAIL_LEN)
                .max(current_abs_start);
            if tail_start < current_abs_end {
                self.insert_positions(tail_start, current_abs_end);
            }
        }
    }
1813
    /// Densely indexes the newest block — plus the boundary tail of the
    /// previous one — without emitting sequences.
    fn skip_matching_dense(&mut self) {
        self.ensure_hash_tables();
        let current_len = self.window.back().unwrap().len();
        let current_abs_start = self.history_abs_start + self.window_size - current_len;
        let current_abs_end = current_abs_start + current_len;
        // Backfill, clamped so we never reach before the live history.
        let backfill_start = current_abs_start
            .saturating_sub(Self::BOUNDARY_DENSE_TAIL_LEN)
            .max(self.history_abs_start);
        if backfill_start < current_abs_start {
            self.insert_positions(backfill_start, current_abs_start);
        }
        self.insert_positions(current_abs_start, current_abs_end);
    }
1827
1828 fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
1829 self.ensure_hash_tables();
1830
1831 let current_len = self.window.back().unwrap().len();
1832 if current_len == 0 {
1833 return;
1834 }
1835
1836 let current_abs_start = self.history_abs_start + self.window_size - current_len;
1837 if self.use_fast_loop {
1838 self.start_matching_fast_loop(current_abs_start, current_len, &mut handle_sequence);
1839 return;
1840 }
1841 self.start_matching_general(current_abs_start, current_len, &mut handle_sequence);
1842 }
1843
    /// Lazy single-position search loop with adaptive skipping.
    ///
    /// Combines repcode and hash candidates at each position; candidates go
    /// through lazy arbitration before being emitted. On long miss runs (or
    /// when the whole block looks incompressible) the step size grows to
    /// bound worst-case time.
    fn start_matching_general(
        &mut self,
        current_abs_start: usize,
        current_len: usize,
        handle_sequence: &mut impl for<'a> FnMut(Sequence<'a>),
    ) {
        let use_adaptive_skip =
            self.block_looks_incompressible(current_abs_start, current_abs_start + current_len);
        let mut pos = 0usize;
        let mut literals_start = 0usize;
        let mut skip_step = 1usize;
        let mut next_skip_growth_pos = DFAST_SKIP_STEP_GROWTH_INTERVAL;
        let mut miss_run = 0usize;
        while pos + DFAST_MIN_MATCH_LEN <= current_len {
            let abs_pos = current_abs_start + pos;
            let lit_len = pos - literals_start;

            let best = self.best_match(abs_pos, lit_len);
            if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
                let start = self.emit_candidate(
                    current_abs_start,
                    &mut literals_start,
                    candidate,
                    handle_sequence,
                );
                pos = start + candidate.match_len;
                // A hit resets all skip heuristics.
                skip_step = 1;
                next_skip_growth_pos = pos.saturating_add(DFAST_SKIP_STEP_GROWTH_INTERVAL);
                miss_run = 0;
            } else {
                self.insert_position(abs_pos);
                miss_run = miss_run.saturating_add(1);
                // Locally-triggered skipping uses a conservative cap; the
                // whole-block heuristic allows the full step growth.
                let use_local_adaptive_skip = miss_run >= DFAST_LOCAL_SKIP_TRIGGER;
                if use_adaptive_skip || use_local_adaptive_skip {
                    let skip_cap = if use_adaptive_skip {
                        DFAST_MAX_SKIP_STEP
                    } else {
                        2
                    };
                    if pos >= next_skip_growth_pos {
                        skip_step = (skip_step + 1).min(skip_cap);
                        next_skip_growth_pos =
                            next_skip_growth_pos.saturating_add(DFAST_SKIP_STEP_GROWTH_INTERVAL);
                    }
                    pos = pos.saturating_add(skip_step);
                } else {
                    pos += 1;
                }
            }
        }

        self.seed_remaining_hashable_starts(current_abs_start, current_len, pos);
        self.emit_trailing_literals(literals_start, handle_sequence);
    }
1898
    /// Unrolled search loop: probes a repcode two bytes ahead first (which
    /// catches continuing repeats cheaply), otherwise accepts the best
    /// candidate at the current position without lazy arbitration, seeding
    /// up to four hash positions on each miss.
    fn start_matching_fast_loop(
        &mut self,
        current_abs_start: usize,
        current_len: usize,
        handle_sequence: &mut impl for<'a> FnMut(Sequence<'a>),
    ) {
        let block_is_strict_incompressible = self
            .block_looks_incompressible_strict(current_abs_start, current_abs_start + current_len);
        let mut pos = 0usize;
        let mut literals_start = 0usize;
        let mut skip_step = 1usize;
        let mut next_skip_growth_pos = DFAST_SKIP_STEP_GROWTH_INTERVAL;
        let mut miss_run = 0usize;
        while pos + DFAST_MIN_MATCH_LEN <= current_len {
            let ip0 = pos;
            let ip1 = ip0.saturating_add(1);
            let ip2 = ip0.saturating_add(2);
            let ip3 = ip0.saturating_add(3);

            let abs_ip0 = current_abs_start + ip0;
            let lit_len_ip0 = ip0 - literals_start;

            // Repcode probe two positions ahead; only accepted when its
            // (backwards-extended) start stays within the pending literals.
            if ip2 + DFAST_MIN_MATCH_LEN <= current_len {
                let abs_ip2 = current_abs_start + ip2;
                let lit_len_ip2 = ip2 - literals_start;
                if let Some(rep) = self.repcode_candidate(abs_ip2, lit_len_ip2)
                    && rep.start >= current_abs_start + literals_start
                    && rep.start <= abs_ip2
                {
                    let start = self.emit_candidate(
                        current_abs_start,
                        &mut literals_start,
                        rep,
                        handle_sequence,
                    );
                    pos = start + rep.match_len;
                    skip_step = 1;
                    next_skip_growth_pos = pos.saturating_add(DFAST_SKIP_STEP_GROWTH_INTERVAL);
                    miss_run = 0;
                    continue;
                }
            }

            let best = self.best_match(abs_ip0, lit_len_ip0);
            if let Some(candidate) = best {
                let start = self.emit_candidate(
                    current_abs_start,
                    &mut literals_start,
                    candidate,
                    handle_sequence,
                );
                pos = start + candidate.match_len;
                skip_step = 1;
                next_skip_growth_pos = pos.saturating_add(DFAST_SKIP_STEP_GROWTH_INTERVAL);
                miss_run = 0;
            } else {
                // Miss: seed the next few positions so later probes can
                // find them without revisiting.
                self.insert_position(abs_ip0);
                if ip1 + 4 <= current_len {
                    self.insert_position(current_abs_start + ip1);
                }
                if ip2 + 4 <= current_len {
                    self.insert_position(current_abs_start + ip2);
                }
                if ip3 + 4 <= current_len {
                    self.insert_position(current_abs_start + ip3);
                }
                miss_run = miss_run.saturating_add(1);
                if block_is_strict_incompressible || miss_run >= DFAST_LOCAL_SKIP_TRIGGER {
                    // Grow the skip step periodically while misses continue.
                    let skip_cap = DFAST_MAX_SKIP_STEP;
                    if pos >= next_skip_growth_pos {
                        skip_step = (skip_step + 1).min(skip_cap);
                        next_skip_growth_pos =
                            next_skip_growth_pos.saturating_add(DFAST_SKIP_STEP_GROWTH_INTERVAL);
                    }
                    pos = pos.saturating_add(skip_step);
                } else {
                    skip_step = 1;
                    next_skip_growth_pos = pos.saturating_add(DFAST_SKIP_STEP_GROWTH_INTERVAL);
                    pos += 1;
                }
            }
        }

        self.seed_remaining_hashable_starts(current_abs_start, current_len, pos);
        self.emit_trailing_literals(literals_start, handle_sequence);
    }
1985
    /// After the main loop stops, densely indexes the remaining positions
    /// (and always the boundary tail) so a following block can still match
    /// into the end of this one.
    fn seed_remaining_hashable_starts(
        &mut self,
        current_abs_start: usize,
        current_len: usize,
        pos: usize,
    ) {
        let boundary_tail_start = current_len.saturating_sub(Self::BOUNDARY_DENSE_TAIL_LEN);
        // Start at the earlier of: where matching stopped, or the tail.
        let mut seed_pos = pos.min(current_len).min(boundary_tail_start);
        while seed_pos + DFAST_SHORT_HASH_LOOKAHEAD <= current_len {
            self.insert_position(current_abs_start + seed_pos);
            seed_pos += 1;
        }
    }
1999
    /// Emits `candidate` as a `Sequence::Triple` preceded by the pending
    /// literals, indexes every position the literals + match cover, and
    /// updates the repeat-offset history. Returns the match start relative
    /// to the current block.
    fn emit_candidate(
        &mut self,
        current_abs_start: usize,
        literals_start: &mut usize,
        candidate: MatchCandidate,
        handle_sequence: &mut impl for<'a> FnMut(Sequence<'a>),
    ) -> usize {
        // Hash everything we are about to step over.
        self.insert_positions(
            current_abs_start + *literals_start,
            candidate.start + candidate.match_len,
        );
        let current = self.window.back().unwrap().as_slice();
        let start = candidate.start - current_abs_start;
        let literals = &current[*literals_start..start];
        handle_sequence(Sequence::Triple {
            literals,
            offset: candidate.offset,
            match_len: candidate.match_len,
        });
        let _ = encode_offset_with_history(
            candidate.offset as u32,
            literals.len() as u32,
            &mut self.offset_hist,
        );
        *literals_start = start + candidate.match_len;
        start
    }
2027
2028 fn emit_trailing_literals(
2029 &self,
2030 literals_start: usize,
2031 handle_sequence: &mut impl for<'a> FnMut(Sequence<'a>),
2032 ) {
2033 if literals_start < self.window.back().unwrap().len() {
2034 let current = self.window.back().unwrap().as_slice();
2035 handle_sequence(Sequence::Literals {
2036 literals: ¤t[literals_start..],
2037 });
2038 }
2039 }
2040
2041 fn ensure_hash_tables(&mut self) {
2042 let table_len = 1usize << self.hash_bits;
2043 if self.short_hash.len() != table_len {
2044 self.short_hash = alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; table_len];
2048 self.long_hash = alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; table_len];
2049 }
2050 }
2051
2052 fn compact_history(&mut self) {
2053 if self.history_start == 0 {
2054 return;
2055 }
2056 if self.history_start >= self.max_window_size
2057 || self.history_start * 2 >= self.history.len()
2058 {
2059 self.history.drain(..self.history_start);
2060 self.history_start = 0;
2061 }
2062 }
2063
    /// The still-valid portion of the concatenated history buffer.
    fn live_history(&self) -> &[u8] {
        &self.history[self.history_start..]
    }
2067
    /// Absolute stream position one past the last byte of live history.
    fn history_abs_end(&self) -> usize {
        self.history_abs_start + self.live_history().len()
    }
2071
    /// Best candidate at `abs_pos`: the better of the repcode probe and the
    /// hash-table probe (longest, ties broken by smaller offset).
    fn best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        let rep = self.repcode_candidate(abs_pos, lit_len);
        let hash = self.hash_candidate(abs_pos, lit_len);
        best_len_offset_candidate(rep, hash)
    }
2077
    /// Applies the shared lazy arbitration with this matcher's dfast
    /// parameters; `None` tells the caller to advance one byte instead of
    /// emitting.
    fn pick_lazy_match(
        &self,
        abs_pos: usize,
        lit_len: usize,
        best: Option<MatchCandidate>,
    ) -> Option<MatchCandidate> {
        pick_lazy_match_shared(
            abs_pos,
            lit_len,
            best,
            LazyMatchConfig {
                target_len: DFAST_TARGET_LEN,
                min_match_len: DFAST_MIN_MATCH_LEN,
                lazy_depth: self.lazy_depth,
                history_abs_end: self.history_abs_end(),
            },
            |next_pos, next_lit_len| self.best_match(next_pos, next_lit_len),
        )
    }
2097
    /// Repcode probe at `abs_pos`, delegating to the shared helper with
    /// this matcher's history and minimum match length.
    fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        repcode_candidate_shared(
            self.live_history(),
            self.history_abs_start,
            self.offset_hist,
            abs_pos,
            lit_len,
            DFAST_MIN_MATCH_LEN,
        )
    }
2108
    /// Probes the long-hash then short-hash candidate lists at `abs_pos`,
    /// verifying each hit with a real byte comparison (buckets may hold
    /// collisions or stale positions). Early-outs once a candidate reaches
    /// `DFAST_TARGET_LEN`.
    fn hash_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        let concat = self.live_history();
        let current_idx = abs_pos - self.history_abs_start;
        let mut best = None;
        for candidate_pos in self.long_candidates(abs_pos) {
            // Skip slots pointing into evicted history or at/after the
            // probed position.
            if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos {
                continue;
            }
            let candidate_idx = candidate_pos - self.history_abs_start;
            let match_len =
                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
            if match_len >= DFAST_MIN_MATCH_LEN {
                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
                best = best_len_offset_candidate(best, Some(candidate));
                if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) {
                    return best;
                }
            }
        }

        // Same procedure over the short-hash candidates.
        for candidate_pos in self.short_candidates(abs_pos) {
            if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos {
                continue;
            }
            let candidate_idx = candidate_pos - self.history_abs_start;
            let match_len =
                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
            if match_len >= DFAST_MIN_MATCH_LEN {
                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
                best = best_len_offset_candidate(best, Some(candidate));
                if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) {
                    return best;
                }
            }
        }
        best
    }
2146
    fn extend_backwards(
        &self,
        candidate_pos: usize,
        abs_pos: usize,
        match_len: usize,
        lit_len: usize,
    ) -> MatchCandidate {
        // Delegate to the shared helper that grows the match leftwards
        // into the pending literals.
        extend_backwards_shared(
            self.live_history(),
            self.history_abs_start,
            candidate_pos,
            abs_pos,
            match_len,
            lit_len,
        )
    }
2163
2164 fn insert_positions(&mut self, start: usize, end: usize) {
2165 let start = start.max(self.history_abs_start);
2166 let end = end.min(self.history_abs_end());
2167 for pos in start..end {
2168 self.insert_position(pos);
2169 }
2170 }
2171
2172 fn insert_positions_with_step(&mut self, start: usize, end: usize, step: usize) {
2173 let start = start.max(self.history_abs_start);
2174 let end = end.min(self.history_abs_end());
2175 if step <= 1 {
2176 self.insert_positions(start, end);
2177 return;
2178 }
2179 let mut pos = start;
2180 while pos < end {
2181 self.insert_position(pos);
2182 pos = pos.saturating_add(step);
2183 }
2184 }
2185
2186 fn insert_position(&mut self, pos: usize) {
2187 let idx = pos - self.history_abs_start;
2188 let short = {
2189 let concat = self.live_history();
2190 (idx + 4 <= concat.len()).then(|| self.hash4(&concat[idx..]))
2191 };
2192 if let Some(short) = short {
2193 let bucket = &mut self.short_hash[short];
2194 if bucket[0] != pos {
2195 bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
2196 bucket[0] = pos;
2197 }
2198 }
2199
2200 let long = {
2201 let concat = self.live_history();
2202 (idx + 8 <= concat.len()).then(|| self.hash8(&concat[idx..]))
2203 };
2204 if let Some(long) = long {
2205 let bucket = &mut self.long_hash[long];
2206 if bucket[0] != pos {
2207 bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1);
2208 bucket[0] = pos;
2209 }
2210 }
2211 }
2212
2213 fn short_candidates(&self, pos: usize) -> impl Iterator<Item = usize> + '_ {
2214 let concat = self.live_history();
2215 let idx = pos - self.history_abs_start;
2216 (idx + 4 <= concat.len())
2217 .then(|| self.short_hash[self.hash4(&concat[idx..])])
2218 .into_iter()
2219 .flatten()
2220 .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
2221 }
2222
2223 fn long_candidates(&self, pos: usize) -> impl Iterator<Item = usize> + '_ {
2224 let concat = self.live_history();
2225 let idx = pos - self.history_abs_start;
2226 (idx + 8 <= concat.len())
2227 .then(|| self.long_hash[self.hash8(&concat[idx..])])
2228 .into_iter()
2229 .flatten()
2230 .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
2231 }
2232
2233 fn hash4(&self, data: &[u8]) -> usize {
2234 let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64;
2235 self.hash_index(value)
2236 }
2237
2238 fn hash8(&self, data: &[u8]) -> usize {
2239 let value = u64::from_le_bytes(data[..8].try_into().unwrap());
2240 self.hash_index(value)
2241 }
2242
2243 fn block_looks_incompressible(&self, start: usize, end: usize) -> bool {
2244 let live = self.live_history();
2245 if start >= end || start < self.history_abs_start {
2246 return false;
2247 }
2248 let start_idx = start - self.history_abs_start;
2249 let end_idx = end - self.history_abs_start;
2250 if end_idx > live.len() {
2251 return false;
2252 }
2253 let block = &live[start_idx..end_idx];
2254 block_looks_incompressible(block)
2255 }
2256
2257 fn block_looks_incompressible_strict(&self, start: usize, end: usize) -> bool {
2258 let live = self.live_history();
2259 if start >= end || start < self.history_abs_start {
2260 return false;
2261 }
2262 let start_idx = start - self.history_abs_start;
2263 let end_idx = end - self.history_abs_start;
2264 if end_idx > live.len() {
2265 return false;
2266 }
2267 let block = &live[start_idx..end_idx];
2268 block_looks_incompressible_strict(block)
2269 }
2270
2271 fn hash_index(&self, value: u64) -> usize {
2272 (hash_mix_u64_with_kernel(value, self.hash_mix_kernel) >> (64 - self.hash_bits)) as usize
2273 }
2274}
2275
// Row-based matcher: positions are bucketed into fixed-size "rows" of a
// hash table, each row acting as a small ring buffer with one-byte tags
// for cheap candidate rejection.
struct RowMatchGenerator {
    // Hard cap on the total bytes retained for match finding.
    max_window_size: usize,
    // Chunks currently inside the window, oldest first.
    window: VecDeque<Vec<u8>>,
    // Sum of the lengths of all chunks in `window`.
    window_size: usize,
    // Concatenated chunk bytes; only `history[history_start..]` is live.
    history: Vec<u8>,
    // Offset of the first live byte within `history`.
    history_start: usize,
    // Absolute stream position of the first live history byte.
    history_abs_start: usize,
    // Three most recent offsets, used for repcode candidates.
    offset_hist: [u32; 3],
    // log2 of the number of rows in the table.
    row_hash_log: usize,
    // log2 of the number of entries per row.
    row_log: usize,
    // Maximum entries probed per row during a search.
    search_depth: usize,
    // Match length at which searching stops early.
    target_len: usize,
    // How many following positions the lazy matcher may examine.
    lazy_depth: u8,
    // Hash-mixing kernel selected by `detect_hash_mix_kernel`.
    hash_mix_kernel: HashMixKernel,
    // Per-row ring-buffer head indices (row entries fit in a u8).
    row_heads: Vec<u8>,
    // Stored absolute positions, one row's entries per contiguous run.
    row_positions: Vec<usize>,
    // One-byte tags used to reject non-matching entries cheaply.
    row_tags: Vec<u8>,
}
2294
2295impl RowMatchGenerator {
    fn new(max_window_size: usize) -> Self {
        // Fresh matcher with empty tables; the row tables are allocated
        // lazily by `ensure_tables` on first use.
        Self {
            max_window_size,
            window: VecDeque::new(),
            window_size: 0,
            history: Vec::new(),
            history_start: 0,
            history_abs_start: 0,
            // Initial repeat offsets as used throughout this file.
            offset_hist: [1, 4, 8],
            row_hash_log: ROW_HASH_BITS - ROW_LOG,
            row_log: ROW_LOG,
            search_depth: ROW_SEARCH_DEPTH,
            target_len: ROW_TARGET_LEN,
            lazy_depth: 1,
            hash_mix_kernel: detect_hash_mix_kernel(),
            row_heads: Vec::new(),
            row_positions: Vec::new(),
            row_tags: Vec::new(),
        }
    }
2316
2317 fn set_hash_bits(&mut self, bits: usize) {
2318 let clamped = bits.clamp(self.row_log + 1, ROW_HASH_BITS);
2319 let row_hash_log = clamped.saturating_sub(self.row_log);
2320 if self.row_hash_log != row_hash_log {
2321 self.row_hash_log = row_hash_log;
2322 self.row_heads.clear();
2323 self.row_positions.clear();
2324 self.row_tags.clear();
2325 }
2326 }
2327
    fn configure(&mut self, config: RowConfig) {
        // Apply a compression-level preset. `row_log` is clamped to
        // [4, 6]; hash bits must exceed `row_log` so at least one row
        // bit remains after the tag/entry split.
        self.row_log = config.row_log.clamp(4, 6);
        self.search_depth = config.search_depth;
        self.target_len = config.target_len;
        self.set_hash_bits(config.hash_bits.max(self.row_log + 1));
    }
2334
    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
        // Clear all matcher state (keeping table allocations) and hand
        // the window buffers back to the caller for reuse.
        self.window_size = 0;
        self.history.clear();
        self.history_start = 0;
        self.history_abs_start = 0;
        self.offset_hist = [1, 4, 8];
        self.row_heads.fill(0);
        self.row_positions.fill(ROW_EMPTY_SLOT);
        self.row_tags.fill(0);
        for mut data in self.window.drain(..) {
            // Restore full length so the recycled buffer exposes its
            // whole capacity to the next user.
            data.resize(data.capacity(), 0);
            reuse_space(data);
        }
    }
2349
    fn get_last_space(&self) -> &[u8] {
        // Most recently added chunk; panics if no data was added yet.
        self.window.back().unwrap().as_slice()
    }
2353
    fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
        // Append a chunk, evicting oldest chunks until the new total fits
        // inside the window. Evicted buffers are recycled via
        // `reuse_space`.
        assert!(data.len() <= self.max_window_size);
        while self.window_size + data.len() > self.max_window_size {
            let removed = self.window.pop_front().unwrap();
            self.window_size -= removed.len();
            self.history_start += removed.len();
            self.history_abs_start += removed.len();
            reuse_space(removed);
        }
        self.compact_history();
        self.history.extend_from_slice(&data);
        self.window_size += data.len();
        self.window.push_back(data);
    }
2368
    fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
        // Evict oldest chunks until the window fits its size limit,
        // recycling the freed buffers.
        while self.window_size > self.max_window_size {
            let removed = self.window.pop_front().unwrap();
            self.window_size -= removed.len();
            self.history_start += removed.len();
            self.history_abs_start += removed.len();
            reuse_space(removed);
        }
    }
2378
    fn skip_matching_with_hint(&mut self, incompressible_hint: Option<bool>) {
        // Index the newest chunk without producing sequences. When hinted
        // incompressible, index sparsely but keep a dense tail so the
        // next chunk can still match across the boundary.
        self.ensure_tables();
        let current_len = self.window.back().unwrap().len();
        let current_abs_start = self.history_abs_start + self.window_size - current_len;
        let current_abs_end = current_abs_start + current_len;
        let backfill_start = self.backfill_start(current_abs_start);
        if backfill_start < current_abs_start {
            // Cover hash keys straddling the previous chunk boundary.
            self.insert_positions(backfill_start, current_abs_start);
        }
        if incompressible_hint == Some(true) {
            self.insert_positions_with_step(
                current_abs_start,
                current_abs_end,
                INCOMPRESSIBLE_SKIP_STEP,
            );
            let dense_tail = ROW_MIN_MATCH_LEN + INCOMPRESSIBLE_SKIP_STEP;
            let tail_start = current_abs_end
                .saturating_sub(dense_tail)
                .max(current_abs_start);
            for pos in tail_start..current_abs_end {
                // Fill only the slots the stepped pass skipped.
                if !(pos - current_abs_start).is_multiple_of(INCOMPRESSIBLE_SKIP_STEP) {
                    self.insert_position(pos);
                }
            }
        } else {
            self.insert_positions(current_abs_start, current_abs_end);
        }
    }
2407
    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
        // Scan the newest chunk, emitting literal/match sequences through
        // `handle_sequence` and indexing every visited position.
        self.ensure_tables();

        let current_len = self.window.back().unwrap().len();
        if current_len == 0 {
            return;
        }
        let current_abs_start = self.history_abs_start + self.window_size - current_len;
        let backfill_start = self.backfill_start(current_abs_start);
        if backfill_start < current_abs_start {
            // Cover hash keys straddling the previous chunk boundary.
            self.insert_positions(backfill_start, current_abs_start);
        }

        let mut pos = 0usize;
        let mut literals_start = 0usize;
        while pos + ROW_MIN_MATCH_LEN <= current_len {
            let abs_pos = current_abs_start + pos;
            let lit_len = pos - literals_start;

            let best = self.best_match(abs_pos, lit_len);
            if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
                // Index the matched span so later searches can find it.
                self.insert_positions(abs_pos, candidate.start + candidate.match_len);
                let current = self.window.back().unwrap().as_slice();
                let start = candidate.start - current_abs_start;
                let literals = &current[literals_start..start];
                handle_sequence(Sequence::Triple {
                    literals,
                    offset: candidate.offset,
                    match_len: candidate.match_len,
                });
                // Keep the repcode history in sync with what was emitted.
                let _ = encode_offset_with_history(
                    candidate.offset as u32,
                    literals.len() as u32,
                    &mut self.offset_hist,
                );
                pos = start + candidate.match_len;
                literals_start = pos;
            } else {
                self.insert_position(abs_pos);
                pos += 1;
            }
        }

        // Index the tail (too short to start a match here) so the next
        // chunk can still reference it.
        while pos + ROW_HASH_KEY_LEN <= current_len {
            self.insert_position(current_abs_start + pos);
            pos += 1;
        }

        if literals_start < current_len {
            let current = self.window.back().unwrap().as_slice();
            handle_sequence(Sequence::Literals {
                literals: &current[literals_start..],
            });
        }
    }
2463
    fn ensure_tables(&mut self) {
        // (Re)allocate the row tables when their total size no longer
        // matches the configured geometry. `set_hash_bits` clears the
        // tables whenever the row count changes, so a matching total
        // implies a matching layout.
        let row_count = 1usize << self.row_hash_log;
        let row_entries = 1usize << self.row_log;
        let total = row_count * row_entries;
        if self.row_positions.len() != total {
            self.row_heads = alloc::vec![0; row_count];
            self.row_positions = alloc::vec![ROW_EMPTY_SLOT; total];
            self.row_tags = alloc::vec![0; total];
        }
    }
2474
    fn compact_history(&mut self) {
        // Physically drop evicted bytes once they exceed the window size
        // or make up at least half of the buffer.
        if self.history_start == 0 {
            return;
        }
        if self.history_start >= self.max_window_size
            || self.history_start * 2 >= self.history.len()
        {
            self.history.drain(..self.history_start);
            self.history_start = 0;
        }
    }
2486
    fn live_history(&self) -> &[u8] {
        // Bytes still inside the window; earlier bytes are evicted.
        &self.history[self.history_start..]
    }
2490
    fn history_abs_end(&self) -> usize {
        // One past the absolute position of the last live history byte.
        self.history_abs_start + self.live_history().len()
    }
2494
    fn hash_and_row(&self, abs_pos: usize) -> Option<(usize, u8)> {
        // Hash the ROW_HASH_KEY_LEN bytes at `abs_pos` and split the top
        // hash bits into a row index plus a one-byte tag. Returns None
        // when fewer than ROW_HASH_KEY_LEN bytes remain.
        let idx = abs_pos - self.history_abs_start;
        let concat = self.live_history();
        if idx + ROW_HASH_KEY_LEN > concat.len() {
            return None;
        }
        let value =
            u32::from_le_bytes(concat[idx..idx + ROW_HASH_KEY_LEN].try_into().unwrap()) as u64;
        let hash = hash_mix_u64_with_kernel(value, self.hash_mix_kernel);
        let total_bits = self.row_hash_log + ROW_TAG_BITS;
        let combined = hash >> (u64::BITS as usize - total_bits);
        let row_mask = (1usize << self.row_hash_log) - 1;
        // Row index above the tag bits; tag is the low byte of `combined`.
        let row = ((combined >> ROW_TAG_BITS) as usize) & row_mask;
        let tag = combined as u8;
        Some((row, tag))
    }
2511
    fn backfill_start(&self, current_abs_start: usize) -> usize {
        // First position whose hash key overlaps the new chunk; inserting
        // from here makes keys that straddle the boundary searchable.
        current_abs_start
            .saturating_sub(ROW_HASH_KEY_LEN - 1)
            .max(self.history_abs_start)
    }
2517
2518 fn best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
2519 let rep = self.repcode_candidate(abs_pos, lit_len);
2520 let row = self.row_candidate(abs_pos, lit_len);
2521 best_len_offset_candidate(rep, row)
2522 }
2523
    fn pick_lazy_match(
        &self,
        abs_pos: usize,
        lit_len: usize,
        best: Option<MatchCandidate>,
    ) -> Option<MatchCandidate> {
        // Shared lazy-matching policy, parameterized with this matcher's
        // thresholds and re-querying `best_match` for later positions.
        pick_lazy_match_shared(
            abs_pos,
            lit_len,
            best,
            LazyMatchConfig {
                target_len: self.target_len,
                min_match_len: ROW_MIN_MATCH_LEN,
                lazy_depth: self.lazy_depth,
                history_abs_end: self.history_abs_end(),
            },
            |next_pos, next_lit_len| self.best_match(next_pos, next_lit_len),
        )
    }
2543
    fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        // Try the recent repeat offsets via the shared helper.
        repcode_candidate_shared(
            self.live_history(),
            self.history_abs_start,
            self.offset_hist,
            abs_pos,
            lit_len,
            ROW_MIN_MATCH_LEN,
        )
    }
2554
    fn row_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        // Probe this position's row, newest entry first. Tags reject most
        // non-matches with a single byte compare before touching data.
        let concat = self.live_history();
        let current_idx = abs_pos - self.history_abs_start;
        if current_idx + ROW_MIN_MATCH_LEN > concat.len() {
            return None;
        }

        let (row, tag) = self.hash_and_row(abs_pos)?;
        let row_entries = 1usize << self.row_log;
        let row_mask = row_entries - 1;
        let row_base = row << self.row_log;
        let head = self.row_heads[row] as usize;
        let max_walk = self.search_depth.min(row_entries);

        let mut best = None;
        for i in 0..max_walk {
            // Walk the ring buffer starting at the head (newest entry).
            let slot = (head + i) & row_mask;
            let idx = row_base + slot;
            if self.row_tags[idx] != tag {
                continue;
            }
            let candidate_pos = self.row_positions[idx];
            // Skip empty slots and positions outside the searchable range.
            if candidate_pos == ROW_EMPTY_SLOT
                || candidate_pos < self.history_abs_start
                || candidate_pos >= abs_pos
            {
                continue;
            }
            let candidate_idx = candidate_pos - self.history_abs_start;
            let match_len =
                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
            if match_len >= ROW_MIN_MATCH_LEN {
                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
                best = best_len_offset_candidate(best, Some(candidate));
                // Good enough: stop once the target length is reached.
                if best.is_some_and(|best| best.match_len >= self.target_len) {
                    return best;
                }
            }
        }
        best
    }
2596
    fn extend_backwards(
        &self,
        candidate_pos: usize,
        abs_pos: usize,
        match_len: usize,
        lit_len: usize,
    ) -> MatchCandidate {
        // Delegate to the shared helper that grows the match leftwards
        // into the pending literals.
        extend_backwards_shared(
            self.live_history(),
            self.history_abs_start,
            candidate_pos,
            abs_pos,
            match_len,
            lit_len,
        )
    }
2613
2614 fn insert_positions(&mut self, start: usize, end: usize) {
2615 for pos in start..end {
2616 self.insert_position(pos);
2617 }
2618 }
2619
    fn insert_positions_with_step(&mut self, start: usize, end: usize, step: usize) {
        // Sparse insertion used for incompressible regions; step <= 1
        // degenerates to dense insertion.
        if step <= 1 {
            self.insert_positions(start, end);
            return;
        }
        let mut pos = start;
        while pos < end {
            self.insert_position(pos);
            let next = pos.saturating_add(step);
            if next <= pos {
                // Saturated at usize::MAX; avoid a stuck loop.
                break;
            }
            pos = next;
        }
    }
2635
    fn insert_position(&mut self, abs_pos: usize) {
        // Record `abs_pos` in its row's ring buffer: step the head back
        // one slot and overwrite the oldest entry with tag + position.
        let Some((row, tag)) = self.hash_and_row(abs_pos) else {
            return;
        };
        let row_entries = 1usize << self.row_log;
        let row_mask = row_entries - 1;
        let row_base = row << self.row_log;
        let head = self.row_heads[row] as usize;
        let next = head.wrapping_sub(1) & row_mask;
        self.row_heads[row] = next as u8;
        self.row_tags[row_base + next] = tag;
        self.row_positions[row_base + next] = abs_pos;
    }
2649}
2650
// Hash-chain matcher: a head table maps a 4-byte hash to the most recent
// position, and a chain table links each position to the previous one
// sharing the same hash.
struct HcMatchGenerator {
    // Hard cap on the bytes retained for match finding.
    max_window_size: usize,
    // Chunks currently inside the window, oldest first.
    window: VecDeque<Vec<u8>>,
    // Sum of the lengths of all chunks in `window`.
    window_size: usize,
    // Concatenated bytes; only `history[history_start..]` is live.
    history: Vec<u8>,
    // Offset of the first live byte within `history`.
    history_start: usize,
    // Absolute stream position of the first live history byte.
    history_abs_start: usize,
    // Base subtracted from absolute positions before storing as u32.
    position_base: usize,
    // Three most recent offsets, used for repcode candidates.
    offset_hist: [u32; 3],
    // hash -> most recent stored position (relative + 1), or HC_EMPTY.
    hash_table: Vec<u32>,
    // Links to the previous position with the same hash, keyed by the
    // relative position masked to the chain-table size.
    chain_table: Vec<u32>,
    // How many following positions the lazy matcher may examine.
    lazy_depth: u8,
    // log2 size of the hash table.
    hash_log: usize,
    // log2 size of the chain table.
    chain_log: usize,
    // Maximum candidates evaluated per search.
    search_depth: usize,
    // Match length at which searching stops early.
    target_len: usize,
}
2668
2669impl HcMatchGenerator {
    fn new(max_window_size: usize) -> Self {
        // Fresh matcher; tables are allocated lazily by `ensure_tables`.
        Self {
            max_window_size,
            window: VecDeque::new(),
            window_size: 0,
            history: Vec::new(),
            history_start: 0,
            history_abs_start: 0,
            position_base: 0,
            // Initial repeat offsets as used throughout this file.
            offset_hist: [1, 4, 8],
            hash_table: Vec::new(),
            chain_table: Vec::new(),
            lazy_depth: 2,
            hash_log: HC_HASH_LOG,
            chain_log: HC_CHAIN_LOG,
            search_depth: HC_SEARCH_DEPTH,
            target_len: HC_TARGET_LEN,
        }
    }
2689
    fn configure(&mut self, config: HcConfig) {
        // Apply a preset. Reallocation is deferred to `ensure_tables`,
        // so a size change only drops the stale allocations here.
        let resize = self.hash_log != config.hash_log || self.chain_log != config.chain_log;
        self.hash_log = config.hash_log;
        self.chain_log = config.chain_log;
        self.search_depth = config.search_depth.min(MAX_HC_SEARCH_DEPTH);
        self.target_len = config.target_len;
        if resize && !self.hash_table.is_empty() {
            self.hash_table.clear();
            self.chain_table.clear();
        }
    }
2702
    fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
        // Clear all matcher state (keeping table allocations) and hand
        // the window buffers back to the caller for reuse.
        self.window_size = 0;
        self.history.clear();
        self.history_start = 0;
        self.history_abs_start = 0;
        self.position_base = 0;
        self.offset_hist = [1, 4, 8];
        if !self.hash_table.is_empty() {
            self.hash_table.fill(HC_EMPTY);
            self.chain_table.fill(HC_EMPTY);
        }
        for mut data in self.window.drain(..) {
            // Restore full length so the recycled buffer exposes its
            // whole capacity to the next user.
            data.resize(data.capacity(), 0);
            reuse_space(data);
        }
    }
2719
    fn get_last_space(&self) -> &[u8] {
        // Most recently added chunk; panics if no data was added yet.
        self.window.back().unwrap().as_slice()
    }
2723
    fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
        // Append a chunk, evicting oldest chunks until the new total fits
        // inside the window. Evicted buffers are recycled via
        // `reuse_space`.
        assert!(data.len() <= self.max_window_size);
        while self.window_size + data.len() > self.max_window_size {
            let removed = self.window.pop_front().unwrap();
            self.window_size -= removed.len();
            self.history_start += removed.len();
            self.history_abs_start += removed.len();
            reuse_space(removed);
        }
        self.compact_history();
        self.history.extend_from_slice(&data);
        self.window_size += data.len();
        self.window.push_back(data);
    }
2741
    fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
        // Evict oldest chunks until the window fits its size limit,
        // recycling the freed buffers.
        while self.window_size > self.max_window_size {
            let removed = self.window.pop_front().unwrap();
            self.window_size -= removed.len();
            self.history_start += removed.len();
            self.history_abs_start += removed.len();
            reuse_space(removed);
        }
    }
2751
    fn backfill_boundary_positions(&mut self, current_abs_start: usize) {
        // Insert up to 3 positions before the new chunk so 4-byte hash
        // keys straddling the chunk boundary become searchable.
        let backfill_start = current_abs_start
            .saturating_sub(3)
            .max(self.history_abs_start);
        if backfill_start < current_abs_start {
            self.insert_positions(backfill_start, current_abs_start);
        }
    }
2762
2763 fn skip_matching(&mut self, incompressible_hint: Option<bool>) {
2764 self.ensure_tables();
2765 let current_len = self.window.back().unwrap().len();
2766 let current_abs_start = self.history_abs_start + self.window_size - current_len;
2767 let current_abs_end = current_abs_start + current_len;
2768 self.backfill_boundary_positions(current_abs_start);
2769 if incompressible_hint == Some(true) {
2770 self.insert_positions_with_step(
2771 current_abs_start,
2772 current_abs_end,
2773 INCOMPRESSIBLE_SKIP_STEP,
2774 );
2775 let dense_tail = HC_MIN_MATCH_LEN + INCOMPRESSIBLE_SKIP_STEP;
2776 let tail_start = current_abs_end
2777 .saturating_sub(dense_tail)
2778 .max(self.history_abs_start);
2779 let tail_start = tail_start.max(current_abs_start);
2780 for pos in tail_start..current_abs_end {
2781 if !(pos - current_abs_start).is_multiple_of(INCOMPRESSIBLE_SKIP_STEP) {
2782 self.insert_position(pos);
2783 }
2784 }
2785 } else {
2786 self.insert_positions(current_abs_start, current_abs_end);
2787 }
2788 }
2789
    fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
        // Scan the newest chunk, emitting literal/match sequences through
        // `handle_sequence` and indexing every visited position.
        self.ensure_tables();

        let current_len = self.window.back().unwrap().len();
        if current_len == 0 {
            return;
        }

        let current_abs_start = self.history_abs_start + self.window_size - current_len;
        self.backfill_boundary_positions(current_abs_start);

        let mut pos = 0usize;
        let mut literals_start = 0usize;
        while pos + HC_MIN_MATCH_LEN <= current_len {
            let abs_pos = current_abs_start + pos;
            let lit_len = pos - literals_start;

            let best = self.find_best_match(abs_pos, lit_len);
            if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) {
                // Index the matched span so later searches can find it.
                self.insert_positions(abs_pos, candidate.start + candidate.match_len);
                let current = self.window.back().unwrap().as_slice();
                let start = candidate.start - current_abs_start;
                let literals = &current[literals_start..start];
                handle_sequence(Sequence::Triple {
                    literals,
                    offset: candidate.offset,
                    match_len: candidate.match_len,
                });
                // Keep the repcode history in sync with what was emitted.
                let _ = encode_offset_with_history(
                    candidate.offset as u32,
                    literals.len() as u32,
                    &mut self.offset_hist,
                );
                pos = start + candidate.match_len;
                literals_start = pos;
            } else {
                self.insert_position(abs_pos);
                pos += 1;
            }
        }

        // Index the tail (too short to start a match here) so the next
        // chunk can still reference it; 4 is the hash key length.
        while pos + 4 <= current_len {
            self.insert_position(current_abs_start + pos);
            pos += 1;
        }

        if literals_start < current_len {
            let current = self.window.back().unwrap().as_slice();
            handle_sequence(Sequence::Literals {
                literals: &current[literals_start..],
            });
        }
    }
2845
    fn ensure_tables(&mut self) {
        // Lazily allocate the hash/chain tables on first use (or after
        // `configure` cleared them for a size change).
        if self.hash_table.is_empty() {
            self.hash_table = alloc::vec![HC_EMPTY; 1 << self.hash_log];
            self.chain_table = alloc::vec![HC_EMPTY; 1 << self.chain_log];
        }
    }
2852
    fn compact_history(&mut self) {
        // Physically drop evicted bytes once they exceed the window size
        // or make up at least half of the buffer.
        if self.history_start == 0 {
            return;
        }
        if self.history_start >= self.max_window_size
            || self.history_start * 2 >= self.history.len()
        {
            self.history.drain(..self.history_start);
            self.history_start = 0;
        }
    }
2864
    fn live_history(&self) -> &[u8] {
        // Bytes still inside the window; earlier bytes are evicted.
        &self.history[self.history_start..]
    }
2868
    fn history_abs_end(&self) -> usize {
        // One past the absolute position of the last live history byte.
        self.history_abs_start + self.live_history().len()
    }
2872
    fn hash_position(&self, data: &[u8]) -> usize {
        // Multiplicative hash of the first 4 bytes (little-endian); the
        // top `hash_log` bits form the table index.
        let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64;
        ((value.wrapping_mul(HASH_MIX_PRIME)) >> (64 - self.hash_log)) as usize
    }
2877
    fn relative_position(&self, abs_pos: usize) -> Option<u32> {
        // Positions are stored as u32 offsets from `position_base`.
        // u32::MAX itself is rejected so the stored `rel + 1` form in
        // `insert_position_no_rebase` cannot overflow.
        let rel = abs_pos.checked_sub(self.position_base)?;
        let rel_u32 = u32::try_from(rel).ok()?;
        (rel_u32 < u32::MAX).then_some(rel_u32)
    }
2886
    fn maybe_rebase_positions(&mut self, abs_pos: usize) {
        // When the u32 relative position is about to run out, rebase to
        // the current window start, wipe both tables, and re-index the
        // live history up to (but excluding) `abs_pos`.
        let needs_rebase = self
            .relative_position(abs_pos)
            .is_none_or(|relative| relative >= u32::MAX - 1);
        if !needs_rebase {
            return;
        }

        self.position_base = self.history_abs_start;
        self.hash_table.fill(HC_EMPTY);
        self.chain_table.fill(HC_EMPTY);

        let history_start = self.history_abs_start;
        for pos in history_start..abs_pos {
            // Re-insert directly; the new base already accommodates `pos`.
            self.insert_position_no_rebase(pos);
        }
    }
2907
    fn insert_position(&mut self, abs_pos: usize) {
        // Rebase the stored u32 positions first if `abs_pos` would not
        // fit, then insert normally.
        self.maybe_rebase_positions(abs_pos);
        self.insert_position_no_rebase(abs_pos);
    }
2912
    fn insert_position_no_rebase(&mut self, abs_pos: usize) {
        // Push `abs_pos` onto the front of its hash chain. Positions are
        // stored as `relative + 1`; the chain link lives at the relative
        // position masked to the chain-table size.
        let idx = abs_pos - self.history_abs_start;
        let concat = self.live_history();
        if idx + 4 > concat.len() {
            // Not enough bytes left to form a 4-byte hash key.
            return;
        }
        let hash = self.hash_position(&concat[idx..]);
        let Some(relative_pos) = self.relative_position(abs_pos) else {
            return;
        };
        let stored = relative_pos + 1;
        let chain_idx = relative_pos as usize & ((1 << self.chain_log) - 1);
        let prev = self.hash_table[hash];
        self.chain_table[chain_idx] = prev;
        self.hash_table[hash] = stored;
    }
2929
2930 fn insert_positions(&mut self, start: usize, end: usize) {
2931 for pos in start..end {
2932 self.insert_position(pos);
2933 }
2934 }
2935
    fn insert_positions_with_step(&mut self, start: usize, end: usize, step: usize) {
        // Sparse insertion used for incompressible regions. A zero step
        // is a no-op (unlike the other matchers, step == 1 is allowed
        // here and simply means dense insertion).
        if step == 0 {
            return;
        }
        let mut pos = start;
        while pos < end {
            self.insert_position(pos);
            let next = pos.saturating_add(step);
            if next <= pos {
                // Saturated at usize::MAX; avoid a stuck loop.
                break;
            }
            pos = next;
        }
    }
2950
    fn chain_candidates(&self, abs_pos: usize) -> [usize; MAX_HC_SEARCH_DEPTH] {
        // Walk the hash chain for `abs_pos` and collect up to
        // `search_depth` in-range candidate positions; unused slots stay
        // usize::MAX as a terminator. The walk is capped at
        // `search_depth * 4` link traversals so chains polluted by
        // aliased slots cannot stall the search.
        let mut buf = [usize::MAX; MAX_HC_SEARCH_DEPTH];
        let idx = abs_pos - self.history_abs_start;
        let concat = self.live_history();
        if idx + 4 > concat.len() {
            return buf;
        }
        let hash = self.hash_position(&concat[idx..]);
        let chain_mask = (1 << self.chain_log) - 1;

        let mut cur = self.hash_table[hash];
        let mut filled = 0;
        let mut steps = 0;
        let max_chain_steps = self.search_depth * 4;
        while filled < self.search_depth && steps < max_chain_steps {
            if cur == HC_EMPTY {
                break;
            }
            // Stored entries are `relative + 1`.
            let candidate_rel = cur.wrapping_sub(1) as usize;
            let candidate_abs = self.position_base + candidate_rel;
            let next = self.chain_table[candidate_rel & chain_mask];
            steps += 1;
            if next == cur {
                // Self-link: record this candidate (if in range) and stop.
                if candidate_abs >= self.history_abs_start && candidate_abs < abs_pos {
                    buf[filled] = candidate_abs;
                }
                break;
            }
            cur = next;
            // Skip entries outside the window or not strictly before us.
            if candidate_abs < self.history_abs_start || candidate_abs >= abs_pos {
                continue;
            }
            buf[filled] = candidate_abs;
            filled += 1;
        }
        buf
    }
2999
3000 fn find_best_match(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
3001 let rep = self.repcode_candidate(abs_pos, lit_len);
3002 let hash = self.hash_chain_candidate(abs_pos, lit_len);
3003 Self::better_candidate(rep, hash)
3004 }
3005
    fn hash_chain_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        // Evaluate the chain candidates for `abs_pos`, keeping the one
        // with the best gain and stopping once `target_len` is reached.
        let concat = self.live_history();
        let current_idx = abs_pos - self.history_abs_start;
        if current_idx + HC_MIN_MATCH_LEN > concat.len() {
            return None;
        }

        let mut best: Option<MatchCandidate> = None;
        for candidate_abs in self.chain_candidates(abs_pos) {
            // usize::MAX terminates the fixed-size candidate buffer.
            if candidate_abs == usize::MAX {
                break;
            }
            let candidate_idx = candidate_abs - self.history_abs_start;
            let match_len =
                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
            if match_len >= HC_MIN_MATCH_LEN {
                let candidate = self.extend_backwards(candidate_abs, abs_pos, match_len, lit_len);
                best = Self::better_candidate(best, Some(candidate));
                if best.is_some_and(|b| b.match_len >= self.target_len) {
                    return best;
                }
            }
        }
        best
    }
3031
    fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option<MatchCandidate> {
        // Try the repeat offsets. With no pending literals the offsets
        // are reordered and `rep0 - 1` replaces the third slot, mirroring
        // zstd's repcode convention for the lit_len == 0 case.
        let reps = if lit_len == 0 {
            [
                Some(self.offset_hist[1] as usize),
                Some(self.offset_hist[2] as usize),
                (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize),
            ]
        } else {
            [
                Some(self.offset_hist[0] as usize),
                Some(self.offset_hist[1] as usize),
                Some(self.offset_hist[2] as usize),
            ]
        };

        let concat = self.live_history();
        let current_idx = abs_pos - self.history_abs_start;
        if current_idx + HC_MIN_MATCH_LEN > concat.len() {
            return None;
        }

        let mut best = None;
        for rep in reps.into_iter().flatten() {
            // Skip offsets that reach before the stream or the window.
            if rep == 0 || rep > abs_pos {
                continue;
            }
            let candidate_pos = abs_pos - rep;
            if candidate_pos < self.history_abs_start {
                continue;
            }
            let candidate_idx = candidate_pos - self.history_abs_start;
            let match_len =
                MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]);
            if match_len >= HC_MIN_MATCH_LEN {
                let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len);
                best = Self::better_candidate(best, Some(candidate));
            }
        }
        best
    }
3072
    fn extend_backwards(
        &self,
        mut candidate_pos: usize,
        mut abs_pos: usize,
        mut match_len: usize,
        lit_len: usize,
    ) -> MatchCandidate {
        // Grow the match leftwards while the preceding bytes agree,
        // consuming pending literals (never past `abs_pos - lit_len`)
        // and never crossing the start of live history.
        let concat = self.live_history();
        let min_abs_pos = abs_pos - lit_len;
        while abs_pos > min_abs_pos
            && candidate_pos > self.history_abs_start
            && concat[candidate_pos - self.history_abs_start - 1]
                == concat[abs_pos - self.history_abs_start - 1]
        {
            candidate_pos -= 1;
            abs_pos -= 1;
            match_len += 1;
        }
        MatchCandidate {
            start: abs_pos,
            offset: abs_pos - candidate_pos,
            match_len,
        }
    }
3097
3098 fn better_candidate(
3099 lhs: Option<MatchCandidate>,
3100 rhs: Option<MatchCandidate>,
3101 ) -> Option<MatchCandidate> {
3102 match (lhs, rhs) {
3103 (None, other) | (other, None) => other,
3104 (Some(lhs), Some(rhs)) => {
3105 let lhs_gain = Self::match_gain(lhs.match_len, lhs.offset);
3106 let rhs_gain = Self::match_gain(rhs.match_len, rhs.offset);
3107 if rhs_gain > lhs_gain {
3108 Some(rhs)
3109 } else {
3110 Some(lhs)
3111 }
3112 }
3113 }
3114 }
3115
3116 fn match_gain(match_len: usize, offset: usize) -> i32 {
3117 debug_assert!(
3118 offset > 0,
3119 "zstd offsets are 1-indexed, offset=0 is invalid"
3120 );
3121 let offset_bits = 32 - (offset as u32).leading_zeros() as i32;
3122 (match_len as i32) * 4 - offset_bits
3123 }
3124
    fn pick_lazy_match(
        &self,
        abs_pos: usize,
        lit_len: usize,
        best: Option<MatchCandidate>,
    ) -> Option<MatchCandidate> {
        // Lazy matching: accept `best` unless starting one (or, at depth
        // >= 2, two) positions later yields a sufficiently better match,
        // in which case return None and let the caller advance by one.
        let best = best?;
        if best.match_len >= self.target_len
            || abs_pos + 1 + HC_MIN_MATCH_LEN > self.history_abs_end()
        {
            return Some(best);
        }

        // The +4 biases the comparison in favor of taking the match now.
        let current_gain = Self::match_gain(best.match_len, best.offset) + 4;

        let next = self.find_best_match(abs_pos + 1, lit_len + 1);
        if let Some(next) = next {
            let next_gain = Self::match_gain(next.match_len, next.offset);
            if next_gain > current_gain {
                return None;
            }
        }

        // Two positions ahead must clear an even higher bar (+4 more).
        if self.lazy_depth >= 2 && abs_pos + 2 + HC_MIN_MATCH_LEN <= self.history_abs_end() {
            let next2 = self.find_best_match(abs_pos + 2, lit_len + 2);
            if let Some(next2) = next2 {
                let next2_gain = Self::match_gain(next2.match_len, next2.offset);
                if next2_gain > current_gain + 4 {
                    return None;
                }
            }
        }

        Some(best)
    }
3166}
3167
#[test]
fn matches() {
    // End-to-end check of the simple MatchGenerator: data is fed in block by
    // block, every emitted Sequence is replayed into `reconstructed`, and at
    // the end the replay must equal the concatenation of all inputs.
    let mut matcher = MatchGenerator::new(1000);
    let mut original_data = Vec::new();
    let mut reconstructed = Vec::new();

    // Decoder-style replay: literals are copied verbatim; a triple copies
    // `match_len` bytes starting `offset` bytes back, byte by byte so that
    // overlapping matches (offset < match_len) expand correctly.
    let replay_sequence = |seq: Sequence<'_>, reconstructed: &mut Vec<u8>| match seq {
        Sequence::Literals { literals } => {
            assert!(!literals.is_empty());
            reconstructed.extend_from_slice(literals);
        }
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            assert!(offset > 0);
            assert!(match_len >= MIN_MATCH_LEN);
            reconstructed.extend_from_slice(literals);
            assert!(offset <= reconstructed.len());
            let start = reconstructed.len() - offset;
            for i in 0..match_len {
                let byte = reconstructed[start + i];
                reconstructed.push(byte);
            }
        }
    };

    // Block 1: a run of zeros; expected to drain in a single sequence.
    matcher.add_data(
        alloc::vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));

    // Block must be fully consumed: next_sequence reports no more work.
    assert!(!matcher.next_sequence(|_| {}));

    // Block 2: a 6-byte pattern repeated three times plus trailing zeros;
    // expected to drain within three sequences.
    matcher.add_data(
        alloc::vec![
            1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
        ],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[
        1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
    ]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Block 3: mostly fresh literals followed by zeros.
    matcher.add_data(
        alloc::vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Block 4: zeros again, matchable against earlier zero history.
    matcher.add_data(
        alloc::vec![0, 0, 0, 0, 0],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 0, 0, 0]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Block 5: repeats the 7..=11 tail of block 3 — exercises matching
    // against a previous block's data.
    matcher.add_data(
        alloc::vec![7, 8, 9, 10, 11],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[7, 8, 9, 10, 11]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Block 6: committed but explicitly skipped — no sequences are emitted,
    // so the replay buffer is updated by hand. The skipped bytes must still
    // enter history (checked by block 7 matching against them).
    matcher.add_data(
        alloc::vec![1, 3, 5, 7, 9],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    matcher.skip_matching();
    original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
    reconstructed.extend_from_slice(&[1, 3, 5, 7, 9]);
    assert!(!matcher.next_sequence(|_| {}));

    // Block 7: same bytes as the skipped block.
    matcher.add_data(
        alloc::vec![1, 3, 5, 7, 9],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[1, 3, 5, 7, 9]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Block 8: a 5-byte run (11,13,15,17,20) repeated within one block.
    matcher.add_data(
        alloc::vec![0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23],
        SuffixStore::with_capacity(100),
        |_, _| {},
    );
    original_data.extend_from_slice(&[0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23]);

    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
    assert!(!matcher.next_sequence(|_| {}));

    // Lossless roundtrip across all blocks.
    assert_eq!(reconstructed, original_data);
}
3287
#[test]
fn dfast_matches_roundtrip_multi_block_pattern() {
    // Feed two identical 128 KiB blocks of a repeating 10-byte pattern
    // through the dfast backend and verify that replaying the emitted
    // sequences reproduces each block exactly (lossless roundtrip, including
    // matches referencing the previous block's history).
    let pattern = [9, 21, 44, 184, 19, 96, 171, 109, 141, 251];
    let first_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
    let second_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();

    let mut matcher = DfastMatchGenerator::new(1 << 22);
    // Decoder-style replay; byte-by-byte copy handles overlapping matches.
    let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            decoded.extend_from_slice(literals);
            let start = decoded.len() - offset;
            for i in 0..match_len {
                let byte = decoded[start + i];
                decoded.push(byte);
            }
        }
    };

    matcher.add_data(first_block.clone(), |_| {});
    let mut history = Vec::new();
    matcher.start_matching(|seq| replay_sequence(&mut history, seq));
    assert_eq!(history, first_block);

    // Second block decodes against the first block's retained history.
    matcher.add_data(second_block.clone(), |_| {});
    let prefix_len = history.len();
    matcher.start_matching(|seq| replay_sequence(&mut history, seq));

    assert_eq!(&history[prefix_len..], second_block.as_slice());
}
3322
#[test]
fn driver_switches_backends_and_initializes_dfast_via_reset() {
    // Default level must select the dfast backend with a 4 MiB window; data
    // committed afterwards must roundtrip, and a later reset to Fastest must
    // shrink the window to 128 KiB.
    let mut driver = MatchGeneratorDriver::new(32, 2);

    driver.reset(CompressionLevel::Default);
    assert_eq!(driver.active_backend, MatcherBackend::Dfast);
    assert_eq!(driver.window_size(), (1u64 << 22));

    // First block is committed and skipped — it only seeds history.
    let mut first = driver.get_next_space();
    first[..12].copy_from_slice(b"abcabcabcabc");
    first.truncate(12);
    driver.commit_space(first);
    assert_eq!(driver.get_last_space(), b"abcabcabcabc");
    driver.skip_matching_with_hint(None);

    // Second block repeats the first, so matching should find references.
    let mut second = driver.get_next_space();
    second[..12].copy_from_slice(b"abcabcabcabc");
    second.truncate(12);
    driver.commit_space(second);

    // Replay starts from the already-known first block's bytes.
    let mut reconstructed = b"abcabcabcabc".to_vec();
    driver.start_matching(|seq| match seq {
        Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            reconstructed.extend_from_slice(literals);
            let start = reconstructed.len() - offset;
            for i in 0..match_len {
                let byte = reconstructed[start + i];
                reconstructed.push(byte);
            }
        }
    });
    assert_eq!(reconstructed, b"abcabcabcabcabcabcabcabc");

    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.window_size(), (1u64 << 17));
}
3364
#[test]
fn driver_level4_selects_row_backend() {
    // Compression level 4 must route matching through the row-based backend.
    let mut d = MatchGeneratorDriver::new(32, 2);
    d.reset(CompressionLevel::Level(4));
    assert_eq!(d.active_backend, MatcherBackend::Row);
}
3371
#[test]
fn driver_small_source_hint_shrinks_dfast_hash_tables() {
    // Without a hint the dfast backend uses its full-size hash table; a tiny
    // source-size hint must shrink both the window and the table footprint.
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Unhinted run: table sized by DFAST_HASH_BITS.
    driver.reset(CompressionLevel::Level(2));
    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"abcabcabcabc");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching_with_hint(None);
    let full_tables = driver.dfast_matcher().short_hash.len();
    assert_eq!(full_tables, 1 << DFAST_HASH_BITS);

    // Hinted run: 1 KiB source hint before reset.
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Level(2));
    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"xyzxyzxyzxyz");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching_with_hint(None);
    let hinted_tables = driver.dfast_matcher().short_hash.len();

    assert_eq!(driver.window_size(), 1 << MIN_HINTED_WINDOW_LOG);
    assert_eq!(hinted_tables, 1 << MIN_HINTED_WINDOW_LOG);
    assert!(
        hinted_tables < full_tables,
        "tiny source hint should reduce dfast table footprint"
    );
}
3401
#[test]
fn driver_small_source_hint_shrinks_row_hash_tables() {
    // Row-backend counterpart of the dfast shrink test: a tiny source hint
    // must reduce the number of hash rows from the ROW_HASH_BITS default.
    let mut driver = MatchGeneratorDriver::new(32, 2);

    // Unhinted run: row count derived from ROW_HASH_BITS and ROW_LOG.
    driver.reset(CompressionLevel::Level(4));
    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"abcabcabcabc");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching_with_hint(None);
    let full_rows = driver.row_matcher().row_heads.len();
    assert_eq!(full_rows, 1 << (ROW_HASH_BITS - ROW_LOG));

    // Hinted run: 1 KiB source hint before reset.
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Level(4));
    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"xyzxyzxyzxyz");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching_with_hint(None);
    let hinted_rows = driver.row_matcher().row_heads.len();

    assert_eq!(driver.window_size(), 1 << MIN_HINTED_WINDOW_LOG);
    assert_eq!(
        hinted_rows,
        1 << ((MIN_HINTED_WINDOW_LOG as usize) - ROW_LOG)
    );
    assert!(
        hinted_rows < full_rows,
        "tiny source hint should reduce row hash table footprint"
    );
}
3434
#[test]
fn row_matches_roundtrip_multi_block_pattern() {
    // Row-backend roundtrip: two identical 128 KiB pattern blocks plus a
    // third all-distinct block must each replay back exactly.
    let pattern = [7, 13, 44, 184, 19, 96, 171, 109, 141, 251];
    let first_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
    let second_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();

    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.ensure_tables();
    // Decoder-style replay; byte-by-byte copy handles overlapping matches.
    let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            decoded.extend_from_slice(literals);
            let start = decoded.len() - offset;
            for i in 0..match_len {
                let byte = decoded[start + i];
                decoded.push(byte);
            }
        }
    };

    matcher.add_data(first_block.clone(), |_| {});
    let mut history = Vec::new();
    matcher.start_matching(|seq| replay_sequence(&mut history, seq));
    assert_eq!(history, first_block);

    // Second block matches against first-block history.
    matcher.add_data(second_block.clone(), |_| {});
    let prefix_len = history.len();
    matcher.start_matching(|seq| replay_sequence(&mut history, seq));

    assert_eq!(&history[prefix_len..], second_block.as_slice());

    // Third block: all 256 byte values, no internal repetition — must still
    // roundtrip (likely emitted mostly as literals).
    let third_block: Vec<u8> = (0u8..=255).collect();
    matcher.add_data(third_block.clone(), |_| {});
    let third_prefix = history.len();
    matcher.start_matching(|seq| replay_sequence(&mut history, seq));
    assert_eq!(&history[third_prefix..], third_block.as_slice());
}
3478
#[test]
fn row_short_block_emits_literals_only() {
    // A 5-byte block is too short for the row backend's minimum match length,
    // so it must come out as pure literals; a longer repeated block must then
    // produce at least one match triple.
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);

    matcher.add_data(b"abcde".to_vec(), |_| {});

    let mut saw_triple = false;
    let mut reconstructed = Vec::new();
    matcher.start_matching(|seq| match seq {
        Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
        Sequence::Triple { .. } => saw_triple = true,
    });

    assert!(
        !saw_triple,
        "row backend must not emit triples for short blocks"
    );
    assert_eq!(reconstructed, b"abcde");

    // Second phase: "abcde" repeated is long enough to match.
    saw_triple = false;
    matcher.add_data(b"abcdeabcde".to_vec(), |_| {});
    matcher.start_matching(|seq| {
        if let Sequence::Triple { .. } = seq {
            saw_triple = true;
        }
    });
    assert!(
        saw_triple,
        "row backend should emit triples on repeated data"
    );
}
3512
#[test]
fn row_pick_lazy_returns_best_when_lookahead_is_out_of_bounds() {
    // With only 6 bytes of data there is no room to probe abs_pos + 1, so
    // pick_lazy_match must return the existing candidate unchanged.
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.add_data(b"abcabc".to_vec(), |_| {});

    let best = MatchCandidate {
        start: 0,
        offset: 1,
        match_len: ROW_MIN_MATCH_LEN,
    };
    let picked = matcher
        .pick_lazy_match(0, 0, Some(best))
        .expect("best candidate must survive");

    // The candidate must pass through field-for-field.
    assert_eq!(picked.start, best.start);
    assert_eq!(picked.offset, best.offset);
    assert_eq!(picked.match_len, best.match_len);
}
3532
#[test]
fn row_backfills_previous_block_tail_for_cross_boundary_match() {
    // The first block ends in "XYZ"; the second begins "XYZXYZ…". The row
    // matcher should index the previous block's 3-byte tail so the second
    // block can open with an offset-3 match spanning the block boundary.
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);

    let mut first_block = alloc::vec![0xA5; 64];
    first_block.extend_from_slice(b"XYZ");
    let second_block = b"XYZXYZtail".to_vec();

    // Decoder-style replay; byte-by-byte copy handles overlapping matches.
    let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
        Sequence::Literals { literals } => decoded.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            decoded.extend_from_slice(literals);
            let start = decoded.len() - offset;
            for i in 0..match_len {
                let byte = decoded[start + i];
                decoded.push(byte);
            }
        }
    };

    matcher.add_data(first_block.clone(), |_| {});
    let mut reconstructed = Vec::new();
    matcher.start_matching(|seq| replay_sequence(&mut reconstructed, seq));
    assert_eq!(reconstructed, first_block);

    matcher.add_data(second_block.clone(), |_| {});
    let mut saw_cross_boundary = false;
    let prefix_len = reconstructed.len();
    matcher.start_matching(|seq| {
        // A triple with no leading literals and offset 3 means the match
        // begins at the block boundary, referencing the previous tail.
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 3
            && match_len >= ROW_MIN_MATCH_LEN
        {
            saw_cross_boundary = true;
        }
        replay_sequence(&mut reconstructed, seq);
    });

    assert!(
        saw_cross_boundary,
        "row matcher should reuse the 3-byte previous-block tail"
    );
    assert_eq!(&reconstructed[prefix_len..], second_block.as_slice());
}
3587
#[test]
fn row_skip_matching_with_incompressible_hint_uses_sparse_prefix() {
    // When skip_matching is told the data looks incompressible, the row
    // matcher should seed its position table more sparsely than with the
    // default (dense) indexing, saving work on junk data.
    let data = deterministic_high_entropy_bytes(0xA713_9C5D_44E2_10B1, 4096);

    // Dense baseline: hint says "compressible".
    let mut dense = RowMatchGenerator::new(1 << 22);
    dense.configure(ROW_CONFIG);
    dense.add_data(data.clone(), |_| {});
    dense.skip_matching_with_hint(Some(false));
    let dense_slots = dense
        .row_positions
        .iter()
        .filter(|&&pos| pos != ROW_EMPTY_SLOT)
        .count();

    // Sparse run: hint says "incompressible".
    let mut sparse = RowMatchGenerator::new(1 << 22);
    sparse.configure(ROW_CONFIG);
    sparse.add_data(data, |_| {});
    sparse.skip_matching_with_hint(Some(true));
    let sparse_slots = sparse
        .row_positions
        .iter()
        .filter(|&&pos| pos != ROW_EMPTY_SLOT)
        .count();

    assert!(
        sparse_slots < dense_slots,
        "incompressible hint should seed fewer row slots (sparse={sparse_slots}, dense={dense_slots})"
    );
}
3617
#[test]
fn driver_unhinted_level2_keeps_default_dfast_hash_table_size() {
    // Without a source-size hint, Level(2) must allocate the full dfast hash
    // table (counterpart of the hinted-shrink behavior).
    let mut driver = MatchGeneratorDriver::new(32, 2);

    driver.reset(CompressionLevel::Level(2));
    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"abcabcabcabc");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching_with_hint(None);

    let table_len = driver.dfast_matcher().short_hash.len();
    assert_eq!(
        table_len,
        1 << DFAST_HASH_BITS,
        "unhinted Level(2) should keep default dfast table size"
    );
}
3636
#[test]
fn simple_backend_rejects_undersized_pooled_suffix_store() {
    // Inject a pooled SuffixStore that is too small for the next block and
    // verify the commit path provisions an adequately sized store rather
    // than reusing the undersized pooled one.
    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
    driver.reset(CompressionLevel::Fastest);

    // Undersized store (1024 slots) injected into the reuse pool.
    driver.suffix_pool.push(SuffixStore::with_capacity(1024));

    // Commit a 4096-byte block, which needs a larger store.
    let mut space = driver.get_next_space();
    space.clear();
    space.resize(4096, 0xAB);
    driver.commit_space(space);

    let last_suffix_slots = driver
        .match_generator
        .window
        .last()
        .expect("window entry must exist after commit")
        .suffixes
        .slots
        .len();
    assert!(
        last_suffix_slots >= 4096,
        "undersized pooled suffix store must not be reused for larger blocks"
    );
}
3662
#[test]
fn source_hint_clamps_driver_slice_size_to_window() {
    // A 1 KiB source hint must clamp both the reported window and the
    // driver's per-block slice size to the hinted minimum window, and the
    // buffers handed out must be exactly that size.
    let mut d = MatchGeneratorDriver::new(128 * 1024, 2);
    d.set_source_size_hint(1024);
    d.reset(CompressionLevel::Default);

    let window = d.window_size() as usize;
    assert_eq!(window, 1 << MIN_HINTED_WINDOW_LOG);
    assert_eq!(d.slice_size, window);

    let buf = d.get_next_space();
    assert_eq!(buf.len(), window);
    d.commit_space(buf);
}
3677
#[test]
fn pooled_space_keeps_capacity_when_slice_size_shrinks() {
    // After the slice size shrinks (via a small source hint), recycled
    // buffers must retain their original large capacity so the pool does not
    // thrash between shrink and regrow.
    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
    driver.reset(CompressionLevel::Default);

    let large = driver.get_next_space();
    let large_capacity = large.capacity();
    assert!(large_capacity >= 128 * 1024);
    driver.commit_space(large);

    // Shrink the slice size with a tiny hint, then take a buffer again.
    driver.set_source_size_hint(1024);
    driver.reset(CompressionLevel::Default);

    let small = driver.get_next_space();
    assert_eq!(small.len(), 1 << MIN_HINTED_WINDOW_LOG);
    assert!(
        small.capacity() >= large_capacity,
        "pooled buffer capacity should be preserved to avoid shrink/grow churn"
    );
}
3698
#[test]
fn driver_best_to_fastest_releases_oversized_hc_tables() {
    // Best level allocates large hash-chain (HC) tables; switching down to
    // Fastest must free them rather than keeping the memory resident.
    let mut driver = MatchGeneratorDriver::new(32, 2);

    driver.reset(CompressionLevel::Best);
    assert_eq!(driver.window_size(), (1u64 << 24));

    // Commit some data so the HC tables are actually populated.
    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"abcabcabcabc");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching_with_hint(None);

    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.window_size(), (1u64 << 17));

    let hc = driver.hc_match_generator.as_ref().unwrap();
    assert!(
        hc.hash_table.is_empty(),
        "HC hash_table should be released after switching away from Best"
    );
    assert!(
        hc.chain_table.is_empty(),
        "HC chain_table should be released after switching away from Best"
    );
}
3729
#[test]
fn driver_better_to_best_resizes_hc_tables() {
    // Moving from Better (8 MiB window) to Best (16 MiB window) must grow
    // both HC tables, not silently reuse the smaller allocations.
    let mut driver = MatchGeneratorDriver::new(32, 2);

    driver.reset(CompressionLevel::Better);
    assert_eq!(driver.window_size(), (1u64 << 23));

    // Populate HC tables at the Better sizing.
    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"abcabcabcabc");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching_with_hint(None);

    let hc = driver.hc_match_generator.as_ref().unwrap();
    let better_hash_len = hc.hash_table.len();
    let better_chain_len = hc.chain_table.len();

    driver.reset(CompressionLevel::Best);
    assert_eq!(driver.window_size(), (1u64 << 24));

    // Populate again at the Best sizing.
    let mut space = driver.get_next_space();
    space[..12].copy_from_slice(b"xyzxyzxyzxyz");
    space.truncate(12);
    driver.commit_space(space);
    driver.skip_matching_with_hint(None);

    let hc = driver.hc_match_generator.as_ref().unwrap();
    assert!(
        hc.hash_table.len() > better_hash_len,
        "Best hash_table ({}) should be larger than Better ({})",
        hc.hash_table.len(),
        better_hash_len
    );
    assert!(
        hc.chain_table.len() > better_chain_len,
        "Best chain_table ({}) should be larger than Better ({})",
        hc.chain_table.len(),
        better_chain_len
    );
}
3773
#[test]
fn prime_with_dictionary_preserves_history_for_first_full_block() {
    // After priming with an 8-byte dictionary, a first block equal to the
    // dictionary content must be expressible as a single offset-8 match with
    // no leading literals.
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    let mut saw_match = false;
    driver.start_matching(|seq| {
        // offset == 8 points exactly at the start of the primed dictionary.
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 8
            && match_len >= MIN_MATCH_LEN
        {
            saw_match = true;
        }
    });

    assert!(
        saw_match,
        "first full block should still match dictionary-primed history"
    );
}
3806
#[test]
fn prime_with_large_dictionary_preserves_early_history_until_first_block() {
    // A 24-byte dictionary is larger than the 8-byte driver slice; the
    // earliest dictionary bytes must still be reachable (offset == 24) when
    // the first real block repeats them.
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);

    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);

    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    let mut saw_match = false;
    driver.start_matching(|seq| {
        // offset == 24 reaches back to the very first dictionary byte.
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 24
            && match_len >= MIN_MATCH_LEN
        {
            saw_match = true;
        }
    });

    assert!(
        saw_match,
        "dictionary bytes should remain addressable until frame output exceeds the live window"
    );
}
3839
#[test]
fn prime_with_dictionary_applies_offset_history_even_when_content_is_empty() {
    // An empty dictionary payload must still install the supplied repcode
    // (offset history) values on the simple backend.
    let mut d = MatchGeneratorDriver::new(8, 1);
    d.reset(CompressionLevel::Fastest);

    d.prime_with_dictionary(&[], [11, 7, 3]);

    assert_eq!(d.match_generator.offset_hist, [11, 7, 3]);
}
3849
#[test]
fn dfast_prime_with_dictionary_preserves_history_for_first_full_block() {
    // Same as the simple-backend priming test, but for the dfast backend
    // (Level 2): the first full block must match the primed dictionary at
    // offset 8 with no leading literals.
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(2));

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    let mut saw_match = false;
    driver.start_matching(|seq| {
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 8
            && match_len >= DFAST_MIN_MATCH_LEN
        {
            saw_match = true;
        }
    });

    assert!(
        saw_match,
        "dfast backend should match dictionary-primed history in first full block"
    );
}
3882
#[test]
fn prime_with_dictionary_does_not_inflate_reported_window_size() {
    // Dictionary priming extends internal retention only; the window size
    // reported for the frame header must be unchanged by it.
    let mut d = MatchGeneratorDriver::new(8, 1);
    d.reset(CompressionLevel::Fastest);

    let reported_before = d.window_size();
    d.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);

    assert_eq!(
        d.window_size(),
        reported_before,
        "dictionary retention budget must not change reported frame window size"
    );
}
3897
#[test]
fn prime_with_dictionary_does_not_reuse_tiny_suffix_store() {
    // Priming with 9 bytes through an 8-byte slice leaves a 1-byte tail;
    // that tail is below MIN_MATCH_LEN and must not be committed as its own
    // window entry.
    let mut driver = MatchGeneratorDriver::new(8, 2);
    driver.reset(CompressionLevel::Fastest);

    driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);

    assert!(
        driver
            .match_generator
            .window
            .iter()
            .all(|entry| entry.data.len() >= MIN_MATCH_LEN),
        "dictionary priming must not commit tails shorter than MIN_MATCH_LEN"
    );
}
3916
#[test]
fn prime_with_dictionary_counts_only_committed_tail_budget() {
    // Priming 9 bytes through an 8-byte slice grows the retention budget by
    // exactly the 8 committed bytes — the uncommitted 1-byte tail must not
    // be counted.
    let mut d = MatchGeneratorDriver::new(8, 1);
    d.reset(CompressionLevel::Fastest);

    let budget_before = d.match_generator.max_window_size;
    d.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);

    assert_eq!(
        d.match_generator.max_window_size,
        budget_before + 8,
        "retention budget must account only for dictionary bytes actually committed to history"
    );
}
3932
#[test]
fn dfast_prime_with_dictionary_counts_four_byte_tail_budget() {
    // On the dfast backend, priming 12 bytes grows the retained window by
    // all 12 — including the 4-byte tail beyond the 8-byte slice.
    let mut d = MatchGeneratorDriver::new(8, 1);
    d.reset(CompressionLevel::Level(2));

    let budget_before = d.dfast_matcher().max_window_size;
    d.prime_with_dictionary(b"abcdefghijkl", [1, 4, 8]);

    assert_eq!(
        d.dfast_matcher().max_window_size,
        budget_before + 12,
        "dfast retention budget should include 4-byte dictionary tails"
    );
}
3949
#[test]
fn row_prime_with_dictionary_preserves_history_for_first_full_block() {
    // Row-backend (Level 4) variant of the priming test: the first full
    // block must match the primed dictionary at offset 8 with no literals.
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    let mut saw_match = false;
    driver.start_matching(|seq| {
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 8
            && match_len >= ROW_MIN_MATCH_LEN
        {
            saw_match = true;
        }
    });

    assert!(
        saw_match,
        "row backend should match dictionary-primed history in first full block"
    );
}
3982
#[test]
fn row_prime_with_dictionary_subtracts_uncommitted_tail_budget() {
    // On the row backend, priming 9 bytes through an 8-byte slice must grow
    // the retained window by only the 8 committed bytes; the 1-byte tail is
    // never committed and must not inflate the budget.
    let mut d = MatchGeneratorDriver::new(8, 1);
    d.reset(CompressionLevel::Level(4));

    let budget_before = d.row_matcher().max_window_size;
    d.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);

    assert_eq!(
        d.row_matcher().max_window_size,
        budget_before + 8,
        "row retained window must exclude uncommitted 1-byte tail"
    );
}
3999
#[test]
fn prime_with_dictionary_budget_shrinks_after_row_eviction() {
    // With the row window artificially clamped to 8 bytes, committing two
    // 8-byte blocks evicts all primed dictionary slices; the extra retention
    // budget granted for the dictionary must then be fully retired.
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));
    // Clamp both the matcher's window and the driver's reported size.
    driver.row_matcher_mut().max_window_size = 8;
    driver.reported_window_size = 8;

    let base_window = driver.row_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    // Priming 24 dictionary bytes grows the budget by 24.
    assert_eq!(driver.row_matcher().max_window_size, base_window + 24);

    // Two full 8-byte blocks push the dictionary out of the live window.
    for block in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut space = driver.get_next_space();
        space.clear();
        space.extend_from_slice(block);
        driver.commit_space(space);
        driver.skip_matching_with_hint(None);
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.row_matcher().max_window_size,
        base_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
4030
#[test]
fn row_get_last_space_and_reset_to_fastest_clears_window() {
    // get_last_space must expose the bytes just committed to the row
    // backend, and switching back to the Simple backend must clear the row
    // matcher's window.
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));

    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"row-data");
    driver.commit_space(space);

    assert_eq!(driver.get_last_space(), b"row-data");

    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.active_backend, MatcherBackend::Simple);
    assert!(driver.row_matcher().window.is_empty());
}
4047
#[test]
fn driver_reset_from_row_backend_reclaims_row_buffer_pool() {
    // Switching away from the row backend must drop its hash tables and
    // recycle the row history buffers back into the driver's vec pool.
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(4));
    assert_eq!(driver.active_backend, MatcherBackend::Row);

    // Touch the matcher and commit a block so there is something to reclaim.
    let _ = driver.row_matcher();
    let mut space = driver.get_next_space();
    space.extend_from_slice(b"row-data-to-recycle");
    driver.commit_space(space);

    let before_pool = driver.vec_pool.len();
    driver.reset(CompressionLevel::Fastest);

    assert_eq!(driver.active_backend, MatcherBackend::Simple);
    let row = driver
        .row_match_generator
        .as_ref()
        .expect("row matcher should remain allocated after switch");
    // All three row hash structures must be released.
    assert!(row.row_heads.is_empty());
    assert!(row.row_positions.is_empty());
    assert!(row.row_tags.is_empty());
    assert!(
        driver.vec_pool.len() >= before_pool,
        "row reset should recycle row history buffers"
    );
}
4078
#[test]
fn driver_reset_from_row_backend_tolerates_missing_row_matcher() {
    // Force the inconsistent state "row backend active, matcher missing"
    // and check that reset() recovers to the Simple backend instead of
    // panicking on the absent matcher.
    let mut d = MatchGeneratorDriver::new(8, 1);
    d.active_backend = MatcherBackend::Row;
    d.row_match_generator = None;

    d.reset(CompressionLevel::Fastest);

    assert_eq!(d.active_backend, MatcherBackend::Simple);
}
4090
#[test]
fn adjust_params_for_zero_source_size_uses_min_hinted_window_floor() {
    // A zero-byte source size must clamp the window log down to the hinted
    // minimum rather than keeping the level's default.
    let mut base = resolve_level_params(CompressionLevel::Level(4), None);
    base.window_log = 22;

    let adjusted = adjust_params_for_source_size(base, 0);

    assert_eq!(adjusted.window_log, MIN_HINTED_WINDOW_LOG);
}
4098
#[test]
fn common_prefix_len_matches_scalar_reference_across_offsets() {
    // The optimized common_prefix_len must agree with a trivial scalar
    // reference across lengths straddling 16/32-byte vector boundaries,
    // unaligned starting offsets, and mismatches at lane edges.
    fn scalar_reference(a: &[u8], b: &[u8]) -> usize {
        a.iter()
            .zip(b.iter())
            .take_while(|(lhs, rhs)| lhs == rhs)
            .count()
    }

    // Lengths chosen around 16/32/64/128-byte boundaries.
    for total_len in [
        0usize, 1, 5, 15, 16, 17, 31, 32, 33, 64, 65, 127, 191, 257, 320,
    ] {
        let base: Vec<u8> = (0..total_len)
            .map(|i| ((i * 13 + 7) & 0xFF) as u8)
            .collect();

        // Unaligned starting offsets.
        for start in [0usize, 1, 3] {
            if start > total_len {
                continue;
            }
            let a = &base[start..];
            let b = a.to_vec();
            // Fully equal slices.
            assert_eq!(
                MatchGenerator::common_prefix_len(a, &b),
                scalar_reference(a, &b),
                "equal slices total_len={total_len} start={start}"
            );

            let len = a.len();
            // Single-byte mismatches at and around vector lane boundaries.
            for mismatch in [0usize, 1, 7, 15, 16, 31, 32, 47, 63, 95, 127, 128, 129, 191] {
                if mismatch >= len {
                    continue;
                }
                let mut altered = b.clone();
                altered[mismatch] ^= 0x5A;
                assert_eq!(
                    MatchGenerator::common_prefix_len(a, &altered),
                    scalar_reference(a, &altered),
                    "total_len={total_len} start={start} mismatch={mismatch}"
                );
            }

            // Mismatch in the very last byte of the slice.
            if len > 0 {
                let mismatch = len - 1;
                let mut altered = b.clone();
                altered[mismatch] ^= 0xA5;
                assert_eq!(
                    MatchGenerator::common_prefix_len(a, &altered),
                    scalar_reference(a, &altered),
                    "tail mismatch total_len={total_len} start={start} mismatch={mismatch}"
                );
            }
        }
    }

    // Unequal lengths: the shorter slice bounds the comparison.
    let long = alloc::vec![0xAB; 320];
    let shorter = alloc::vec![0xAB; 137];
    assert_eq!(
        MatchGenerator::common_prefix_len(&long, &shorter),
        scalar_reference(&long, &shorter)
    );
}
4161
#[test]
fn row_pick_lazy_returns_none_when_next_is_better() {
    // In a uniform run of 'a' bytes the lazy picker is expected to find a
    // better candidate one position ahead and defer (return None) rather
    // than committing to the handed-in minimal candidate.
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.add_data(alloc::vec![b'a'; 64], |_| {});
    matcher.ensure_tables();

    let abs_pos = matcher.history_abs_start + 16;
    // A deliberately weak baseline: minimal length at offset 8.
    let best = MatchCandidate {
        start: abs_pos,
        offset: 8,
        match_len: ROW_MIN_MATCH_LEN,
    };
    assert!(
        matcher.pick_lazy_match(abs_pos, 0, Some(best)).is_none(),
        "lazy picker should defer when next position is clearly better"
    );
}
4180
#[test]
fn row_pick_lazy_depth2_returns_none_when_next2_significantly_better() {
    // With lazy_depth = 2, the picker must defer (return None) when the
    // candidate two positions ahead beats the current match by more than
    // one byte of length.
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.lazy_depth = 2;
    matcher.search_depth = 0;
    // Seeded repcodes so best_match can use repcode offsets 6 and 9.
    matcher.offset_hist = [6, 9, 1];

    // Data crafted so abs_pos+0 yields a minimal repcode match while
    // abs_pos+2 starts a clearly longer one.
    let mut data = alloc::vec![b'x'; 40];
    data[11..30].copy_from_slice(b"EFABCABCAEFABCAEFAB");
    matcher.add_data(data, |_| {});
    matcher.ensure_tables();

    let abs_pos = matcher.history_abs_start + 20;
    let best = matcher
        .best_match(abs_pos, 0)
        .expect("expected baseline repcode match");
    assert_eq!(best.offset, 9);
    assert_eq!(best.match_len, ROW_MIN_MATCH_LEN);

    // The +1 probe must not already dominate — the defer decision has to
    // come from the +2 candidate.
    if let Some(next) = matcher.best_match(abs_pos + 1, 1) {
        assert!(next.match_len <= best.match_len);
    }

    let next2 = matcher
        .best_match(abs_pos + 2, 2)
        .expect("expected +2 candidate");
    assert!(
        next2.match_len > best.match_len + 1,
        "+2 candidate must be significantly better for depth-2 lazy skip"
    );
    assert!(
        matcher.pick_lazy_match(abs_pos, 0, Some(best)).is_none(),
        "lazy picker should defer when +2 candidate is significantly better"
    );
}
4217
#[test]
fn row_pick_lazy_depth2_keeps_best_when_next2_is_only_one_byte_better() {
    // Boundary case for the depth-2 heuristic: a +2 candidate only one byte
    // longer does not clear the extra margin, so the current best must be
    // kept rather than deferred.
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.lazy_depth = 2;
    matcher.search_depth = 0;
    // Seeded repcodes so best_match can use repcode offsets 6 and 9.
    matcher.offset_hist = [6, 9, 1];

    // Same layout as the "significantly better" test except the final byte
    // ('Z') caps the +2 candidate at exactly best.match_len + 1.
    let mut data = alloc::vec![b'x'; 40];
    data[11..30].copy_from_slice(b"EFABCABCAEFABCAEFAZ");
    matcher.add_data(data, |_| {});
    matcher.ensure_tables();

    let abs_pos = matcher.history_abs_start + 20;
    let best = matcher
        .best_match(abs_pos, 0)
        .expect("expected baseline repcode match");
    assert_eq!(best.offset, 9);
    assert_eq!(best.match_len, ROW_MIN_MATCH_LEN);

    let next2 = matcher
        .best_match(abs_pos + 2, 2)
        .expect("expected +2 candidate");
    assert_eq!(next2.match_len, best.match_len + 1);
    let chosen = matcher
        .pick_lazy_match(abs_pos, 0, Some(best))
        .expect("lazy picker should keep current best");
    // The original candidate must come back unmodified.
    assert_eq!(chosen.start, best.start);
    assert_eq!(chosen.offset, best.offset);
    assert_eq!(chosen.match_len, best.match_len);
}
4249
// Recomputes hash_and_row's expected (row, tag) by hand: mix the 4-byte
// little-endian key with the configured kernel, take the top
// (row_hash_log + ROW_TAG_BITS) bits, then split them into row index (high
// part, masked to row_hash_log bits) and tag (low ROW_TAG_BITS bits).
#[test]
fn row_hash_and_row_extracts_high_bits() {
    let mut matcher = RowMatchGenerator::new(1 << 22);
    matcher.configure(ROW_CONFIG);
    matcher.add_data(
        alloc::vec![
            0xAA, 0xBB, 0xCC, 0x11, 0x10, 0x20, 0x30, 0x40, 0xAA, 0xBB, 0xCC, 0x22, 0x50, 0x60,
            0x70, 0x80,
        ],
        |_| {},
    );
    matcher.ensure_tables();

    let pos = matcher.history_abs_start + 8;
    let (row, tag) = matcher
        .hash_and_row(pos)
        .expect("row hash should be available");

    // Reference computation mirroring the production path step by step.
    let idx = pos - matcher.history_abs_start;
    let concat = matcher.live_history();
    let value = u32::from_le_bytes(concat[idx..idx + ROW_HASH_KEY_LEN].try_into().unwrap()) as u64;
    let hash = hash_mix_u64_with_kernel(value, matcher.hash_mix_kernel);
    let total_bits = matcher.row_hash_log + ROW_TAG_BITS;
    // Keep only the highest `total_bits` bits of the mixed hash.
    let combined = hash >> (u64::BITS as usize - total_bits);
    let expected_row =
        ((combined >> ROW_TAG_BITS) as usize) & ((1usize << matcher.row_hash_log) - 1);
    let expected_tag = combined as u8;

    assert_eq!(row, expected_row);
    assert_eq!(tag, expected_tag);
}
4282
#[test]
fn row_repcode_skips_candidate_before_history_start() {
    // The configured rep offset (3) from abs pos 12 lands at abs 9, which is
    // before the live history start (abs 10), so the candidate is rejected.
    let mut gen = RowMatchGenerator::new(1 << 22);
    gen.configure(ROW_CONFIG);
    gen.history = alloc::vec![b'a'; 20];
    gen.history_abs_start = 10;
    gen.history_start = 0;
    gen.offset_hist = [3, 0, 0];

    let candidate = gen.repcode_candidate(12, 1);
    assert!(candidate.is_none());
}
4294
#[test]
fn row_repcode_returns_none_when_position_too_close_to_history_end() {
    // At abs pos 4 only one byte of the 5-byte history remains, so there is
    // no room for a repcode match and the lookup must yield None.
    let mut gen = RowMatchGenerator::new(1 << 22);
    gen.configure(ROW_CONFIG);
    gen.history = b"abcde".to_vec();
    gen.history_abs_start = 0;
    gen.history_start = 0;
    gen.offset_hist = [1, 0, 0];

    let candidate = gen.repcode_candidate(4, 1);
    assert!(candidate.is_none());
}
4306
// On x86_64 hosts with SSE4.2, kernel detection must select the SSE4.2 path
// and the dispatching wrapper must produce the same value as calling the
// accelerated implementation directly. Skipped (early return) when the CPU
// lacks SSE4.2.
#[cfg(all(feature = "std", target_arch = "x86_64"))]
#[test]
fn hash_mix_sse42_path_is_available_and_matches_accelerated_impl_when_supported() {
    if !is_x86_feature_detected!("sse4.2") {
        return;
    }

    let kernel = detect_hash_mix_kernel();
    assert_eq!(kernel, HashMixKernel::X86Sse42);
    let v = 0x0123_4567_89AB_CDEFu64;
    // SAFETY-relevant: the direct call is only reached after the sse4.2
    // feature check above succeeded.
    let accelerated = unsafe { hash_mix_u64_sse42(v) };
    assert_eq!(hash_mix_u64_with_kernel(v, kernel), accelerated);
}
4320
#[cfg(all(feature = "std", target_arch = "x86_64"))]
#[test]
fn hash_mix_scalar_path_can_be_forced_for_coverage_and_matches_formula() {
    // Forcing the scalar kernel must reproduce the plain multiplicative mix.
    let value = 0x0123_4567_89AB_CDEFu64;
    assert_eq!(
        hash_mix_u64_with_kernel(value, HashMixKernel::Scalar),
        value.wrapping_mul(HASH_MIX_PRIME)
    );
}
4329
// aarch64 analogue of the SSE4.2 test: with the CRC extension available,
// detection must pick the CRC kernel and the dispatcher must agree with the
// direct accelerated call. Skipped (early return) without CRC support.
#[cfg(all(feature = "std", target_arch = "aarch64", target_endian = "little"))]
#[test]
fn hash_mix_crc_path_is_available_and_matches_accelerated_impl_when_supported() {
    if !is_aarch64_feature_detected!("crc") {
        return;
    }

    let kernel = detect_hash_mix_kernel();
    assert_eq!(kernel, HashMixKernel::Aarch64Crc);
    let v = 0x0123_4567_89AB_CDEFu64;
    // SAFETY-relevant: only called after the "crc" feature check succeeded.
    let accelerated = unsafe { hash_mix_u64_crc(v) };
    assert_eq!(hash_mix_u64_with_kernel(v, kernel), accelerated);
}
4343
#[cfg(all(feature = "std", target_arch = "aarch64", target_endian = "little"))]
#[test]
fn hash_mix_scalar_path_can_be_forced_on_aarch64_and_matches_formula() {
    // The forced scalar kernel must match the multiplicative mixing formula.
    let value = 0x0123_4567_89AB_CDEFu64;
    assert_eq!(
        hash_mix_u64_with_kernel(value, HashMixKernel::Scalar),
        value.wrapping_mul(HASH_MIX_PRIME)
    );
}
4352
#[test]
fn row_candidate_returns_none_when_abs_pos_near_end_of_history() {
    // Five bytes of history leave no room for a row candidate at position 0.
    let mut gen = RowMatchGenerator::new(1 << 22);
    gen.configure(ROW_CONFIG);
    gen.history_abs_start = 0;
    gen.history_start = 0;
    gen.history = b"abcde".to_vec();

    let candidate = gen.row_candidate(0, 0);
    assert!(candidate.is_none());
}
4363
#[test]
fn hc_chain_candidates_returns_sentinels_for_short_suffix() {
    // A 3-byte history is too short to produce chain candidates, so every
    // returned slot must hold the usize::MAX sentinel.
    let mut gen = HcMatchGenerator::new(32);
    gen.history = b"abc".to_vec();
    gen.history_abs_start = 0;
    gen.history_start = 0;
    gen.ensure_tables();

    for &candidate in gen.chain_candidates(0).iter() {
        assert_eq!(candidate, usize::MAX);
    }
}
4375
#[test]
fn hc_reset_refills_existing_tables_with_empty_sentinel() {
    // Populate the tables, scribble arbitrary values into them, then check
    // that reset() restores every slot to the HC_EMPTY sentinel.
    let mut gen = HcMatchGenerator::new(32);
    gen.add_data(b"abcdeabcde".to_vec(), |_| {});
    gen.ensure_tables();
    assert!(!gen.hash_table.is_empty());
    assert!(!gen.chain_table.is_empty());
    gen.hash_table.fill(123);
    gen.chain_table.fill(456);

    gen.reset(|_| {});

    for &slot in gen.hash_table.iter() {
        assert_eq!(slot, HC_EMPTY);
    }
    for &slot in gen.chain_table.iter() {
        assert_eq!(slot, HC_EMPTY);
    }
}
4391
#[test]
fn hc_start_matching_returns_early_for_empty_current_block() {
    // An empty current block must not invoke the sequence callback at all.
    let mut gen = HcMatchGenerator::new(32);
    gen.add_data(Vec::new(), |_| {});

    let mut emitted = false;
    gen.start_matching(|_| emitted = true);

    assert!(!emitted, "empty current block should not emit sequences");
}
4400
/// Produces `len` pseudo-random bytes from an xorshift64 stream seeded with
/// `seed`. Fully deterministic: the same `(seed, len)` always yields the same
/// bytes, which makes it suitable for building reproducible test fixtures.
#[cfg(test)]
fn deterministic_high_entropy_bytes(seed: u64, len: usize) -> Vec<u8> {
    let mut state = seed;
    (0..len)
        .map(|_| {
            // xorshift64 step (shifts 13/7/17), then surface bits 40..48.
            state ^= state << 13;
            state ^= state >> 7;
            state ^= state << 17;
            (state >> 40) as u8
        })
        .collect()
}
4413
// After a sparse (incompressible-hint) skip of a high-entropy block, the
// dense tail of that block must still be indexed: the next block, which
// begins with the same tail bytes, has to match immediately at its start
// with offset == tail length.
#[test]
fn hc_sparse_skip_matching_preserves_tail_cross_block_match() {
    let mut matcher = HcMatchGenerator::new(1 << 22);
    let tail = b"Qz9kLm2Rp";
    // High-entropy filler with a known tail overwritten at the end.
    let mut first = deterministic_high_entropy_bytes(0xD1B5_4A32_9C77_0E19, 4096);
    let tail_start = first.len() - tail.len();
    first[tail_start..].copy_from_slice(tail);
    matcher.add_data(first.clone(), |_| {});
    // Some(true) = incompressible hint, exercising the sparse skip path.
    matcher.skip_matching(Some(true));

    let mut second = tail.to_vec();
    second.extend_from_slice(b"after-tail-literals");
    matcher.add_data(second, |_| {});

    // Capture only the first emitted sequence as (literals, offset, match_len).
    let mut first_sequence = None;
    matcher.start_matching(|seq| {
        if first_sequence.is_some() {
            return;
        }
        first_sequence = Some(match seq {
            Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => (literals.len(), offset, match_len),
        });
    });

    let (literals_len, offset, match_len) =
        first_sequence.expect("expected at least one sequence after sparse skip");
    assert_eq!(
        literals_len, 0,
        "first sequence should start at block boundary"
    );
    assert_eq!(
        offset,
        tail.len(),
        "first match should reference previous tail"
    );
    assert!(
        match_len >= tail.len(),
        "tail-aligned cross-block match must be preserved"
    );
}
4459
// Guards against double-insertion during sparse skip: a position that lies
// both on the sparse sampling grid and inside the densely seeded tail must
// not be inserted twice, which would create a self-loop chain entry
// (chain_table[idx] == rel + 1).
#[test]
fn hc_sparse_skip_matching_does_not_reinsert_sparse_tail_positions() {
    let mut matcher = HcMatchGenerator::new(1 << 22);
    let first = deterministic_high_entropy_bytes(0xC2B2_AE3D_27D4_EB4F, 4096);
    matcher.add_data(first.clone(), |_| {});
    matcher.skip_matching(Some(true));

    // Reconstruct the absolute coordinates of the block just skipped.
    let current_len = first.len();
    let current_abs_start = matcher.history_abs_start + matcher.window_size - current_len;
    let current_abs_end = current_abs_start + current_len;
    // Extent of the densely-seeded tail region at the end of the block.
    let dense_tail = HC_MIN_MATCH_LEN + INCOMPRESSIBLE_SKIP_STEP;
    let tail_start = current_abs_end
        .saturating_sub(dense_tail)
        .max(matcher.history_abs_start)
        .max(current_abs_start);

    // Find a tail position that also falls on the sparse sampling grid.
    let overlap_pos = (tail_start..current_abs_end)
        .find(|&pos| (pos - current_abs_start).is_multiple_of(INCOMPRESSIBLE_SKIP_STEP))
        .expect("fixture should contain at least one sparse-grid overlap in dense tail");

    let rel = matcher
        .relative_position(overlap_pos)
        .expect("overlap position should be representable as relative position");
    let chain_idx = rel as usize & ((1 << matcher.chain_log) - 1);
    // A self-loop would mean the position was inserted a second time on top
    // of its own earlier entry.
    assert_ne!(
        matcher.chain_table[chain_idx],
        rel + 1,
        "sparse-grid tail positions must not be reinserted (self-loop chain entry)"
    );
}
4490
#[test]
fn hc_compact_history_drains_when_threshold_crossed() {
    // With history_start advanced to 16 against a window of 8,
    // compact_history() must drop the consumed prefix and reset the start
    // index to zero, keeping only the live suffix.
    let mut gen = HcMatchGenerator::new(8);
    gen.history = b"abcdefghijklmnopqrstuvwxyz".to_vec();
    gen.history_start = 16;

    gen.compact_history();

    assert_eq!(gen.history, b"qrstuvwxyz");
    assert_eq!(gen.history_start, 0);
}
4500
#[test]
fn hc_insert_position_no_rebase_returns_when_relative_pos_unavailable() {
    // position_base (1) is ahead of absolute position 0, so no relative
    // position exists for it and the insert must leave both tables intact.
    let mut gen = HcMatchGenerator::new(32);
    gen.history = b"abcdefghijklmnop".to_vec();
    gen.history_abs_start = 0;
    gen.position_base = 1;
    gen.ensure_tables();

    let hash_snapshot = gen.hash_table.clone();
    let chain_snapshot = gen.chain_table.clone();
    gen.insert_position_no_rebase(0);

    assert_eq!(gen.hash_table, hash_snapshot);
    assert_eq!(gen.chain_table, chain_snapshot);
}
4516
// Dictionary priming temporarily grows the simple matcher's window by the
// dictionary length (24 bytes here). Once enough live blocks have been fed
// to evict the primed slices, both the retained budget and the window must
// return to their pre-priming values.
#[test]
fn prime_with_dictionary_budget_shrinks_after_simple_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Fastest);
    // Tiny 8-byte window so two 8-byte blocks fully evict the dictionary.
    driver.match_generator.max_window_size = 8;
    driver.reported_window_size = 8;

    let base_window = driver.match_generator.max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    // Window must grow by exactly the 24-byte dictionary length.
    assert_eq!(driver.match_generator.max_window_size, base_window + 24);

    // Push two full blocks through to evict the primed dictionary slices.
    for block in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut space = driver.get_next_space();
        space.clear();
        space.extend_from_slice(block);
        driver.commit_space(space);
        driver.skip_matching_with_hint(None);
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.match_generator.max_window_size, base_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
4547
// Same budget-retirement contract as the simple-matcher variant, but driven
// through the dfast backend (selected via CompressionLevel::Level(2)).
#[test]
fn prime_with_dictionary_budget_shrinks_after_dfast_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Level(2));
    // Tiny 8-byte window so two 8-byte blocks fully evict the dictionary.
    driver.dfast_matcher_mut().max_window_size = 8;
    driver.reported_window_size = 8;

    let base_window = driver.dfast_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    // Window must grow by exactly the 24-byte dictionary length.
    assert_eq!(driver.dfast_matcher().max_window_size, base_window + 24);

    // Push two full blocks through to evict the primed dictionary slices.
    for block in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut space = driver.get_next_space();
        space.clear();
        space.extend_from_slice(block);
        driver.commit_space(space);
        driver.skip_matching_with_hint(None);
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.dfast_matcher().max_window_size,
        base_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
4579
// With the hash-chain backend (CompressionLevel::Better), dictionary-primed
// history must be matchable from the very first block: compressing the same
// 8 bytes as the dictionary should emit a zero-literal match at offset 8.
#[test]
fn hc_prime_with_dictionary_preserves_history_for_first_full_block() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Better);

    driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);

    // First live block repeats the dictionary content exactly.
    let mut space = driver.get_next_space();
    space.clear();
    space.extend_from_slice(b"abcdefgh");
    driver.commit_space(space);

    // Look for a zero-literal match of offset 8 (i.e. against the dictionary).
    let mut saw_match = false;
    driver.start_matching(|seq| {
        if let Sequence::Triple {
            literals,
            offset,
            match_len,
        } = seq
            && literals.is_empty()
            && offset == 8
            && match_len >= HC_MIN_MATCH_LEN
        {
            saw_match = true;
        }
    });

    assert!(
        saw_match,
        "hash-chain backend should match dictionary-primed history in first full block"
    );
}
4614
// Budget-retirement contract for the hash-chain backend: after the primed
// dictionary slices are evicted by live blocks, the retained budget drops to
// zero and the window shrinks back to its pre-priming size.
#[test]
fn prime_with_dictionary_budget_shrinks_after_hc_eviction() {
    let mut driver = MatchGeneratorDriver::new(8, 1);
    driver.reset(CompressionLevel::Better);
    // Tiny 8-byte window so two 8-byte blocks fully evict the dictionary.
    driver.hc_matcher_mut().max_window_size = 8;
    driver.reported_window_size = 8;

    let base_window = driver.hc_matcher().max_window_size;
    driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
    // Window must grow by exactly the 24-byte dictionary length.
    assert_eq!(driver.hc_matcher().max_window_size, base_window + 24);

    // Push two full blocks through to evict the primed dictionary slices.
    for block in [b"AAAAAAAA", b"BBBBBBBB"] {
        let mut space = driver.get_next_space();
        space.clear();
        space.extend_from_slice(block);
        driver.commit_space(space);
        driver.skip_matching_with_hint(None);
    }

    assert_eq!(
        driver.dictionary_retained_budget, 0,
        "dictionary budget should be fully retired once primed dict slices are evicted"
    );
    assert_eq!(
        driver.hc_matcher().max_window_size,
        base_window,
        "retired dictionary budget must not remain reusable for live history"
    );
}
4645
// Forces the matcher's absolute start past u32::MAX and verifies that the
// position rebase (triggered here via skip_matching) re-anchors
// position_base to the oldest live absolute position while keeping the hash
// and chain tables usable.
#[test]
fn hc_rebases_positions_after_u32_boundary() {
    let mut matcher = HcMatchGenerator::new(64);
    matcher.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
    matcher.ensure_tables();
    matcher.position_base = 0;
    // On 32-bit targets this value does not fit in usize; skip the test there.
    let history_abs_start: usize = match (u64::from(u32::MAX) + 64).try_into() {
        Ok(value) => value,
        Err(_) => return,
    };
    matcher.history_abs_start = history_abs_start;
    matcher.skip_matching(None);
    assert_eq!(
        matcher.position_base, matcher.history_abs_start,
        "rebase should anchor to the oldest live absolute position"
    );

    assert!(
        matcher.hash_table.iter().any(|entry| *entry != HC_EMPTY),
        "HC hash table should still be populated after crossing u32 boundary"
    );

    // Position 10 starts the third "abcde" repeat, so candidates must exist.
    let abs_pos = matcher.history_abs_start + 10;
    let candidates = matcher.chain_candidates(abs_pos);
    assert!(
        candidates.iter().any(|candidate| *candidate != usize::MAX),
        "chain_candidates should return valid matches after rebase"
    );
}
4678
// After a rebase, the rebuilt tables must contain exactly the positions that
// had been inserted before abs_pos — not any future positions. The expected
// state is constructed independently by re-inserting only that prefix into a
// fresh matcher with cleared tables.
#[test]
fn hc_rebase_rebuilds_only_inserted_prefix() {
    let mut matcher = HcMatchGenerator::new(64);
    matcher.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
    matcher.ensure_tables();
    matcher.position_base = 0;
    // On 32-bit targets this value does not fit in usize; skip the test there.
    let history_abs_start: usize = match (u64::from(u32::MAX) + 64).try_into() {
        Ok(value) => value,
        Err(_) => return,
    };
    matcher.history_abs_start = history_abs_start;
    let abs_pos = matcher.history_abs_start + 6;

    // Reference matcher: same data, tables rebuilt from scratch with only the
    // positions strictly before abs_pos.
    let mut expected = HcMatchGenerator::new(64);
    expected.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
    expected.ensure_tables();
    expected.history_abs_start = history_abs_start;
    expected.position_base = expected.history_abs_start;
    expected.hash_table.fill(HC_EMPTY);
    expected.chain_table.fill(HC_EMPTY);
    for pos in expected.history_abs_start..abs_pos {
        expected.insert_position_no_rebase(pos);
    }

    matcher.maybe_rebase_positions(abs_pos);

    assert_eq!(
        matcher.position_base, matcher.history_abs_start,
        "rebase should still anchor to the oldest live absolute position"
    );
    assert_eq!(
        matcher.hash_table, expected.hash_table,
        "rebase must rebuild only positions already inserted before abs_pos"
    );
    assert_eq!(
        matcher.chain_table, expected.chain_table,
        "future positions must not be pre-seeded into HC chains during rebase"
    );
}
4718
#[test]
fn suffix_store_with_single_slot_does_not_panic_on_keying() {
    // Even at the minimum capacity of one slot, inserting a key and reading
    // it back via contains_key/get must work without panicking.
    let mut store = SuffixStore::with_capacity(1);
    store.insert(b"abcde", 0);

    assert!(store.contains_key(b"abcde"));
    assert_eq!(store.get(b"abcde"), Some(0));
}
4726
// Checks the per-level driver configuration chosen by reset():
// Uncompressed fills every position (step 1), Fastest uses the interleaved
// FAST_HASH_FILL_STEP, and Better switches to the hash-chain backend with a
// 2^23 window and lazy depth 2.
#[test]
fn fastest_reset_uses_interleaved_hash_fill_step() {
    let mut driver = MatchGeneratorDriver::new(32, 2);

    driver.reset(CompressionLevel::Uncompressed);
    assert_eq!(driver.match_generator.hash_fill_step, 1);

    driver.reset(CompressionLevel::Fastest);
    assert_eq!(driver.match_generator.hash_fill_step, FAST_HASH_FILL_STEP);

    driver.reset(CompressionLevel::Better);
    assert_eq!(driver.active_backend, MatcherBackend::HashChain);
    assert_eq!(driver.window_size(), (1u64 << 23));
    assert_eq!(driver.hc_matcher().lazy_depth, 2);
}
4744
// After the simple matcher emits its first match ("abcde" repeated at offset
// 5, length 10), the new offset must be pushed to the front of the repcode
// history, shifting the previous entries back.
#[test]
fn simple_matcher_updates_offset_history_after_emitting_match() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdeabcdeabcde".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    assert!(matcher.next_sequence(|seq| {
        assert_eq!(
            seq,
            Sequence::Triple {
                literals: b"abcde",
                offset: 5,
                match_len: 10,
            }
        );
    }));
    // Offset 5 is now rep0; the prior history entries follow behind it.
    assert_eq!(matcher.offset_hist, [5, 1, 4]);
}
4766
// At a zero-literal boundary, repcode_candidate must try the rep offsets
// before any hash lookup: here offset_hist[1] == 10 is the correct distance
// for the repeated 10-byte prefix, so a (10, 10) candidate is expected.
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep1_before_hash_lookup() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghijabcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    // Position the matcher at the start of the second 10-byte repeat.
    matcher.suffix_idx = 10;
    matcher.last_idx_in_sequence = 10;
    // rep0 (99) is useless; rep1 (10) is the valid repeat distance.
    matcher.offset_hist = [99, 10, 4];

    let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
    assert_eq!(candidate, Some((10, 10)));
}
4783
// A repcode candidate may reference data in a previous window entry: the
// second block repeats the first one, skip_matching kept the first block in
// the window, and rep1 (10) must resolve across the entry boundary.
#[test]
fn simple_matcher_repcode_can_target_previous_window_entry() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.skip_matching();
    matcher.add_data(
        b"abcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );

    // rep0 (99) is useless; rep1 (10) points into the previous window entry.
    matcher.offset_hist = [99, 10, 4];

    let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data, 0);
    assert_eq!(candidate, Some((10, 10)));
}
4804
// Same fixture as the rep1 test, but the valid distance (10) sits in the
// rep2 slot — the candidate search must fall through to it.
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep2() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghijabcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    // Position the matcher at the start of the second 10-byte repeat.
    matcher.suffix_idx = 10;
    matcher.last_idx_in_sequence = 10;
    // Only rep2 (10) holds the valid repeat distance.
    matcher.offset_hist = [99, 4, 10];

    let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
    assert_eq!(candidate, Some((10, 10)));
}
4821
// Zero-literal repcode variant where none of the stored offsets fits, but
// rep0 - 1 (11 - 1 == 10) does — the candidate search must also try that
// derived offset and still find the (10, 10) match.
#[test]
fn simple_matcher_zero_literal_repcode_checks_rep0_minus1() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghijabcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    // Position the matcher at the start of the second 10-byte repeat.
    matcher.suffix_idx = 10;
    matcher.last_idx_in_sequence = 10;
    // rep0 is 11; only rep0 - 1 == 10 yields the valid repeat distance.
    matcher.offset_hist = [11, 4, 99];

    let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
    assert_eq!(candidate, Some((10, 10)));
}
4838
// An offset that reaches past the searchable prefix must be rejected: from
// suffix_idx 3 in the second block, offset 14 would point beyond what the
// matcher may reference, so offset_match_len returns None.
#[test]
fn simple_matcher_repcode_rejects_offsets_beyond_searchable_prefix() {
    let mut matcher = MatchGenerator::new(64);
    matcher.add_data(
        b"abcdefghij".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.skip_matching();
    matcher.add_data(
        b"klmnopqrst".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.suffix_idx = 3;

    let candidate = matcher.offset_match_len(14, &matcher.window.last().unwrap().data[3..]);
    assert_eq!(candidate, None);
}
4858
// Even when the fast interleaved fill step is configured, skip_matching must
// seed every position of the skipped block: the probe "bcdef" starts at
// position 1 of the first block — not on the step grid — yet it still has to
// match (offset 15 back into the skipped data).
#[test]
fn simple_matcher_skip_matching_seeds_every_position_even_with_fast_step() {
    let mut matcher = MatchGenerator::new(64);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;
    matcher.add_data(
        b"abcdefghijklmnop".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.skip_matching();
    matcher.add_data(b"bcdef".to_vec(), SuffixStore::with_capacity(64), |_, _| {});

    assert!(matcher.next_sequence(|seq| {
        assert_eq!(
            seq,
            Sequence::Triple {
                literals: b"",
                offset: 15,
                match_len: 5,
            }
        );
    }));
    // The probe is fully consumed by the single match above.
    assert!(!matcher.next_sequence(|_| {}));
}
4883
// With the incompressible hint, skip_matching seeds the skipped block only
// sparsely except for a dense tail. Two probes verify both halves: a probe
// taken from a sparse (unseeded) interior position must fall back to
// literals, while a probe taken from the dense tail must match immediately.
#[test]
fn simple_matcher_skip_matching_with_incompressible_hint_uses_sparse_prefix() {
    let mut matcher = MatchGenerator::new(128);
    let first = b"abcdefghijklmnopqrstuvwxyz012345".to_vec();
    // Probe copied from interior position 3 (expected to be unseeded).
    let sparse_probe = first[3..3 + MIN_MATCH_LEN].to_vec();
    // Probe copied from the final MIN_MATCH_LEN bytes (dense tail).
    let tail_start = first.len() - MIN_MATCH_LEN;
    let tail_probe = first[tail_start..tail_start + MIN_MATCH_LEN].to_vec();
    matcher.add_data(first, SuffixStore::with_capacity(256), |_, _| {});

    matcher.skip_matching_with_hint(Some(true));

    matcher.add_data(sparse_probe, SuffixStore::with_capacity(256), |_, _| {});
    let mut sparse_first_is_literals = None;
    assert!(matcher.next_sequence(|seq| {
        if sparse_first_is_literals.is_none() {
            sparse_first_is_literals = Some(matches!(seq, Sequence::Literals { .. }));
        }
    }));
    assert!(
        sparse_first_is_literals.unwrap_or(false),
        "sparse-start probe should not produce an immediate match"
    );

    // Fresh matcher for the tail probe so the first probe doesn't interfere.
    let mut matcher = MatchGenerator::new(128);
    matcher.add_data(
        b"abcdefghijklmnopqrstuvwxyz012345".to_vec(),
        SuffixStore::with_capacity(256),
        |_, _| {},
    );
    matcher.skip_matching_with_hint(Some(true));
    matcher.add_data(tail_probe, SuffixStore::with_capacity(256), |_, _| {});
    let mut tail_first_is_immediate_match = None;
    assert!(matcher.next_sequence(|seq| {
        if tail_first_is_immediate_match.is_none() {
            tail_first_is_immediate_match =
                Some(matches!(seq, Sequence::Triple { literals, .. } if literals.is_empty()));
        }
    }));
    assert!(
        tail_first_is_immediate_match.unwrap_or(false),
        "dense tail probe should match immediately at block start"
    );
}
4929
// add_suffixes_till with the fast step must still register the final
// searchable anchor: for a 10-byte block the last MIN_MATCH_LEN-byte key
// starts at index 5, and it must be present in the suffix store afterwards.
#[test]
fn simple_matcher_add_suffixes_till_backfills_last_searchable_anchor() {
    let mut matcher = MatchGenerator::new(64);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;
    matcher.add_data(
        b"01234abcde".to_vec(),
        SuffixStore::with_capacity(64),
        |_, _| {},
    );
    matcher.add_suffixes_till(10, FAST_HASH_FILL_STEP);

    let last = matcher.window.last().unwrap();
    let tail = &last.data[5..10];
    assert_eq!(last.suffixes.get(tail), Some(5));
}
4945
// Calling add_suffixes_till with an end index below MIN_MATCH_LEN must be a
// no-op: not even the first key (data[..MIN_MATCH_LEN]) gets registered.
#[test]
fn simple_matcher_add_suffixes_till_skips_when_idx_below_min_match_len() {
    let mut matcher = MatchGenerator::new(128);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;
    matcher.add_data(
        b"abcdefghijklmnopqrstuvwxyz".to_vec(),
        SuffixStore::with_capacity(1 << 16),
        |_, _| {},
    );

    matcher.add_suffixes_till(MIN_MATCH_LEN - 1, FAST_HASH_FILL_STEP);

    let last = matcher.window.last().unwrap();
    let first_key = &last.data[..MIN_MATCH_LEN];
    assert_eq!(last.suffixes.get(first_key), None);
}
4962
// With the fast step (3), add_suffixes_till up to index 17 must register
// keys at the interleaved grid positions 0, 3, 6, 9 and 12, each mapping to
// its own starting position.
#[test]
fn simple_matcher_add_suffixes_till_fast_step_registers_interleaved_positions() {
    let mut matcher = MatchGenerator::new(128);
    matcher.hash_fill_step = FAST_HASH_FILL_STEP;
    matcher.add_data(
        b"abcdefghijklmnopqrstuvwxyz".to_vec(),
        SuffixStore::with_capacity(1 << 16),
        |_, _| {},
    );

    matcher.add_suffixes_till(17, FAST_HASH_FILL_STEP);

    let last = matcher.window.last().unwrap();
    for pos in [0usize, 3, 6, 9, 12] {
        let key = &last.data[pos..pos + MIN_MATCH_LEN];
        assert_eq!(
            last.suffixes.get(key),
            Some(pos),
            "expected interleaved suffix registration at pos {pos}"
        );
    }
}
4985
// Two skipped 6-byte blocks overflow the 16-byte dfast window, forcing an
// eviction. The third block repeats the second; decoding the emitted
// sequences back into bytes must reproduce it exactly, proving offsets stay
// valid across the eviction.
#[test]
fn dfast_skip_matching_handles_window_eviction() {
    let mut matcher = DfastMatchGenerator::new(16);

    matcher.add_data(alloc::vec![1, 2, 3, 4, 5, 6], |_| {});
    matcher.skip_matching(None);
    matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});
    matcher.skip_matching(None);
    matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});

    // Seed the decode buffer with the still-live history, then replay the
    // emitted sequences as an LZ decoder would.
    let mut reconstructed = alloc::vec![7, 8, 9, 10, 11, 12];
    matcher.start_matching(|seq| match seq {
        Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            reconstructed.extend_from_slice(literals);
            let start = reconstructed.len() - offset;
            // Byte-by-byte copy so overlapping matches expand correctly.
            for i in 0..match_len {
                let byte = reconstructed[start + i];
                reconstructed.push(byte);
            }
        }
    });

    assert_eq!(reconstructed, [7, 8, 9, 10, 11, 12, 7, 8, 9, 10, 11, 12]);
}
5015
// Blocks are created with 64 bytes of capacity but only 8 bytes of content.
// When the second block evicts the first, the eviction callback must see the
// 8-byte length — a regression guard against reporting Vec capacity.
#[test]
fn dfast_add_data_callback_reports_evicted_len_not_capacity() {
    let mut matcher = DfastMatchGenerator::new(8);

    let mut first = Vec::with_capacity(64);
    first.extend_from_slice(b"abcdefgh");
    matcher.add_data(first, |_| {});

    let mut second = Vec::with_capacity(64);
    second.extend_from_slice(b"ijklmnop");

    let mut observed_evicted_len = None;
    matcher.add_data(second, |data| {
        observed_evicted_len = Some(data.len());
    });

    assert_eq!(
        observed_evicted_len,
        Some(8),
        "eviction callback must report evicted byte length, not backing capacity"
    );
}
5038
// Same capacity-vs-length guard as the add_data variant, but for
// trim_to_window: shrinking the window from 16 to 8 evicts the first 8-byte
// block, and the callback must report 8, not the 64-byte backing capacity.
#[test]
fn dfast_trim_to_window_callback_reports_evicted_len_not_capacity() {
    let mut matcher = DfastMatchGenerator::new(16);

    let mut first = Vec::with_capacity(64);
    first.extend_from_slice(b"abcdefgh");
    matcher.add_data(first, |_| {});

    let mut second = Vec::with_capacity(64);
    second.extend_from_slice(b"ijklmnop");
    matcher.add_data(second, |_| {});

    // Shrink the window so the first block no longer fits.
    matcher.max_window_size = 8;

    let mut observed_evicted_len = None;
    matcher.trim_to_window(|data| {
        observed_evicted_len = Some(data.len());
    });

    assert_eq!(
        observed_evicted_len,
        Some(8),
        "trim callback must report evicted byte length, not backing capacity"
    );
}
5064
// After matching a first block (all literals), its tail positions must be
// indexed so the next block can match against them: the second block starts
// with "bcdea" which ends the first block, and must produce a single
// zero-literal match (offset 5, length 11, including overlap extension).
#[test]
fn dfast_inserts_tail_positions_for_next_block_matching() {
    let mut matcher = DfastMatchGenerator::new(1 << 22);

    matcher.add_data(b"012345bcdea".to_vec(), |_| {});
    let mut history = Vec::new();
    matcher.start_matching(|seq| match seq {
        Sequence::Literals { literals } => history.extend_from_slice(literals),
        Sequence::Triple { .. } => unreachable!("first block should not match history"),
    });
    assert_eq!(history, b"012345bcdea");

    matcher.add_data(b"bcdeabcdeab".to_vec(), |_| {});
    let mut saw_first_sequence = false;
    matcher.start_matching(|seq| {
        assert!(!saw_first_sequence, "expected a single cross-block match");
        saw_first_sequence = true;
        match seq {
            Sequence::Literals { .. } => {
                panic!("expected tail-anchored cross-block match before any literals")
            }
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => {
                assert_eq!(literals, b"");
                assert_eq!(offset, 5);
                assert_eq!(match_len, 11);
                // Replay the match so `history` stays a faithful decode.
                let start = history.len() - offset;
                for i in 0..match_len {
                    let byte = history[start + i];
                    history.push(byte);
                }
            }
        }
    });

    assert!(
        saw_first_sequence,
        "expected tail-anchored cross-block match"
    );
    assert_eq!(history, b"012345bcdeabcdeabcdeab");
}
5109
// Dense skip (hint Some(false)) must index the skipped block's tail so the
// next block — which begins with the same tail bytes — matches immediately
// at its start with offset == tail length.
#[test]
fn dfast_dense_skip_matching_backfills_previous_tail_for_next_block() {
    let mut matcher = DfastMatchGenerator::new(1 << 22);
    let tail = b"Qz9kLm2Rp";
    let mut first = b"0123456789abcdef".to_vec();
    first.extend_from_slice(tail);
    matcher.add_data(first.clone(), |_| {});
    // Some(false) = compressible hint, exercising the dense skip path.
    matcher.skip_matching(Some(false));

    let mut second = tail.to_vec();
    second.extend_from_slice(b"after-tail-literals");
    matcher.add_data(second, |_| {});

    // Capture only the first emitted sequence as (literals, offset, match_len).
    let mut first_sequence = None;
    matcher.start_matching(|seq| {
        if first_sequence.is_some() {
            return;
        }
        first_sequence = Some(match seq {
            Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => (literals.len(), offset, match_len),
        });
    });

    let (lit_len, offset, match_len) = first_sequence.expect("expected at least one sequence");
    assert_eq!(
        lit_len, 0,
        "expected immediate cross-block match at block start"
    );
    assert_eq!(
        offset,
        tail.len(),
        "expected dense skip to preserve cross-boundary tail match"
    );
    assert!(
        match_len >= DFAST_MIN_MATCH_LEN,
        "match length should satisfy dfast minimum match length"
    );
}
5153
// Sparse skip (incompressible hint) over a high-entropy block must still
// densely seed the block's tail: the next block starts with that tail and
// has to match at offset == tail length with no leading literals.
#[test]
fn dfast_sparse_skip_matching_preserves_tail_cross_block_match() {
    let mut matcher = DfastMatchGenerator::new(1 << 22);
    let tail = b"Qz9kLm2Rp";
    // High-entropy filler with a known tail overwritten at the end.
    let mut first = deterministic_high_entropy_bytes(0x9E37_79B9_7F4A_7C15, 4096);
    let tail_start = first.len() - tail.len();
    first[tail_start..].copy_from_slice(tail);
    matcher.add_data(first.clone(), |_| {});

    // Some(true) = incompressible hint, exercising the sparse skip path.
    matcher.skip_matching(Some(true));

    let mut second = tail.to_vec();
    second.extend_from_slice(b"after-tail-literals");
    matcher.add_data(second, |_| {});

    // Capture only the first emitted sequence as (literals, offset, match_len).
    let mut first_sequence = None;
    matcher.start_matching(|seq| {
        if first_sequence.is_some() {
            return;
        }
        first_sequence = Some(match seq {
            Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
            Sequence::Triple {
                literals,
                offset,
                match_len,
            } => (literals.len(), offset, match_len),
        });
    });

    let (lit_len, offset, match_len) = first_sequence.expect("expected at least one sequence");
    assert_eq!(
        lit_len, 0,
        "expected immediate cross-block match at block start"
    );
    assert_eq!(
        offset,
        tail.len(),
        "expected match against densely seeded tail"
    );
    assert!(
        match_len >= DFAST_MIN_MATCH_LEN,
        "match length should satisfy dfast minimum match length"
    );
}
5199
// A position near the end of block one only becomes 8-byte (long-hash)
// hashable after block two appends one more byte. The second dense skip must
// backfill that newly hashable boundary start into the long-hash table.
#[test]
fn dfast_skip_matching_dense_backfills_newly_hashable_long_tail_positions() {
    let mut matcher = DfastMatchGenerator::new(1 << 22);
    let first = deterministic_high_entropy_bytes(0x7A64_0315_D4E1_91C3, 4096);
    let first_len = first.len();
    matcher.add_data(first, |_| {});
    matcher.skip_matching_dense();

    // One extra byte makes position first_len - 7 long-hashable (8 bytes).
    matcher.add_data(alloc::vec![0xAB], |_| {});
    matcher.skip_matching_dense();

    let target_abs_pos = first_len - 7;
    let target_rel = target_abs_pos - matcher.history_abs_start;
    let live = matcher.live_history();
    assert!(
        target_rel + 8 <= live.len(),
        "fixture must make the boundary start long-hashable"
    );
    let long_hash = matcher.hash8(&live[target_rel..]);
    assert!(
        matcher.long_hash[long_hash].contains(&target_abs_pos),
        "dense skip must seed long-hash entry for newly hashable boundary start"
    );
}
5226
#[test]
fn dfast_seed_remaining_hashable_starts_seeds_last_short_hash_positions() {
    // Tail seeding from the final minimum-match start must still cover the
    // very last position where a 4-byte short hash can be formed.
    let mut matcher = DfastMatchGenerator::new(1 << 20);
    matcher.add_data(
        deterministic_high_entropy_bytes(0x13F0_9A6D_55CE_7B21, 64),
        |_| {},
    );
    matcher.ensure_hash_tables();

    let block_len = matcher.window.back().unwrap().len();
    let block_abs_start = matcher.history_abs_start + matcher.window_size - block_len;
    // Start seeding at the last offset that still fits a minimum match.
    matcher.seed_remaining_hashable_starts(
        block_abs_start,
        block_len,
        block_len - DFAST_MIN_MATCH_LEN,
    );

    let last_short_abs = block_abs_start + block_len - 4;
    let last_short_rel = last_short_abs - matcher.history_abs_start;
    let history = matcher.live_history();
    assert!(
        last_short_rel + 4 <= history.len(),
        "fixture must leave the last short-hash start valid"
    );
    let bucket = matcher.hash4(&history[last_short_rel..]);
    assert!(
        matcher.short_hash[bucket].contains(&last_short_abs),
        "tail seeding must include the last 4-byte-hashable start"
    );
}
5252
#[test]
fn dfast_seed_remaining_hashable_starts_handles_pos_at_block_end() {
    // Even when seeding is requested with `pos` already at the block end,
    // the last 4-byte-hashable start must still land in the short-hash table.
    let mut matcher = DfastMatchGenerator::new(1 << 20);
    matcher.add_data(
        deterministic_high_entropy_bytes(0x7BB2_DA91_441E_C0EF, 64),
        |_| {},
    );
    matcher.ensure_hash_tables();

    let block_len = matcher.window.back().unwrap().len();
    let block_abs_start = matcher.history_abs_start + matcher.window_size - block_len;
    // Degenerate case: the seeding cursor sits exactly at the block end.
    matcher.seed_remaining_hashable_starts(block_abs_start, block_len, block_len);

    let last_short_abs = block_abs_start + block_len - 4;
    let last_short_rel = last_short_abs - matcher.history_abs_start;
    let history = matcher.live_history();
    assert!(
        last_short_rel + 4 <= history.len(),
        "fixture must leave the last short-hash start valid"
    );
    let bucket = matcher.hash4(&history[last_short_rel..]);
    assert!(
        matcher.short_hash[bucket].contains(&last_short_abs),
        "tail seeding must still include the last 4-byte-hashable start when pos is at block end"
    );
}
5277
#[test]
fn dfast_sparse_skip_matching_backfills_previous_tail_for_consecutive_sparse_blocks() {
    // Two consecutive sparse skips: the second skip must backfill the tail
    // of the first block so a later block can match across that boundary.
    let mut matcher = DfastMatchGenerator::new(1 << 22);
    let boundary_prefix = [0xFA, 0xFB, 0xFC];
    let boundary_suffix = [0xFD, 0xEE, 0xAD, 0xBE, 0xEF, 0x11, 0x22, 0x33];

    // Block 1 ends with the boundary prefix.
    let mut first = deterministic_high_entropy_bytes(0xA5A5_5A5A_C3C3_3C3C, 4096);
    let prefix_at = first.len() - boundary_prefix.len();
    first[prefix_at..].copy_from_slice(&boundary_prefix);
    matcher.add_data(first, |_| {});
    matcher.skip_matching(Some(true));

    // Block 2 begins with the boundary suffix.
    let mut second = deterministic_high_entropy_bytes(0xA5A5_5A5A_C3C3_3C3C, 4096);
    second[..boundary_suffix.len()].copy_from_slice(&boundary_suffix);
    matcher.add_data(second.clone(), |_| {});
    matcher.skip_matching(Some(true));

    // Block 3 replays the prefix+suffix straddling the first boundary.
    let mut third = boundary_prefix.to_vec();
    third.extend_from_slice(&boundary_suffix);
    third.extend_from_slice(b"-trailing-literals");
    matcher.add_data(third, |_| {});

    // Capture only the first emitted sequence as (lit_len, offset, match_len).
    let mut captured = None;
    matcher.start_matching(|seq| {
        if captured.is_none() {
            captured = Some(match seq {
                Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
                Sequence::Triple {
                    literals,
                    offset,
                    match_len,
                } => (literals.len(), offset, match_len),
            });
        }
    });

    let (lit_len, offset, match_len) = captured.expect("expected at least one sequence");
    assert_eq!(
        lit_len, 0,
        "expected immediate match from the prior sparse-skip boundary"
    );
    assert_eq!(
        offset,
        second.len() + boundary_prefix.len(),
        "expected match against backfilled first→second boundary start"
    );
    assert!(
        match_len >= DFAST_MIN_MATCH_LEN,
        "match length should satisfy dfast minimum match length"
    );
}
5330
#[test]
fn fastest_hint_iteration_23_sequences_reconstruct_source() {
    // Deterministic LCG byte stream (same constants as Knuth's MMIX LCG).
    fn generate_data(seed: u64, len: usize) -> Vec<u8> {
        let mut state = seed;
        (0..len)
            .map(|_| {
                state = state
                    .wrapping_mul(6364136223846793005)
                    .wrapping_add(1442695040888963407);
                (state >> 33) as u8
            })
            .collect()
    }

    // Fixture: pseudo-random body plus a twice-repeated slice so the matcher
    // is guaranteed at least one back-reference to find.
    let i = 23u64;
    let len = (i * 89 % 16384) as usize;
    let mut data = generate_data(i, len);
    let repeat = data[128..256].to_vec();
    data.extend_from_slice(&repeat);
    data.extend_from_slice(&repeat);

    let mut driver = MatchGeneratorDriver::new(1024 * 128, 1);
    driver.set_source_size_hint(data.len() as u64);
    driver.reset(CompressionLevel::Fastest);
    let mut space = driver.get_next_space();
    space[..data.len()].copy_from_slice(&data);
    space.truncate(data.len());
    driver.commit_space(space);

    // Replay every emitted sequence and rebuild the source byte stream.
    let mut rebuilt = Vec::with_capacity(data.len());
    let mut saw_triple = false;
    driver.start_matching(|seq| match seq {
        Sequence::Literals { literals } => rebuilt.extend_from_slice(literals),
        Sequence::Triple {
            literals,
            offset,
            match_len,
        } => {
            saw_triple = true;
            rebuilt.extend_from_slice(literals);
            assert!(offset > 0, "offset must be non-zero");
            assert!(
                offset <= rebuilt.len(),
                "offset must reference already-produced bytes: offset={} produced={}",
                offset,
                rebuilt.len()
            );
            // Copy byte-by-byte so overlapping matches (offset < match_len)
            // replay correctly.
            let copy_from = rebuilt.len() - offset;
            for step in 0..match_len {
                let byte = rebuilt[copy_from + step];
                rebuilt.push(byte);
            }
        }
    });

    assert!(saw_triple, "fixture must emit at least one match");
    assert_eq!(rebuilt, data);
}